FROMLIST: BACKPORT: THP zones: the use cases of policy zones
There are three types of zones: 1. The first four zones partition the physical address space of CPU memory. 2. The device zone provides interoperability between CPU and device memory. 3. The movable zone commonly represents a memory allocation policy. Though originally designed for memory hot removal, the movable zone is instead widely used for other purposes, e.g., CMA and kdump kernel, on platforms that do not support hot removal, e.g., Android and ChromeOS. Nowadays, it is legitimately a zone independent of any physical characteristics. In spite of being somewhat regarded as a hack, largely due to the lack of a generic design concept for its true major use cases (on billions of client devices), the movable zone naturally resembles a policy (virtual) zone overlaid on the first four (physical) zones. This proposal formally generalizes this concept as policy zones so that additional policies can be implemented and enforced by subsequent zones after the movable zone. An inherited requirement of policy zones (and the first four zones) is that subsequent zones must be able to fall back to previous zones and therefore must add new properties to the previous zones rather than remove existing ones from them. Also, all properties must be known at allocation time rather than at runtime, e.g., memory object size and mobility are valid properties but hotness and lifetime are not. ZONE_MOVABLE becomes the first policy zone, followed by two new policy zones: 1. ZONE_NOSPLIT, which contains pages that are movable (inherited from ZONE_MOVABLE) and restricted to a minimum order to be anti-fragmentation. The latter means that they cannot be split down below that order, while they are free or in use. 2. ZONE_NOMERGE, which contains pages that are movable and restricted to an exact order. The latter means that not only is split prohibited (inherited from ZONE_NOSPLIT) but also merge (see the reason in Chapter Three), while they are free or in use. 
Since these two zones can only serve THP allocations (__GFP_MOVABLE | __GFP_COMP), they are called THP zones. Reclaim works seamlessly and compaction is not needed for these two zones. Compared with the hugeTLB pool approach, THP zones tap into core MM features including: 1. THP allocations can fall back to the lower zones, which can have higher latency but still succeed. 2. THPs can be either shattered (see Chapter Two) if partially unmapped or reclaimed if they become cold. 3. THP orders can be much smaller than the PMD/PUD orders, e.g., 64KB contiguous PTEs on arm64 [1], which are more suitable for client workloads. Policy zones can be dynamically resized by offlining pages in one of them and onlining those pages in another. Note that this is only done among policy zones, not between a policy zone and a physical zone, since resizing is a (software) policy, not a physical characteristic. Implementing the same idea at pageblock granularity has also been explored but rejected at Google. Pageblocks have a finer granularity and therefore can be more flexible than zones. The tradeoff is that this alternative implementation was more complex and failed to bring a better ROI. However, the rejection was mainly due to its inability to be smoothly extended to 1GB THPs [2], which is a planned use case of TAO. [1] https://lore.kernel.org/20240215103205.2607016-1-ryan.roberts@arm.com/ [2] https://lore.kernel.org/20200928175428.4110504-1-zi.yan@sent.com/ Change-Id: I7eb555541d04b16b93dea5aa0e2b329c49694a10 Signed-off-by: Yu Zhao <yuzhao@google.com> Link: https://lore.kernel.org/r/20240229183436.4110845-2-yuzhao@google.com/ Bug: 313807618 [ Don't allocate order 0 from nomerge/nosplit zone - causes increase in reclaim activity ] Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
This commit is contained in:
@@ -3500,6 +3500,16 @@
|
|||||||
allocations which rules out almost all kernel
|
allocations which rules out almost all kernel
|
||||||
allocations. Use with caution!
|
allocations. Use with caution!
|
||||||
|
|
||||||
|
nosplit=X,Y [MM] Set the minimum order of the nosplit zone. Pages in
|
||||||
|
this zone can't be split down below order Y, while free
|
||||||
|
or in use.
|
||||||
|
Like movablecore, X should be either nn[KMGTPE] or n%.
|
||||||
|
|
||||||
|
nomerge=X,Y [MM] Set the exact order of the nomerge zone. Pages in
|
||||||
|
this zone are always order Y, meaning they can't be
|
||||||
|
split or merged while free or in use.
|
||||||
|
Like movablecore, X should be either nn[KMGTPE] or n%.
|
||||||
|
|
||||||
MTD_Partition= [MTD]
|
MTD_Partition= [MTD]
|
||||||
Format: <name>,<region-number>,<size>,<offset>
|
Format: <name>,<region-number>,<size>,<offset>
|
||||||
|
|
||||||
|
@@ -2228,7 +2228,7 @@ static bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm,
|
|||||||
page = pfn_to_online_page(pfn);
|
page = pfn_to_online_page(pfn);
|
||||||
if (!page)
|
if (!page)
|
||||||
continue;
|
continue;
|
||||||
if (page_zonenum(page) != ZONE_MOVABLE)
|
if (!is_zone_movable_page(page))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -85,8 +85,8 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
|
|||||||
* GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms.
|
* GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4
|
#if MAX_NR_ZONES - 2 - IS_ENABLED(CONFIG_ZONE_DEVICE) <= 4
|
||||||
/* ZONE_DEVICE is not a valid GFP zone specifier */
|
/* zones beyond ZONE_MOVABLE are not valid GFP zone specifiers */
|
||||||
#define GFP_ZONES_SHIFT 2
|
#define GFP_ZONES_SHIFT 2
|
||||||
#else
|
#else
|
||||||
#define GFP_ZONES_SHIFT ZONES_SHIFT
|
#define GFP_ZONES_SHIFT ZONES_SHIFT
|
||||||
@@ -124,6 +124,8 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
|
|||||||
| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \
|
| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
DECLARE_STATIC_KEY_FALSE(movablecore_enabled);
|
||||||
|
|
||||||
static inline enum zone_type __gfp_zone(gfp_t flags)
|
static inline enum zone_type __gfp_zone(gfp_t flags)
|
||||||
{
|
{
|
||||||
enum zone_type z;
|
enum zone_type z;
|
||||||
@@ -132,9 +134,30 @@ static inline enum zone_type __gfp_zone(gfp_t flags)
|
|||||||
z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
|
z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
|
||||||
((1 << GFP_ZONES_SHIFT) - 1);
|
((1 << GFP_ZONES_SHIFT) - 1);
|
||||||
VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
|
VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
|
||||||
|
|
||||||
|
if ((flags & __GFP_COMP) &&
|
||||||
|
(!static_branch_unlikely(&movablecore_enabled) || (flags & __GFP_MOVABLE)))
|
||||||
|
return LAST_VIRT_ZONE;
|
||||||
|
|
||||||
return z;
|
return z;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int zone_nomerge_order __read_mostly;
|
||||||
|
extern int zone_nosplit_order __read_mostly;
|
||||||
|
|
||||||
|
/*
 * Pick the highest zone usable for an allocation of the given order.
 *
 * Starts from __gfp_zone(flags) and steps down past ZONE_NOMERGE and then
 * ZONE_NOSPLIT whenever the zone's order is unset (zero) or @order does not
 * satisfy it: ZONE_NOMERGE needs an exact match, ZONE_NOSPLIT needs @order to
 * be at least the configured minimum.
 */
static inline enum zone_type gfp_order_zone(gfp_t flags, int order)
{
	enum zone_type zid = __gfp_zone(flags);

	if (zid >= ZONE_NOMERGE) {
		if (!zone_nomerge_order || order != zone_nomerge_order)
			zid = ZONE_NOMERGE - 1;
	}

	/* Evaluated after the step above, which may have landed on ZONE_NOSPLIT. */
	if (zid == ZONE_NOSPLIT) {
		if (!zone_nosplit_order || order < zone_nosplit_order)
			zid = ZONE_NOSPLIT - 1;
	}

	return zid;
}
|
||||||
|
|
||||||
enum zone_type gfp_zone(gfp_t flags);
|
enum zone_type gfp_zone(gfp_t flags);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -294,7 +294,6 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
|
|||||||
unsigned long len, unsigned long pgoff, unsigned long flags);
|
unsigned long len, unsigned long pgoff, unsigned long flags);
|
||||||
|
|
||||||
void folio_prep_large_rmappable(struct folio *folio);
|
void folio_prep_large_rmappable(struct folio *folio);
|
||||||
bool can_split_folio(struct folio *folio, int *pextra_pins);
|
|
||||||
int split_huge_page_to_list(struct page *page, struct list_head *list);
|
int split_huge_page_to_list(struct page *page, struct list_head *list);
|
||||||
static inline int split_huge_page(struct page *page)
|
static inline int split_huge_page(struct page *page)
|
||||||
{
|
{
|
||||||
@@ -446,11 +445,6 @@ static inline void folio_prep_large_rmappable(struct folio *folio) {}
|
|||||||
|
|
||||||
#define thp_get_unmapped_area NULL
|
#define thp_get_unmapped_area NULL
|
||||||
|
|
||||||
static inline bool
|
|
||||||
can_split_folio(struct folio *folio, int *pextra_pins)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
static inline int
|
static inline int
|
||||||
split_huge_page_to_list(struct page *page, struct list_head *list)
|
split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||||
{
|
{
|
||||||
|
@@ -157,7 +157,7 @@ extern enum zone_type policy_zone;
|
|||||||
|
|
||||||
static inline void check_highest_zone(enum zone_type k)
|
static inline void check_highest_zone(enum zone_type k)
|
||||||
{
|
{
|
||||||
if (k > policy_zone && k != ZONE_MOVABLE)
|
if (k > policy_zone && !zid_is_virt(k))
|
||||||
policy_zone = k;
|
policy_zone = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -815,11 +815,15 @@ enum zone_type {
|
|||||||
* there can be false negatives).
|
* there can be false negatives).
|
||||||
*/
|
*/
|
||||||
ZONE_MOVABLE,
|
ZONE_MOVABLE,
|
||||||
|
ZONE_NOSPLIT,
|
||||||
|
ZONE_NOMERGE,
|
||||||
#ifdef CONFIG_ZONE_DEVICE
|
#ifdef CONFIG_ZONE_DEVICE
|
||||||
ZONE_DEVICE,
|
ZONE_DEVICE,
|
||||||
#endif
|
#endif
|
||||||
__MAX_NR_ZONES
|
__MAX_NR_ZONES,
|
||||||
|
|
||||||
|
LAST_PHYS_ZONE = ZONE_MOVABLE - 1,
|
||||||
|
LAST_VIRT_ZONE = ZONE_NOMERGE,
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef __GENERATING_BOUNDS_H
|
#ifndef __GENERATING_BOUNDS_H
|
||||||
@@ -938,6 +942,8 @@ struct zone {
|
|||||||
seqlock_t span_seqlock;
|
seqlock_t span_seqlock;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int order;
|
||||||
|
|
||||||
int initialized;
|
int initialized;
|
||||||
|
|
||||||
/* Write-intensive fields used from the page allocator */
|
/* Write-intensive fields used from the page allocator */
|
||||||
@@ -1155,12 +1161,22 @@ static inline bool folio_is_zone_device(const struct folio *folio)
|
|||||||
|
|
||||||
static inline bool is_zone_movable_page(const struct page *page)
|
static inline bool is_zone_movable_page(const struct page *page)
|
||||||
{
|
{
|
||||||
return page_zonenum(page) == ZONE_MOVABLE;
|
return page_zonenum(page) >= ZONE_MOVABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool folio_is_zone_movable(const struct folio *folio)
|
static inline bool folio_is_zone_movable(const struct folio *folio)
|
||||||
{
|
{
|
||||||
return folio_zonenum(folio) == ZONE_MOVABLE;
|
return folio_zonenum(folio) >= ZONE_MOVABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool page_can_split(struct page *page)
|
||||||
|
{
|
||||||
|
return page_zonenum(page) < ZONE_NOSPLIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool folio_can_split(struct folio *folio)
|
||||||
|
{
|
||||||
|
return folio_zonenum(folio) < ZONE_NOSPLIT;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -1480,6 +1496,32 @@ static inline int local_memory_node(int node_id) { return node_id; };
|
|||||||
*/
|
*/
|
||||||
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
|
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
|
||||||
|
|
||||||
|
static inline bool zid_is_virt(enum zone_type zid)
|
||||||
|
{
|
||||||
|
return zid > LAST_PHYS_ZONE && zid <= LAST_VIRT_ZONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool zone_can_frag(struct zone *zone)
|
||||||
|
{
|
||||||
|
VM_WARN_ON_ONCE(zone->order && zone_idx(zone) < ZONE_NOSPLIT);
|
||||||
|
|
||||||
|
return zone_idx(zone) < ZONE_NOSPLIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool zone_is_suitable(struct zone *zone, int order)
|
||||||
|
{
|
||||||
|
int zid = zone_idx(zone);
|
||||||
|
|
||||||
|
if (zid < ZONE_NOSPLIT)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (!zone->order)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return (zid == ZONE_NOSPLIT && order >= zone->order) ||
|
||||||
|
(zid == ZONE_NOMERGE && order == zone->order);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_ZONE_DEVICE
|
#ifdef CONFIG_ZONE_DEVICE
|
||||||
static inline bool zone_is_zone_device(struct zone *zone)
|
static inline bool zone_is_zone_device(struct zone *zone)
|
||||||
{
|
{
|
||||||
@@ -1528,13 +1570,13 @@ static inline int zone_to_nid(struct zone *zone)
|
|||||||
static inline void zone_set_nid(struct zone *zone, int nid) {}
|
static inline void zone_set_nid(struct zone *zone, int nid) {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern int movable_zone;
|
extern int virt_zone;
|
||||||
|
|
||||||
static inline int is_highmem_idx(enum zone_type idx)
|
static inline int is_highmem_idx(enum zone_type idx)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_HIGHMEM
|
#ifdef CONFIG_HIGHMEM
|
||||||
return (idx == ZONE_HIGHMEM ||
|
return (idx == ZONE_HIGHMEM ||
|
||||||
(idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
|
(zid_is_virt(idx) && virt_zone == ZONE_HIGHMEM));
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
|
@@ -404,7 +404,7 @@ enum node_states {
|
|||||||
#else
|
#else
|
||||||
N_HIGH_MEMORY = N_NORMAL_MEMORY,
|
N_HIGH_MEMORY = N_NORMAL_MEMORY,
|
||||||
#endif
|
#endif
|
||||||
N_MEMORY, /* The node has memory(regular, high, movable) */
|
N_MEMORY, /* The node has memory in any of the zones */
|
||||||
N_CPU, /* The node has one or more cpus */
|
N_CPU, /* The node has one or more cpus */
|
||||||
N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */
|
N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */
|
||||||
NR_NODE_STATES
|
NR_NODE_STATES
|
||||||
|
@@ -27,7 +27,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \
|
#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \
|
||||||
HIGHMEM_ZONE(xx) xx##_MOVABLE, DEVICE_ZONE(xx)
|
HIGHMEM_ZONE(xx) xx##_MOVABLE, xx##_NOSPLIT, xx##_NOMERGE, DEVICE_ZONE(xx)
|
||||||
|
|
||||||
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
|
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
|
||||||
FOR_ALL_ZONES(PGALLOC)
|
FOR_ALL_ZONES(PGALLOC)
|
||||||
|
@@ -276,7 +276,9 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
|
|||||||
IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
|
IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
|
||||||
EM (ZONE_NORMAL, "Normal") \
|
EM (ZONE_NORMAL, "Normal") \
|
||||||
IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
|
IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
|
||||||
EMe(ZONE_MOVABLE,"Movable")
|
EM (ZONE_MOVABLE,"Movable") \
|
||||||
|
EM (ZONE_NOSPLIT,"NoSplit") \
|
||||||
|
EMe(ZONE_NOMERGE,"NoMerge")
|
||||||
|
|
||||||
#define LRU_NAMES \
|
#define LRU_NAMES \
|
||||||
EM (LRU_INACTIVE_ANON, "inactive_anon") \
|
EM (LRU_INACTIVE_ANON, "inactive_anon") \
|
||||||
|
@@ -2725,6 +2725,9 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
|
|||||||
ac->highest_zoneidx, ac->nodemask) {
|
ac->highest_zoneidx, ac->nodemask) {
|
||||||
enum compact_result status;
|
enum compact_result status;
|
||||||
|
|
||||||
|
if (!zone_can_frag(zone))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (prio > MIN_COMPACT_PRIORITY
|
if (prio > MIN_COMPACT_PRIORITY
|
||||||
&& compaction_deferred(zone, order)) {
|
&& compaction_deferred(zone, order)) {
|
||||||
rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
|
rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
|
||||||
@@ -2797,6 +2800,9 @@ static void proactive_compact_node(pg_data_t *pgdat)
|
|||||||
if (!populated_zone(zone))
|
if (!populated_zone(zone))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (!zone_can_frag(zone))
|
||||||
|
continue;
|
||||||
|
|
||||||
cc.zone = zone;
|
cc.zone = zone;
|
||||||
|
|
||||||
compact_zone(&cc, NULL);
|
compact_zone(&cc, NULL);
|
||||||
@@ -2829,6 +2835,9 @@ static void compact_node(int nid)
|
|||||||
if (!populated_zone(zone))
|
if (!populated_zone(zone))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (!zone_can_frag(zone))
|
||||||
|
continue;
|
||||||
|
|
||||||
cc.zone = zone;
|
cc.zone = zone;
|
||||||
|
|
||||||
compact_zone(&cc, NULL);
|
compact_zone(&cc, NULL);
|
||||||
@@ -2942,6 +2951,9 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
|
|||||||
if (!populated_zone(zone))
|
if (!populated_zone(zone))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (!zone_can_frag(zone))
|
||||||
|
continue;
|
||||||
|
|
||||||
/* Allocation can already succeed, check other zones */
|
/* Allocation can already succeed, check other zones */
|
||||||
if (zone_watermark_ok(zone, pgdat->kcompactd_max_order,
|
if (zone_watermark_ok(zone, pgdat->kcompactd_max_order,
|
||||||
min_wmark_pages(zone),
|
min_wmark_pages(zone),
|
||||||
|
@@ -2986,10 +2986,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Racy check whether the huge page can be split */
|
/* Racy check whether the huge page can be split */
|
||||||
bool can_split_folio(struct folio *folio, int *pextra_pins)
|
static bool can_split_folio(struct folio *folio, int *pextra_pins)
|
||||||
{
|
{
|
||||||
int extra_pins;
|
int extra_pins;
|
||||||
|
|
||||||
|
if (!folio_can_split(folio))
|
||||||
|
return false;
|
||||||
|
|
||||||
/* Additional pins from page cache */
|
/* Additional pins from page cache */
|
||||||
if (folio_test_anon(folio))
|
if (folio_test_anon(folio))
|
||||||
extra_pins = folio_test_swapcache(folio) ?
|
extra_pins = folio_test_swapcache(folio) ?
|
||||||
|
@@ -1836,22 +1836,20 @@ bool vma_policy_mof(struct vm_area_struct *vma)
|
|||||||
|
|
||||||
bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
|
bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
|
||||||
{
|
{
|
||||||
enum zone_type dynamic_policy_zone = policy_zone;
|
WARN_ON_ONCE(zid_is_virt(policy_zone));
|
||||||
|
|
||||||
BUG_ON(dynamic_policy_zone == ZONE_MOVABLE);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if policy->nodes has movable memory only,
|
* If policy->nodes has memory in virtual zones only, we apply policy
|
||||||
* we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only.
|
* only if gfp_zone(gfp) can allocate from those zones.
|
||||||
*
|
*
|
||||||
* policy->nodes is intersect with node_states[N_MEMORY].
|
* policy->nodes is intersect with node_states[N_MEMORY].
|
||||||
* so if the following test fails, it implies
|
* so if the following test fails, it implies
|
||||||
* policy->nodes has movable memory only.
|
* policy->nodes has memory in virtual zones only.
|
||||||
*/
|
*/
|
||||||
if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY]))
|
if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY]))
|
||||||
dynamic_policy_zone = ZONE_MOVABLE;
|
return zone > LAST_PHYS_ZONE;
|
||||||
|
|
||||||
return zone >= dynamic_policy_zone;
|
return zone >= policy_zone;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -2028,7 +2028,7 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
|
|||||||
order = folio_order(src);
|
order = folio_order(src);
|
||||||
}
|
}
|
||||||
zidx = zone_idx(folio_zone(src));
|
zidx = zone_idx(folio_zone(src));
|
||||||
if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
|
if (zidx > ZONE_NORMAL)
|
||||||
gfp_mask |= __GFP_HIGHMEM;
|
gfp_mask |= __GFP_HIGHMEM;
|
||||||
|
|
||||||
return __folio_alloc(gfp_mask, order, nid, mtc->nmask);
|
return __folio_alloc(gfp_mask, order, nid, mtc->nmask);
|
||||||
@@ -2525,7 +2525,7 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
wakeup_kswapd(pgdat->node_zones + z, 0,
|
wakeup_kswapd(pgdat->node_zones + z, 0,
|
||||||
folio_order(folio), ZONE_MOVABLE);
|
folio_order(folio), z);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
482
mm/mm_init.c
482
mm/mm_init.c
@@ -217,12 +217,18 @@ postcore_initcall(mm_sysfs_init);
|
|||||||
|
|
||||||
static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
|
static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
|
||||||
static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
|
static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
|
||||||
static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata;
|
|
||||||
|
|
||||||
static unsigned long required_kernelcore __initdata;
|
static unsigned long virt_zones[LAST_VIRT_ZONE - LAST_PHYS_ZONE][MAX_NUMNODES] __initdata;
|
||||||
static unsigned long required_kernelcore_percent __initdata;
|
#define pfn_of(zid, nid) (virt_zones[(zid) - LAST_PHYS_ZONE - 1][nid])
|
||||||
static unsigned long required_movablecore __initdata;
|
|
||||||
static unsigned long required_movablecore_percent __initdata;
|
static unsigned long zone_nr_pages[LAST_VIRT_ZONE - LAST_PHYS_ZONE + 1] __initdata;
|
||||||
|
#define nr_pages_of(zid) (zone_nr_pages[(zid) - LAST_PHYS_ZONE])
|
||||||
|
|
||||||
|
static unsigned long zone_percentage[LAST_VIRT_ZONE - LAST_PHYS_ZONE + 1] __initdata;
|
||||||
|
#define percentage_of(zid) (zone_percentage[(zid) - LAST_PHYS_ZONE])
|
||||||
|
|
||||||
|
int zone_nosplit_order __read_mostly;
|
||||||
|
int zone_nomerge_order __read_mostly;
|
||||||
|
|
||||||
static unsigned long nr_kernel_pages __initdata;
|
static unsigned long nr_kernel_pages __initdata;
|
||||||
static unsigned long nr_all_pages __initdata;
|
static unsigned long nr_all_pages __initdata;
|
||||||
@@ -273,25 +279,71 @@ static int __init cmdline_parse_kernelcore(char *p)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return cmdline_parse_core(p, &required_kernelcore,
|
return cmdline_parse_core(p, &nr_pages_of(LAST_PHYS_ZONE),
|
||||||
&required_kernelcore_percent);
|
&percentage_of(LAST_PHYS_ZONE));
|
||||||
}
|
}
|
||||||
early_param("kernelcore", cmdline_parse_kernelcore);
|
early_param("kernelcore", cmdline_parse_kernelcore);
|
||||||
|
|
||||||
|
DEFINE_STATIC_KEY_FALSE(movablecore_enabled);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* movablecore=size sets the amount of memory for use for allocations that
|
* movablecore=size sets the amount of memory for use for allocations that
|
||||||
* can be reclaimed or migrated.
|
* can be reclaimed or migrated.
|
||||||
*/
|
*/
|
||||||
static int __init cmdline_parse_movablecore(char *p)
|
static int __init cmdline_parse_movablecore(char *p)
|
||||||
{
|
{
|
||||||
return cmdline_parse_core(p, &required_movablecore,
|
static_branch_enable(&movablecore_enabled);
|
||||||
&required_movablecore_percent);
|
|
||||||
|
return cmdline_parse_core(p, &nr_pages_of(ZONE_MOVABLE),
|
||||||
|
&percentage_of(ZONE_MOVABLE));
|
||||||
}
|
}
|
||||||
early_param("movablecore", cmdline_parse_movablecore);
|
early_param("movablecore", cmdline_parse_movablecore);
|
||||||
|
|
||||||
|
/*
 * Parse an "X,Y" zone parameter.
 *
 * X is the zone size and is handed to cmdline_parse_core(), which fills in
 * *nr_pages or *percent. Y is the zone order and must lie in [2, MAX_ORDER].
 * The string is split in place at the comma.
 *
 * Returns 0 on success or a negative errno. *order is written only after both
 * fields have been parsed successfully.
 */
static int __init parse_zone_order(char *p, unsigned long *nr_pages,
				   unsigned long *percent, int *order)
{
	int err;
	unsigned long n;
	char *s;

	/* The option may be given without "=value", in which case p is NULL. */
	if (!p)
		return -EINVAL;

	s = strchr(p, ',');
	if (!s)
		return -EINVAL;

	/* Terminate X at the comma; s now points at Y. */
	*s++ = '\0';

	err = kstrtoul(s, 0, &n);
	if (err)
		return err;

	if (n < 2 || n > MAX_ORDER)
		return -EINVAL;

	err = cmdline_parse_core(p, nr_pages, percent);
	if (err)
		return err;

	*order = n;

	return 0;
}
|
||||||
|
|
||||||
|
/* "nosplit=X,Y": record the size (X) and minimum order (Y) of ZONE_NOSPLIT. */
static int __init parse_zone_nosplit(char *p)
{
	unsigned long *nr_pages = &nr_pages_of(ZONE_NOSPLIT);
	unsigned long *percent = &percentage_of(ZONE_NOSPLIT);

	return parse_zone_order(p, nr_pages, percent, &zone_nosplit_order);
}
|
||||||
|
early_param("nosplit", parse_zone_nosplit);
|
||||||
|
|
||||||
|
/* "nomerge=X,Y": record the size (X) and exact order (Y) of ZONE_NOMERGE. */
static int __init parse_zone_nomerge(char *p)
{
	unsigned long *nr_pages = &nr_pages_of(ZONE_NOMERGE);
	unsigned long *percent = &percentage_of(ZONE_NOMERGE);

	return parse_zone_order(p, nr_pages, percent, &zone_nomerge_order);
}
|
||||||
|
early_param("nomerge", parse_zone_nomerge);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* early_calculate_totalpages()
|
* early_calculate_totalpages()
|
||||||
* Sum pages in active regions for movable zone.
|
* Sum pages in active regions for virtual zones.
|
||||||
* Populate N_MEMORY for calculating usable_nodes.
|
* Populate N_MEMORY for calculating usable_nodes.
|
||||||
*/
|
*/
|
||||||
static unsigned long __init early_calculate_totalpages(void)
|
static unsigned long __init early_calculate_totalpages(void)
|
||||||
@@ -311,24 +363,110 @@ static unsigned long __init early_calculate_totalpages(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This finds a zone that can be used for ZONE_MOVABLE pages. The
|
* This finds a physical zone that can be used for virtual zones. The
|
||||||
* assumption is made that zones within a node are ordered in monotonic
|
* assumption is made that zones within a node are ordered in monotonic
|
||||||
* increasing memory addresses so that the "highest" populated zone is used
|
* increasing memory addresses so that the "highest" populated zone is used
|
||||||
*/
|
*/
|
||||||
static void __init find_usable_zone_for_movable(void)
|
static void __init find_usable_zone(void)
|
||||||
{
|
{
|
||||||
int zone_index;
|
int zone_index;
|
||||||
for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
|
for (zone_index = LAST_PHYS_ZONE; zone_index >= 0; zone_index--) {
|
||||||
if (zone_index == ZONE_MOVABLE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (arch_zone_highest_possible_pfn[zone_index] >
|
if (arch_zone_highest_possible_pfn[zone_index] >
|
||||||
arch_zone_lowest_possible_pfn[zone_index])
|
arch_zone_lowest_possible_pfn[zone_index])
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
VM_BUG_ON(zone_index == -1);
|
VM_BUG_ON(zone_index == -1);
|
||||||
movable_zone = zone_index;
|
virt_zone = zone_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init find_virt_zone(unsigned long occupied, unsigned long *zone_pfn)
|
||||||
|
{
|
||||||
|
int i, nid;
|
||||||
|
unsigned long node_avg, remaining;
|
||||||
|
int usable_nodes = nodes_weight(node_states[N_MEMORY]);
|
||||||
|
/* usable_startpfn is the lowest possible pfn virtual zones can be at */
|
||||||
|
unsigned long usable_startpfn = arch_zone_lowest_possible_pfn[virt_zone];
|
||||||
|
|
||||||
|
restart:
|
||||||
|
/* Carve out memory as evenly as possible throughout nodes */
|
||||||
|
node_avg = occupied / usable_nodes;
|
||||||
|
for_each_node_state(nid, N_MEMORY) {
|
||||||
|
unsigned long start_pfn, end_pfn;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Recalculate node_avg if the division per node now exceeds
|
||||||
|
* what is necessary to satisfy the amount of memory to carve
|
||||||
|
* out.
|
||||||
|
*/
|
||||||
|
if (occupied < node_avg)
|
||||||
|
node_avg = occupied / usable_nodes;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As the map is walked, we track how much memory is usable
|
||||||
|
* using remaining. When it is 0, the rest of the node is
|
||||||
|
* usable.
|
||||||
|
*/
|
||||||
|
remaining = node_avg;
|
||||||
|
|
||||||
|
/* Go through each range of PFNs within this node */
|
||||||
|
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
|
||||||
|
unsigned long size_pages;
|
||||||
|
|
||||||
|
start_pfn = max(start_pfn, zone_pfn[nid]);
|
||||||
|
if (start_pfn >= end_pfn)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Account for what is only usable when carving out */
|
||||||
|
if (start_pfn < usable_startpfn) {
|
||||||
|
unsigned long nr_pages = min(end_pfn, usable_startpfn) - start_pfn;
|
||||||
|
|
||||||
|
remaining -= min(nr_pages, remaining);
|
||||||
|
occupied -= min(nr_pages, occupied);
|
||||||
|
|
||||||
|
/* Continue if range is now fully accounted */
|
||||||
|
if (end_pfn <= usable_startpfn) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Push zone_pfn to the end so that if
|
||||||
|
* we have to carve out more across
|
||||||
|
* nodes, we will not double account
|
||||||
|
* here.
|
||||||
|
*/
|
||||||
|
zone_pfn[nid] = end_pfn;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
start_pfn = usable_startpfn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The usable PFN range is from start_pfn->end_pfn.
|
||||||
|
* Calculate size_pages as the number of pages used.
|
||||||
|
*/
|
||||||
|
size_pages = end_pfn - start_pfn;
|
||||||
|
if (size_pages > remaining)
|
||||||
|
size_pages = remaining;
|
||||||
|
zone_pfn[nid] = start_pfn + size_pages;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Some memory was carved out, update counts and break
|
||||||
|
* if the request for this node has been satisfied.
|
||||||
|
*/
|
||||||
|
occupied -= min(occupied, size_pages);
|
||||||
|
remaining -= size_pages;
|
||||||
|
if (!remaining)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there is still more to carve out, we do another pass with one less
|
||||||
|
* node in the count. This will push zone_pfn[nid] further along on the
|
||||||
|
* nodes that still have memory until the request is fully satisfied.
|
||||||
|
*/
|
||||||
|
usable_nodes--;
|
||||||
|
if (usable_nodes && occupied > usable_nodes)
|
||||||
|
goto restart;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -337,19 +475,19 @@ static void __init find_usable_zone_for_movable(void)
|
|||||||
* memory. When they don't, some nodes will have more kernelcore than
|
* memory. When they don't, some nodes will have more kernelcore than
|
||||||
* others
|
* others
|
||||||
*/
|
*/
|
||||||
static void __init find_zone_movable_pfns_for_nodes(void)
|
static void __init find_virt_zones(void)
|
||||||
{
|
{
|
||||||
int i, nid;
|
int i;
|
||||||
|
int nid;
|
||||||
unsigned long usable_startpfn;
|
unsigned long usable_startpfn;
|
||||||
unsigned long kernelcore_node, kernelcore_remaining;
|
|
||||||
/* save the state before borrow the nodemask */
|
/* save the state before borrow the nodemask */
|
||||||
nodemask_t saved_node_state = node_states[N_MEMORY];
|
nodemask_t saved_node_state = node_states[N_MEMORY];
|
||||||
unsigned long totalpages = early_calculate_totalpages();
|
unsigned long totalpages = early_calculate_totalpages();
|
||||||
int usable_nodes = nodes_weight(node_states[N_MEMORY]);
|
|
||||||
struct memblock_region *r;
|
struct memblock_region *r;
|
||||||
|
unsigned long occupied = 0;
|
||||||
|
|
||||||
/* Need to find movable_zone earlier when movable_node is specified. */
|
/* Need to find virt_zone earlier when movable_node is specified. */
|
||||||
find_usable_zone_for_movable();
|
find_usable_zone();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If movable_node is specified, ignore kernelcore and movablecore
|
* If movable_node is specified, ignore kernelcore and movablecore
|
||||||
@@ -363,8 +501,8 @@ static void __init find_zone_movable_pfns_for_nodes(void)
|
|||||||
nid = memblock_get_region_node(r);
|
nid = memblock_get_region_node(r);
|
||||||
|
|
||||||
usable_startpfn = PFN_DOWN(r->base);
|
usable_startpfn = PFN_DOWN(r->base);
|
||||||
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
|
pfn_of(ZONE_MOVABLE, nid) = pfn_of(ZONE_MOVABLE, nid) ?
|
||||||
min(usable_startpfn, zone_movable_pfn[nid]) :
|
min(usable_startpfn, pfn_of(ZONE_MOVABLE, nid)) :
|
||||||
usable_startpfn;
|
usable_startpfn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -400,8 +538,8 @@ static void __init find_zone_movable_pfns_for_nodes(void)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
|
pfn_of(ZONE_MOVABLE, nid) = pfn_of(ZONE_MOVABLE, nid) ?
|
||||||
min(usable_startpfn, zone_movable_pfn[nid]) :
|
min(usable_startpfn, pfn_of(ZONE_MOVABLE, nid)) :
|
||||||
usable_startpfn;
|
usable_startpfn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -411,151 +549,92 @@ static void __init find_zone_movable_pfns_for_nodes(void)
|
|||||||
goto out2;
|
goto out2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (zone_nomerge_order > pageblock_order) {
|
||||||
|
nr_pages_of(ZONE_NOMERGE) = 0;
|
||||||
|
percentage_of(ZONE_NOMERGE) = 0;
|
||||||
|
zone_nomerge_order = 0;
|
||||||
|
pr_warn("zone %s order %d cannot be higher than pageblock order %d\n",
|
||||||
|
zone_names[ZONE_NOMERGE], zone_nomerge_order, pageblock_order);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zone_nosplit_order > pageblock_order) {
|
||||||
|
nr_pages_of(ZONE_NOSPLIT) = 0;
|
||||||
|
percentage_of(ZONE_NOSPLIT) = 0;
|
||||||
|
zone_nosplit_order = 0;
|
||||||
|
pr_warn("zone %s order %d cannot be higher than pageblock order %d\n",
|
||||||
|
zone_names[ZONE_NOSPLIT], zone_nosplit_order, pageblock_order);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zone_nomerge_order && zone_nomerge_order <= zone_nosplit_order) {
|
||||||
|
nr_pages_of(ZONE_NOSPLIT) = nr_pages_of(ZONE_NOMERGE) = 0;
|
||||||
|
percentage_of(ZONE_NOSPLIT) = percentage_of(ZONE_NOMERGE) = 0;
|
||||||
|
zone_nosplit_order = zone_nomerge_order = 0;
|
||||||
|
pr_warn("zone %s order %d cannot be higher than zone %s order %d\n",
|
||||||
|
zone_names[ZONE_NOSPLIT], zone_nosplit_order,
|
||||||
|
zone_names[ZONE_NOMERGE], zone_nomerge_order);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If kernelcore=nn% or movablecore=nn% was specified, calculate the
|
* If kernelcore=nn% or movablecore=nn% was specified, calculate the
|
||||||
* amount of necessary memory.
|
* amount of necessary memory.
|
||||||
*/
|
*/
|
||||||
if (required_kernelcore_percent)
|
for (i = LAST_PHYS_ZONE; i <= LAST_VIRT_ZONE; i++) {
|
||||||
required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
|
if (percentage_of(i))
|
||||||
10000UL;
|
nr_pages_of(i) = totalpages * percentage_of(i) / 100;
|
||||||
if (required_movablecore_percent)
|
|
||||||
required_movablecore = (totalpages * 100 * required_movablecore_percent) /
|
nr_pages_of(i) = roundup(nr_pages_of(i), MAX_ORDER_NR_PAGES);
|
||||||
10000UL;
|
occupied += nr_pages_of(i);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If movablecore= was specified, calculate what size of
|
* If movablecore= was specified, calculate what size of
|
||||||
* kernelcore that corresponds so that memory usable for
|
* kernelcore that corresponds so that memory usable for
|
||||||
* any allocation type is evenly spread. If both kernelcore
|
* any allocation type is evenly spread. If both kernelcore
|
||||||
* and movablecore are specified, then the value of kernelcore
|
* and movablecore are specified, then the value of kernelcore
|
||||||
* will be used for required_kernelcore if it's greater than
|
* will be used if it's greater than what movablecore would have
|
||||||
* what movablecore would have allowed.
|
* allowed.
|
||||||
*/
|
*/
|
||||||
if (required_movablecore) {
|
if (occupied < totalpages) {
|
||||||
unsigned long corepages;
|
enum zone_type zid;
|
||||||
|
|
||||||
/*
|
zid = !nr_pages_of(LAST_PHYS_ZONE) || nr_pages_of(ZONE_MOVABLE) ?
|
||||||
* Round-up so that ZONE_MOVABLE is at least as large as what
|
LAST_PHYS_ZONE : ZONE_MOVABLE;
|
||||||
* was requested by the user
|
nr_pages_of(zid) += totalpages - occupied;
|
||||||
*/
|
|
||||||
required_movablecore =
|
|
||||||
roundup(required_movablecore, MAX_ORDER_NR_PAGES);
|
|
||||||
required_movablecore = min(totalpages, required_movablecore);
|
|
||||||
corepages = totalpages - required_movablecore;
|
|
||||||
|
|
||||||
required_kernelcore = max(required_kernelcore, corepages);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If kernelcore was not specified or kernelcore size is larger
|
* If kernelcore was not specified or kernelcore size is larger
|
||||||
* than totalpages, there is no ZONE_MOVABLE.
|
* than totalpages, there are not virtual zones.
|
||||||
*/
|
*/
|
||||||
if (!required_kernelcore || required_kernelcore >= totalpages)
|
occupied = nr_pages_of(LAST_PHYS_ZONE);
|
||||||
|
if (!occupied || occupied >= totalpages)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
|
for (i = LAST_PHYS_ZONE + 1; i <= LAST_VIRT_ZONE; i++) {
|
||||||
usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
|
if (!nr_pages_of(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
restart:
|
find_virt_zone(occupied, &pfn_of(i, 0));
|
||||||
/* Spread kernelcore memory as evenly as possible throughout nodes */
|
occupied += nr_pages_of(i);
|
||||||
kernelcore_node = required_kernelcore / usable_nodes;
|
|
||||||
for_each_node_state(nid, N_MEMORY) {
|
|
||||||
unsigned long start_pfn, end_pfn;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Recalculate kernelcore_node if the division per node
|
|
||||||
* now exceeds what is necessary to satisfy the requested
|
|
||||||
* amount of memory for the kernel
|
|
||||||
*/
|
|
||||||
if (required_kernelcore < kernelcore_node)
|
|
||||||
kernelcore_node = required_kernelcore / usable_nodes;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* As the map is walked, we track how much memory is usable
|
|
||||||
* by the kernel using kernelcore_remaining. When it is
|
|
||||||
* 0, the rest of the node is usable by ZONE_MOVABLE
|
|
||||||
*/
|
|
||||||
kernelcore_remaining = kernelcore_node;
|
|
||||||
|
|
||||||
/* Go through each range of PFNs within this node */
|
|
||||||
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
|
|
||||||
unsigned long size_pages;
|
|
||||||
|
|
||||||
start_pfn = max(start_pfn, zone_movable_pfn[nid]);
|
|
||||||
if (start_pfn >= end_pfn)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* Account for what is only usable for kernelcore */
|
|
||||||
if (start_pfn < usable_startpfn) {
|
|
||||||
unsigned long kernel_pages;
|
|
||||||
kernel_pages = min(end_pfn, usable_startpfn)
|
|
||||||
- start_pfn;
|
|
||||||
|
|
||||||
kernelcore_remaining -= min(kernel_pages,
|
|
||||||
kernelcore_remaining);
|
|
||||||
required_kernelcore -= min(kernel_pages,
|
|
||||||
required_kernelcore);
|
|
||||||
|
|
||||||
/* Continue if range is now fully accounted */
|
|
||||||
if (end_pfn <= usable_startpfn) {
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Push zone_movable_pfn to the end so
|
|
||||||
* that if we have to rebalance
|
|
||||||
* kernelcore across nodes, we will
|
|
||||||
* not double account here
|
|
||||||
*/
|
|
||||||
zone_movable_pfn[nid] = end_pfn;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
start_pfn = usable_startpfn;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The usable PFN range for ZONE_MOVABLE is from
|
|
||||||
* start_pfn->end_pfn. Calculate size_pages as the
|
|
||||||
* number of pages used as kernelcore
|
|
||||||
*/
|
|
||||||
size_pages = end_pfn - start_pfn;
|
|
||||||
if (size_pages > kernelcore_remaining)
|
|
||||||
size_pages = kernelcore_remaining;
|
|
||||||
zone_movable_pfn[nid] = start_pfn + size_pages;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Some kernelcore has been met, update counts and
|
|
||||||
* break if the kernelcore for this node has been
|
|
||||||
* satisfied
|
|
||||||
*/
|
|
||||||
required_kernelcore -= min(required_kernelcore,
|
|
||||||
size_pages);
|
|
||||||
kernelcore_remaining -= size_pages;
|
|
||||||
if (!kernelcore_remaining)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* If there is still required_kernelcore, we do another pass with one
|
|
||||||
* less node in the count. This will push zone_movable_pfn[nid] further
|
|
||||||
* along on the nodes that still have memory until kernelcore is
|
|
||||||
* satisfied
|
|
||||||
*/
|
|
||||||
usable_nodes--;
|
|
||||||
if (usable_nodes && required_kernelcore > usable_nodes)
|
|
||||||
goto restart;
|
|
||||||
|
|
||||||
out2:
|
out2:
|
||||||
/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
|
/* Align starts of virtual zones on all nids to MAX_ORDER_NR_PAGES */
|
||||||
for (nid = 0; nid < MAX_NUMNODES; nid++) {
|
for (nid = 0; nid < MAX_NUMNODES; nid++) {
|
||||||
unsigned long start_pfn, end_pfn;
|
unsigned long start_pfn, end_pfn;
|
||||||
|
unsigned long prev_virt_zone_pfn = 0;
|
||||||
zone_movable_pfn[nid] =
|
|
||||||
roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
|
|
||||||
|
|
||||||
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
|
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
|
||||||
if (zone_movable_pfn[nid] >= end_pfn)
|
|
||||||
zone_movable_pfn[nid] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
for (i = LAST_PHYS_ZONE + 1; i <= LAST_VIRT_ZONE; i++) {
|
||||||
|
pfn_of(i, nid) = roundup(pfn_of(i, nid), MAX_ORDER_NR_PAGES);
|
||||||
|
|
||||||
|
if (pfn_of(i, nid) <= prev_virt_zone_pfn || pfn_of(i, nid) >= end_pfn)
|
||||||
|
pfn_of(i, nid) = 0;
|
||||||
|
|
||||||
|
if (pfn_of(i, nid))
|
||||||
|
prev_virt_zone_pfn = pfn_of(i, nid);
|
||||||
|
}
|
||||||
|
}
|
||||||
out:
|
out:
|
||||||
/* restore the node_state */
|
/* restore the node_state */
|
||||||
node_states[N_MEMORY] = saved_node_state;
|
node_states[N_MEMORY] = saved_node_state;
|
||||||
@@ -1104,38 +1183,54 @@ void __ref memmap_init_zone_device(struct zone *zone,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The zone ranges provided by the architecture do not include ZONE_MOVABLE
|
* The zone ranges provided by the architecture do not include virtual zones
|
||||||
* because it is sized independent of architecture. Unlike the other zones,
|
* because they are sized independent of architecture. Unlike physical zones,
|
||||||
* the starting point for ZONE_MOVABLE is not fixed. It may be different
|
* the starting point for the first populated virtual zone is not fixed. It may
|
||||||
* in each node depending on the size of each node and how evenly kernelcore
|
* be different in each node depending on the size of each node and how evenly
|
||||||
* is distributed. This helper function adjusts the zone ranges
|
* kernelcore is distributed. This helper function adjusts the zone ranges
|
||||||
* provided by the architecture for a given node by using the end of the
|
* provided by the architecture for a given node by using the end of the
|
||||||
* highest usable zone for ZONE_MOVABLE. This preserves the assumption that
|
* highest usable zone for the first populated virtual zone. This preserves the
|
||||||
* zones within a node are in order of monotonic increases memory addresses
|
* assumption that zones within a node are in order of monotonic increases
|
||||||
|
* memory addresses.
|
||||||
*/
|
*/
|
||||||
static void __init adjust_zone_range_for_zone_movable(int nid,
|
static void __init adjust_zone_range(int nid,
|
||||||
unsigned long zone_type,
|
unsigned long zone_type,
|
||||||
unsigned long node_end_pfn,
|
unsigned long node_end_pfn,
|
||||||
unsigned long *zone_start_pfn,
|
unsigned long *zone_start_pfn,
|
||||||
unsigned long *zone_end_pfn)
|
unsigned long *zone_end_pfn)
|
||||||
{
|
{
|
||||||
/* Only adjust if ZONE_MOVABLE is on this node */
|
int i = max_t(int, zone_type, LAST_PHYS_ZONE);
|
||||||
if (zone_movable_pfn[nid]) {
|
unsigned long next_virt_zone_pfn = 0;
|
||||||
/* Size ZONE_MOVABLE */
|
|
||||||
if (zone_type == ZONE_MOVABLE) {
|
|
||||||
*zone_start_pfn = zone_movable_pfn[nid];
|
|
||||||
*zone_end_pfn = min(node_end_pfn,
|
|
||||||
arch_zone_highest_possible_pfn[movable_zone]);
|
|
||||||
|
|
||||||
/* Adjust for ZONE_MOVABLE starting within this range */
|
while (i++ < LAST_VIRT_ZONE) {
|
||||||
} else if (!mirrored_kernelcore &&
|
if (pfn_of(i, nid)) {
|
||||||
*zone_start_pfn < zone_movable_pfn[nid] &&
|
next_virt_zone_pfn = pfn_of(i, nid);
|
||||||
*zone_end_pfn > zone_movable_pfn[nid]) {
|
break;
|
||||||
*zone_end_pfn = zone_movable_pfn[nid];
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Check if this whole range is within ZONE_MOVABLE */
|
if (zone_type <= LAST_PHYS_ZONE) {
|
||||||
} else if (*zone_start_pfn >= zone_movable_pfn[nid])
|
if (!next_virt_zone_pfn)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!mirrored_kernelcore &&
|
||||||
|
*zone_start_pfn < next_virt_zone_pfn &&
|
||||||
|
*zone_end_pfn > next_virt_zone_pfn)
|
||||||
|
*zone_end_pfn = next_virt_zone_pfn;
|
||||||
|
else if (*zone_start_pfn >= next_virt_zone_pfn)
|
||||||
*zone_start_pfn = *zone_end_pfn;
|
*zone_start_pfn = *zone_end_pfn;
|
||||||
|
} else if (zone_type <= LAST_VIRT_ZONE) {
|
||||||
|
if (!pfn_of(zone_type, nid))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (next_virt_zone_pfn)
|
||||||
|
*zone_end_pfn = min3(next_virt_zone_pfn,
|
||||||
|
node_end_pfn,
|
||||||
|
arch_zone_highest_possible_pfn[virt_zone]);
|
||||||
|
else
|
||||||
|
*zone_end_pfn = min(node_end_pfn,
|
||||||
|
arch_zone_highest_possible_pfn[virt_zone]);
|
||||||
|
*zone_start_pfn = min(*zone_end_pfn, pfn_of(zone_type, nid));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1191,7 +1286,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
|
|||||||
* Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
|
* Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
|
||||||
* and vice versa.
|
* and vice versa.
|
||||||
*/
|
*/
|
||||||
if (mirrored_kernelcore && zone_movable_pfn[nid]) {
|
if (mirrored_kernelcore && pfn_of(ZONE_MOVABLE, nid)) {
|
||||||
unsigned long start_pfn, end_pfn;
|
unsigned long start_pfn, end_pfn;
|
||||||
struct memblock_region *r;
|
struct memblock_region *r;
|
||||||
|
|
||||||
@@ -1231,8 +1326,7 @@ static unsigned long __init zone_spanned_pages_in_node(int nid,
|
|||||||
/* Get the start and end of the zone */
|
/* Get the start and end of the zone */
|
||||||
*zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
|
*zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
|
||||||
*zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
|
*zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
|
||||||
adjust_zone_range_for_zone_movable(nid, zone_type, node_end_pfn,
|
adjust_zone_range(nid, zone_type, node_end_pfn, zone_start_pfn, zone_end_pfn);
|
||||||
zone_start_pfn, zone_end_pfn);
|
|
||||||
|
|
||||||
/* Check that this node has pages within the zone's required range */
|
/* Check that this node has pages within the zone's required range */
|
||||||
if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
|
if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
|
||||||
@@ -1297,6 +1391,10 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
|
|||||||
#if defined(CONFIG_MEMORY_HOTPLUG)
|
#if defined(CONFIG_MEMORY_HOTPLUG)
|
||||||
zone->present_early_pages = real_size;
|
zone->present_early_pages = real_size;
|
||||||
#endif
|
#endif
|
||||||
|
if (i == ZONE_NOSPLIT)
|
||||||
|
zone->order = zone_nosplit_order;
|
||||||
|
if (i == ZONE_NOMERGE)
|
||||||
|
zone->order = zone_nomerge_order;
|
||||||
|
|
||||||
totalpages += spanned;
|
totalpages += spanned;
|
||||||
realtotalpages += real_size;
|
realtotalpages += real_size;
|
||||||
@@ -1748,7 +1846,7 @@ static void __init check_for_memory(pg_data_t *pgdat)
|
|||||||
{
|
{
|
||||||
enum zone_type zone_type;
|
enum zone_type zone_type;
|
||||||
|
|
||||||
for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
|
for (zone_type = 0; zone_type <= LAST_PHYS_ZONE; zone_type++) {
|
||||||
struct zone *zone = &pgdat->node_zones[zone_type];
|
struct zone *zone = &pgdat->node_zones[zone_type];
|
||||||
if (populated_zone(zone)) {
|
if (populated_zone(zone)) {
|
||||||
if (IS_ENABLED(CONFIG_HIGHMEM))
|
if (IS_ENABLED(CONFIG_HIGHMEM))
|
||||||
@@ -1798,7 +1896,7 @@ static bool arch_has_descending_max_zone_pfns(void)
|
|||||||
void __init free_area_init(unsigned long *max_zone_pfn)
|
void __init free_area_init(unsigned long *max_zone_pfn)
|
||||||
{
|
{
|
||||||
unsigned long start_pfn, end_pfn;
|
unsigned long start_pfn, end_pfn;
|
||||||
int i, nid, zone;
|
int i, j, nid, zone;
|
||||||
bool descending;
|
bool descending;
|
||||||
|
|
||||||
/* Record where the zone boundaries are */
|
/* Record where the zone boundaries are */
|
||||||
@@ -1810,15 +1908,12 @@ void __init free_area_init(unsigned long *max_zone_pfn)
|
|||||||
start_pfn = PHYS_PFN(memblock_start_of_DRAM());
|
start_pfn = PHYS_PFN(memblock_start_of_DRAM());
|
||||||
descending = arch_has_descending_max_zone_pfns();
|
descending = arch_has_descending_max_zone_pfns();
|
||||||
|
|
||||||
for (i = 0; i < MAX_NR_ZONES; i++) {
|
for (i = 0; i <= LAST_PHYS_ZONE; i++) {
|
||||||
if (descending)
|
if (descending)
|
||||||
zone = MAX_NR_ZONES - i - 1;
|
zone = LAST_PHYS_ZONE - i;
|
||||||
else
|
else
|
||||||
zone = i;
|
zone = i;
|
||||||
|
|
||||||
if (zone == ZONE_MOVABLE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
end_pfn = max(max_zone_pfn[zone], start_pfn);
|
end_pfn = max(max_zone_pfn[zone], start_pfn);
|
||||||
arch_zone_lowest_possible_pfn[zone] = start_pfn;
|
arch_zone_lowest_possible_pfn[zone] = start_pfn;
|
||||||
arch_zone_highest_possible_pfn[zone] = end_pfn;
|
arch_zone_highest_possible_pfn[zone] = end_pfn;
|
||||||
@@ -1826,15 +1921,17 @@ void __init free_area_init(unsigned long *max_zone_pfn)
|
|||||||
start_pfn = end_pfn;
|
start_pfn = end_pfn;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the PFNs that ZONE_MOVABLE begins at in each node */
|
/* Initialise every node */
|
||||||
memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
|
mminit_verify_pageflags_layout();
|
||||||
find_zone_movable_pfns_for_nodes();
|
setup_nr_node_ids();
|
||||||
|
set_pageblock_order();
|
||||||
|
|
||||||
|
/* Find the PFNs that virtual zones begin at in each node */
|
||||||
|
find_virt_zones();
|
||||||
|
|
||||||
/* Print out the zone ranges */
|
/* Print out the zone ranges */
|
||||||
pr_info("Zone ranges:\n");
|
pr_info("Zone ranges:\n");
|
||||||
for (i = 0; i < MAX_NR_ZONES; i++) {
|
for (i = 0; i <= LAST_PHYS_ZONE; i++) {
|
||||||
if (i == ZONE_MOVABLE)
|
|
||||||
continue;
|
|
||||||
pr_info(" %-8s ", zone_names[i]);
|
pr_info(" %-8s ", zone_names[i]);
|
||||||
if (arch_zone_lowest_possible_pfn[i] ==
|
if (arch_zone_lowest_possible_pfn[i] ==
|
||||||
arch_zone_highest_possible_pfn[i])
|
arch_zone_highest_possible_pfn[i])
|
||||||
@@ -1847,12 +1944,14 @@ void __init free_area_init(unsigned long *max_zone_pfn)
|
|||||||
<< PAGE_SHIFT) - 1);
|
<< PAGE_SHIFT) - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Print out the PFNs ZONE_MOVABLE begins at in each node */
|
/* Print out the PFNs virtual zones begin at in each node */
|
||||||
pr_info("Movable zone start for each node\n");
|
for (; i <= LAST_VIRT_ZONE; i++) {
|
||||||
for (i = 0; i < MAX_NUMNODES; i++) {
|
pr_info("%s zone start for each node\n", zone_names[i]);
|
||||||
if (zone_movable_pfn[i])
|
for (j = 0; j < MAX_NUMNODES; j++) {
|
||||||
pr_info(" Node %d: %#018Lx\n", i,
|
if (pfn_of(i, j))
|
||||||
(u64)zone_movable_pfn[i] << PAGE_SHIFT);
|
pr_info(" Node %d: %#018Lx\n",
|
||||||
|
j, (u64)pfn_of(i, j) << PAGE_SHIFT);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1868,11 +1967,6 @@ void __init free_area_init(unsigned long *max_zone_pfn)
|
|||||||
subsection_map_init(start_pfn, end_pfn - start_pfn);
|
subsection_map_init(start_pfn, end_pfn - start_pfn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Initialise every node */
|
|
||||||
mminit_verify_pageflags_layout();
|
|
||||||
setup_nr_node_ids();
|
|
||||||
set_pageblock_order();
|
|
||||||
|
|
||||||
for_each_node(nid) {
|
for_each_node(nid) {
|
||||||
pg_data_t *pgdat;
|
pg_data_t *pgdat;
|
||||||
|
|
||||||
|
@@ -315,6 +315,8 @@ char * const zone_names[MAX_NR_ZONES] = {
|
|||||||
"HighMem",
|
"HighMem",
|
||||||
#endif
|
#endif
|
||||||
"Movable",
|
"Movable",
|
||||||
|
"NoSplit",
|
||||||
|
"NoMerge",
|
||||||
#ifdef CONFIG_ZONE_DEVICE
|
#ifdef CONFIG_ZONE_DEVICE
|
||||||
"Device",
|
"Device",
|
||||||
#endif
|
#endif
|
||||||
@@ -338,9 +340,9 @@ int user_min_free_kbytes = -1;
|
|||||||
static int watermark_boost_factor __read_mostly = 15000;
|
static int watermark_boost_factor __read_mostly = 15000;
|
||||||
static int watermark_scale_factor = 10;
|
static int watermark_scale_factor = 10;
|
||||||
|
|
||||||
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
|
/* virt_zone is the "real" zone pages in virtual zones are taken from */
|
||||||
int movable_zone;
|
int virt_zone;
|
||||||
EXPORT_SYMBOL(movable_zone);
|
EXPORT_SYMBOL(virt_zone);
|
||||||
|
|
||||||
#if MAX_NUMNODES > 1
|
#if MAX_NUMNODES > 1
|
||||||
unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
|
unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
|
||||||
@@ -802,9 +804,6 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
|
|||||||
unsigned long higher_page_pfn;
|
unsigned long higher_page_pfn;
|
||||||
struct page *higher_page;
|
struct page *higher_page;
|
||||||
|
|
||||||
if (order >= MAX_ORDER - 1)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
higher_page_pfn = buddy_pfn & pfn;
|
higher_page_pfn = buddy_pfn & pfn;
|
||||||
higher_page = page + (higher_page_pfn - pfn);
|
higher_page = page + (higher_page_pfn - pfn);
|
||||||
|
|
||||||
@@ -812,6 +811,11 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
|
|||||||
NULL) != NULL;
|
NULL) != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int zone_max_order(struct zone *zone)
|
||||||
|
{
|
||||||
|
return zone->order && zone_idx(zone) == ZONE_NOMERGE ? zone->order : MAX_ORDER;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Freeing function for a buddy system allocator.
|
* Freeing function for a buddy system allocator.
|
||||||
*
|
*
|
||||||
@@ -846,6 +850,7 @@ static inline void __free_one_page(struct page *page,
|
|||||||
unsigned long combined_pfn;
|
unsigned long combined_pfn;
|
||||||
struct page *buddy;
|
struct page *buddy;
|
||||||
bool to_tail;
|
bool to_tail;
|
||||||
|
int max_order = zone_max_order(zone);
|
||||||
|
|
||||||
VM_BUG_ON(!zone_is_initialized(zone));
|
VM_BUG_ON(!zone_is_initialized(zone));
|
||||||
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
|
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
|
||||||
@@ -857,7 +862,7 @@ static inline void __free_one_page(struct page *page,
|
|||||||
VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
|
VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
|
||||||
VM_BUG_ON_PAGE(bad_range(zone, page), page);
|
VM_BUG_ON_PAGE(bad_range(zone, page), page);
|
||||||
|
|
||||||
while (order < MAX_ORDER) {
|
while (order < max_order) {
|
||||||
if (compaction_capture(capc, page, order, migratetype)) {
|
if (compaction_capture(capc, page, order, migratetype)) {
|
||||||
__mod_zone_freepage_state(zone, -(1 << order),
|
__mod_zone_freepage_state(zone, -(1 << order),
|
||||||
migratetype);
|
migratetype);
|
||||||
@@ -904,6 +909,8 @@ done_merging:
|
|||||||
to_tail = true;
|
to_tail = true;
|
||||||
else if (is_shuffle_order(order))
|
else if (is_shuffle_order(order))
|
||||||
to_tail = shuffle_pick_tail();
|
to_tail = shuffle_pick_tail();
|
||||||
|
else if (order + 1 >= max_order)
|
||||||
|
to_tail = false;
|
||||||
else
|
else
|
||||||
to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
|
to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
|
||||||
|
|
||||||
@@ -941,6 +948,8 @@ int split_free_page(struct page *free_page,
|
|||||||
int mt;
|
int mt;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
VM_WARN_ON_ONCE_PAGE(!page_can_split(free_page), free_page);
|
||||||
|
|
||||||
if (split_pfn_offset == 0)
|
if (split_pfn_offset == 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@@ -1652,6 +1661,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
|
|||||||
struct free_area *area;
|
struct free_area *area;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
|
VM_WARN_ON_ONCE(!zone_is_suitable(zone, order));
|
||||||
|
|
||||||
/* Find a page of the appropriate size in the preferred list */
|
/* Find a page of the appropriate size in the preferred list */
|
||||||
for (current_order = order; current_order < NR_PAGE_ORDERS; ++current_order) {
|
for (current_order = order; current_order < NR_PAGE_ORDERS; ++current_order) {
|
||||||
area = &(zone->free_area[current_order]);
|
area = &(zone->free_area[current_order]);
|
||||||
@@ -2987,6 +2998,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
|
|||||||
long min = mark;
|
long min = mark;
|
||||||
int o;
|
int o;
|
||||||
|
|
||||||
|
if (!zone_is_suitable(z, order))
|
||||||
|
return false;
|
||||||
|
|
||||||
/* free_pages may go negative - that's OK */
|
/* free_pages may go negative - that's OK */
|
||||||
free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
|
free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
|
||||||
|
|
||||||
@@ -3079,6 +3093,9 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
|
|||||||
{
|
{
|
||||||
long free_pages;
|
long free_pages;
|
||||||
|
|
||||||
|
if (!zone_is_suitable(z, order))
|
||||||
|
return false;
|
||||||
|
|
||||||
free_pages = zone_page_state(z, NR_FREE_PAGES);
|
free_pages = zone_page_state(z, NR_FREE_PAGES);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -3227,6 +3244,9 @@ retry:
|
|||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long mark;
|
unsigned long mark;
|
||||||
|
|
||||||
|
if (!zone_is_suitable(zone, order))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (cpusets_enabled() &&
|
if (cpusets_enabled() &&
|
||||||
(alloc_flags & ALLOC_CPUSET) &&
|
(alloc_flags & ALLOC_CPUSET) &&
|
||||||
!__cpuset_zone_allowed(zone, gfp_mask))
|
!__cpuset_zone_allowed(zone, gfp_mask))
|
||||||
@@ -3906,6 +3926,9 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
|
|||||||
|
|
||||||
alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
|
alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
|
||||||
|
|
||||||
|
if (!(gfp_mask & __GFP_DIRECT_RECLAIM) && gfp_order_zone(gfp_mask, order) > ZONE_MOVABLE)
|
||||||
|
alloc_flags |= ALLOC_KSWAPD;
|
||||||
|
|
||||||
return alloc_flags;
|
return alloc_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5844,9 +5867,9 @@ static void __setup_per_zone_wmarks(void)
|
|||||||
struct zone *zone;
|
struct zone *zone;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
/* Calculate total number of !ZONE_HIGHMEM and !ZONE_MOVABLE pages */
|
/* Calculate total number of pages below ZONE_HIGHMEM */
|
||||||
for_each_zone(zone) {
|
for_each_zone(zone) {
|
||||||
if (!is_highmem(zone) && zone_idx(zone) != ZONE_MOVABLE)
|
if (zone_idx(zone) <= ZONE_NORMAL)
|
||||||
lowmem_pages += zone_managed_pages(zone);
|
lowmem_pages += zone_managed_pages(zone);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5856,11 +5879,11 @@ static void __setup_per_zone_wmarks(void)
|
|||||||
spin_lock_irqsave(&zone->lock, flags);
|
spin_lock_irqsave(&zone->lock, flags);
|
||||||
tmp = (u64)pages_min * zone_managed_pages(zone);
|
tmp = (u64)pages_min * zone_managed_pages(zone);
|
||||||
do_div(tmp, lowmem_pages);
|
do_div(tmp, lowmem_pages);
|
||||||
if (is_highmem(zone) || zone_idx(zone) == ZONE_MOVABLE) {
|
if (zone_idx(zone) > ZONE_NORMAL) {
|
||||||
/*
|
/*
|
||||||
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
|
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
|
||||||
* need highmem and movable zones pages, so cap pages_min
|
* need pages from zones above ZONE_NORMAL, so cap
|
||||||
* to a small value here.
|
* pages_min to a small value here.
|
||||||
*
|
*
|
||||||
* The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
|
* The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
|
||||||
* deltas control async page reclaim, and so should
|
* deltas control async page reclaim, and so should
|
||||||
|
@@ -70,7 +70,7 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
|
|||||||
* pages then it should be reasonably safe to assume the rest
|
* pages then it should be reasonably safe to assume the rest
|
||||||
* is movable.
|
* is movable.
|
||||||
*/
|
*/
|
||||||
if (zone_idx(zone) == ZONE_MOVABLE)
|
if (zid_is_virt(zone_idx(zone)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
29
mm/vmscan.c
29
mm/vmscan.c
@@ -1918,18 +1918,13 @@ retry:
|
|||||||
goto keep_locked;
|
goto keep_locked;
|
||||||
if (folio_maybe_dma_pinned(folio))
|
if (folio_maybe_dma_pinned(folio))
|
||||||
goto keep_locked;
|
goto keep_locked;
|
||||||
if (folio_test_large(folio)) {
|
/*
|
||||||
/* cannot split folio, skip it */
|
* Split partially mapped folios right away.
|
||||||
if (!can_split_folio(folio, NULL))
|
* We can free the unmapped pages without IO.
|
||||||
goto activate_locked;
|
*/
|
||||||
/*
|
if (folio_test_large(folio) &&
|
||||||
* Split partially mapped folios right away.
|
data_race(!list_empty(&folio->_deferred_list)))
|
||||||
* We can free the unmapped pages without IO.
|
split_folio_to_list(folio, folio_list);
|
||||||
*/
|
|
||||||
if (data_race(!list_empty(&folio->_deferred_list)) &&
|
|
||||||
split_folio_to_list(folio, folio_list))
|
|
||||||
goto activate_locked;
|
|
||||||
}
|
|
||||||
if (!add_to_swap(folio)) {
|
if (!add_to_swap(folio)) {
|
||||||
int __maybe_unused order = folio_order(folio);
|
int __maybe_unused order = folio_order(folio);
|
||||||
|
|
||||||
@@ -6824,7 +6819,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
|
|||||||
orig_mask = sc->gfp_mask;
|
orig_mask = sc->gfp_mask;
|
||||||
if (buffer_heads_over_limit) {
|
if (buffer_heads_over_limit) {
|
||||||
sc->gfp_mask |= __GFP_HIGHMEM;
|
sc->gfp_mask |= __GFP_HIGHMEM;
|
||||||
sc->reclaim_idx = gfp_zone(sc->gfp_mask);
|
sc->reclaim_idx = gfp_order_zone(sc->gfp_mask, sc->order);
|
||||||
}
|
}
|
||||||
|
|
||||||
for_each_zone_zonelist_nodemask(zone, z, zonelist,
|
for_each_zone_zonelist_nodemask(zone, z, zonelist,
|
||||||
@@ -7154,7 +7149,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
|
|||||||
struct scan_control sc = {
|
struct scan_control sc = {
|
||||||
.nr_to_reclaim = SWAP_CLUSTER_MAX,
|
.nr_to_reclaim = SWAP_CLUSTER_MAX,
|
||||||
.gfp_mask = current_gfp_context(gfp_mask),
|
.gfp_mask = current_gfp_context(gfp_mask),
|
||||||
.reclaim_idx = gfp_zone(gfp_mask),
|
.reclaim_idx = gfp_order_zone(gfp_mask, order),
|
||||||
.order = order,
|
.order = order,
|
||||||
.nodemask = nodemask,
|
.nodemask = nodemask,
|
||||||
.priority = DEF_PRIORITY,
|
.priority = DEF_PRIORITY,
|
||||||
@@ -7920,6 +7915,10 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
|
|||||||
if (!cpuset_zone_allowed(zone, gfp_flags))
|
if (!cpuset_zone_allowed(zone, gfp_flags))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
curr_idx = gfp_order_zone(gfp_flags, order);
|
||||||
|
if (highest_zoneidx > curr_idx)
|
||||||
|
highest_zoneidx = curr_idx;
|
||||||
|
|
||||||
pgdat = zone->zone_pgdat;
|
pgdat = zone->zone_pgdat;
|
||||||
curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx);
|
curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx);
|
||||||
|
|
||||||
@@ -8129,7 +8128,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
|
|||||||
.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
|
.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
|
||||||
.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
|
.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
|
||||||
.may_swap = 1,
|
.may_swap = 1,
|
||||||
.reclaim_idx = gfp_zone(gfp_mask),
|
.reclaim_idx = gfp_order_zone(gfp_mask, order),
|
||||||
};
|
};
|
||||||
unsigned long pflags;
|
unsigned long pflags;
|
||||||
|
|
||||||
|
@@ -1163,6 +1163,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
|
|||||||
|
|
||||||
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
|
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
|
||||||
TEXT_FOR_HIGHMEM(xx) xx "_movable", \
|
TEXT_FOR_HIGHMEM(xx) xx "_movable", \
|
||||||
|
xx "_nosplit", xx "_nomerge", \
|
||||||
TEXT_FOR_DEVICE(xx)
|
TEXT_FOR_DEVICE(xx)
|
||||||
|
|
||||||
const char * const vmstat_text[] = {
|
const char * const vmstat_text[] = {
|
||||||
@@ -1692,7 +1693,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
|
|||||||
"\n spanned %lu"
|
"\n spanned %lu"
|
||||||
"\n present %lu"
|
"\n present %lu"
|
||||||
"\n managed %lu"
|
"\n managed %lu"
|
||||||
"\n cma %lu",
|
"\n cma %lu"
|
||||||
|
"\n order %u",
|
||||||
zone_page_state(zone, NR_FREE_PAGES),
|
zone_page_state(zone, NR_FREE_PAGES),
|
||||||
zone->watermark_boost,
|
zone->watermark_boost,
|
||||||
min_wmark_pages(zone),
|
min_wmark_pages(zone),
|
||||||
@@ -1701,7 +1703,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
|
|||||||
zone->spanned_pages,
|
zone->spanned_pages,
|
||||||
zone->present_pages,
|
zone->present_pages,
|
||||||
zone_managed_pages(zone),
|
zone_managed_pages(zone),
|
||||||
zone_cma_pages(zone));
|
zone_cma_pages(zone),
|
||||||
|
zone->order);
|
||||||
|
|
||||||
seq_printf(m,
|
seq_printf(m,
|
||||||
"\n protection: (%ld",
|
"\n protection: (%ld",
|
||||||
|
Reference in New Issue
Block a user