BACKPORT: mm: page_alloc: group fallback functions together

The way the fallback rules are spread out makes them hard to follow.  Move
the functions next to each other at least.

Link: https://lkml.kernel.org/r/20250225001023.1494422-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Brendan Jackman <jackmanb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Bug: 420836317
(cherry picked from commit a4138a2702a4428317ecdb115934554df4b788b4)
[
1. In the original patch of the find_suitable_fallback function, replace MIGRATE_PCPTYPES with MIGRATE_FALLBACKS.;
2. Keep the hook function in the reserve_highatomic_pageblock and unreserve_highatomic_pageblock functions.
]
Change-Id: I069e8dd7f8b009c686daef4459f9f1452b3f4c2c
Signed-off-by: yipeng xiang <yipengxiang@honor.corp-partner.google.com>
This commit is contained in:
Johannes Weiner
2025-02-24 19:08:26 -05:00
committed by Isaac Manjarres
parent 707dfe67d6
commit c746bc1949

View File

@@ -1994,6 +1994,43 @@ static void change_pageblock_range(struct page *pageblock_page,
}
}
static inline bool boost_watermark(struct zone *zone)
{
unsigned long max_boost;
if (!watermark_boost_factor)
return false;
/*
* Don't bother in zones that are unlikely to produce results.
* On small machines, including kdump capture kernels running
* in a small area, boosting the watermark can cause an out of
* memory situation immediately.
*/
if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
return false;
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
watermark_boost_factor, 10000);
/*
* high watermark may be uninitialised if fragmentation occurs
* very early in boot so do not boost. We do not fall
* through and boost by pageblock_nr_pages as failing
* allocations that early means that reclaim is not going
* to help and it may even be impossible to reclaim the
* boosted watermark resulting in a hang.
*/
if (!max_boost)
return false;
max_boost = max(pageblock_nr_pages, max_boost);
zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
max_boost);
return true;
}
/*
* When we are falling back to another migratetype during allocation, try to
* steal extra free pages from the same pageblocks to satisfy further
@@ -2035,41 +2072,38 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
return false;
}
static inline bool boost_watermark(struct zone *zone)
/*
* Check whether there is a suitable fallback freepage with requested order.
* If only_stealable is true, this function returns fallback_mt only if
* we can steal other freepages all together. This would help to reduce
* fragmentation due to mixed migratetype pages in one pageblock.
*/
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal)
{
unsigned long max_boost;
int i;
int fallback_mt;
if (!watermark_boost_factor)
return false;
/*
* Don't bother in zones that are unlikely to produce results.
* On small machines, including kdump capture kernels running
* in a small area, boosting the watermark can cause an out of
* memory situation immediately.
*/
if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
return false;
if (area->nr_free == 0)
return -1;
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
watermark_boost_factor, 10000);
*can_steal = false;
for (i = 0; i < MIGRATE_FALLBACKS - 1 ; i++) {
fallback_mt = fallbacks[migratetype][i];
if (free_area_empty(area, fallback_mt))
continue;
/*
* high watermark may be uninitialised if fragmentation occurs
* very early in boot so do not boost. We do not fall
* through and boost by pageblock_nr_pages as failing
* allocations that early means that reclaim is not going
* to help and it may even be impossible to reclaim the
* boosted watermark resulting in a hang.
*/
if (!max_boost)
return false;
if (can_steal_fallback(order, migratetype))
*can_steal = true;
max_boost = max(pageblock_nr_pages, max_boost);
if (!only_stealable)
return fallback_mt;
zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
max_boost);
if (*can_steal)
return fallback_mt;
}
return true;
return -1;
}
/*
@@ -2145,185 +2179,6 @@ try_to_steal_block(struct zone *zone, struct page *page,
return NULL;
}
/*
* Check whether there is a suitable fallback freepage with requested order.
* If only_stealable is true, this function returns fallback_mt only if
* we can steal other freepages all together. This would help to reduce
* fragmentation due to mixed migratetype pages in one pageblock.
*/
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal)
{
int i;
int fallback_mt;
if (area->nr_free == 0)
return -1;
*can_steal = false;
for (i = 0; i < MIGRATE_FALLBACKS - 1 ; i++) {
fallback_mt = fallbacks[migratetype][i];
if (free_area_empty(area, fallback_mt))
continue;
if (can_steal_fallback(order, migratetype))
*can_steal = true;
if (!only_stealable)
return fallback_mt;
if (*can_steal)
return fallback_mt;
}
return -1;
}
/*
* Reserve the pageblock(s) surrounding an allocation request for
* exclusive use of high-order atomic allocations if there are no
* empty page blocks that contain a page with a suitable order
*/
static void reserve_highatomic_pageblock(struct page *page, int order,
struct zone *zone)
{
int mt;
unsigned long max_managed, flags;
bool bypass = false;
/*
* The number reserved as: minimum is 1 pageblock, maximum is
* roughly 1% of a zone. But if 1% of a zone falls below a
* pageblock size, then don't reserve any pageblocks.
* Check is race-prone but harmless.
*/
if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages)
return;
max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages);
if (zone->nr_reserved_highatomic >= max_managed)
return;
trace_android_vh_reserve_highatomic_bypass(page, &bypass);
if (bypass)
return;
spin_lock_irqsave(&zone->lock, flags);
/* Recheck the nr_reserved_highatomic limit under the lock */
if (zone->nr_reserved_highatomic >= max_managed)
goto out_unlock;
/* Yoink! */
mt = get_pageblock_migratetype(page);
/* Only reserve normal pageblocks (i.e., they can merge with others) */
if (!migratetype_is_mergeable(mt))
goto out_unlock;
if (order < pageblock_order) {
if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1)
goto out_unlock;
zone->nr_reserved_highatomic += pageblock_nr_pages;
} else {
change_pageblock_range(page, order, MIGRATE_HIGHATOMIC);
zone->nr_reserved_highatomic += 1 << order;
}
out_unlock:
spin_unlock_irqrestore(&zone->lock, flags);
}
/*
* Used when an allocation is about to fail under memory pressure. This
* potentially hurts the reliability of high-order allocations when under
* intense memory pressure but failed atomic allocations should be easier
* to recover from than an OOM.
*
* If @force is true, try to unreserve pageblocks even though highatomic
* pageblock is exhausted.
*/
static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
bool force)
{
struct zonelist *zonelist = ac->zonelist;
unsigned long flags;
struct zoneref *z;
struct zone *zone;
struct page *page;
int order;
bool skip_unreserve_highatomic = false;
int ret;
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
ac->nodemask) {
/*
* Preserve at least one pageblock unless memory pressure
* is really high.
*/
if (!force && zone->nr_reserved_highatomic <=
pageblock_nr_pages)
continue;
trace_android_vh_unreserve_highatomic_bypass(force, zone,
&skip_unreserve_highatomic);
if (skip_unreserve_highatomic)
continue;
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < NR_PAGE_ORDERS; order++) {
struct free_area *area = &(zone->free_area[order]);
unsigned long size;
page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
if (!page)
continue;
/*
* It should never happen but changes to
* locking could inadvertently allow a per-cpu
* drain to add pages to MIGRATE_HIGHATOMIC
* while unreserving so be safe and watch for
* underflows.
*/
size = max(pageblock_nr_pages, 1UL << order);
size = min(size, zone->nr_reserved_highatomic);
zone->nr_reserved_highatomic -= size;
/*
* Convert to ac->migratetype and avoid the normal
* pageblock stealing heuristics. Minimally, the caller
* is doing the work and needs the pages. More
* importantly, if the block was always converted to
* MIGRATE_UNMOVABLE or another type then the number
* of pageblocks that cannot be completely freed
* may increase.
*/
if (order < pageblock_order)
ret = move_freepages_block(zone, page,
MIGRATE_HIGHATOMIC,
ac->migratetype);
else {
move_to_free_list(page, zone, order,
MIGRATE_HIGHATOMIC,
ac->migratetype);
change_pageblock_range(page, order,
ac->migratetype);
ret = 1;
}
/*
* Reserving the block(s) already succeeded,
* so this should not fail on zone boundaries.
*/
WARN_ON_ONCE(ret == -1);
if (ret > 0) {
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
}
}
spin_unlock_irqrestore(&zone->lock, flags);
}
return false;
}
/*
* Try finding a free buddy page on the fallback list.
*
@@ -3215,6 +3070,151 @@ noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
}
ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
/*
* Reserve the pageblock(s) surrounding an allocation request for
* exclusive use of high-order atomic allocations if there are no
* empty page blocks that contain a page with a suitable order
*/
static void reserve_highatomic_pageblock(struct page *page, int order,
struct zone *zone)
{
int mt;
unsigned long max_managed, flags;
bool bypass = false;
/*
* The number reserved as: minimum is 1 pageblock, maximum is
* roughly 1% of a zone. But if 1% of a zone falls below a
* pageblock size, then don't reserve any pageblocks.
* Check is race-prone but harmless.
*/
if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages)
return;
max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages);
if (zone->nr_reserved_highatomic >= max_managed)
return;
trace_android_vh_reserve_highatomic_bypass(page, &bypass);
if (bypass)
return;
spin_lock_irqsave(&zone->lock, flags);
/* Recheck the nr_reserved_highatomic limit under the lock */
if (zone->nr_reserved_highatomic >= max_managed)
goto out_unlock;
/* Yoink! */
mt = get_pageblock_migratetype(page);
/* Only reserve normal pageblocks (i.e., they can merge with others) */
if (!migratetype_is_mergeable(mt))
goto out_unlock;
if (order < pageblock_order) {
if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1)
goto out_unlock;
zone->nr_reserved_highatomic += pageblock_nr_pages;
} else {
change_pageblock_range(page, order, MIGRATE_HIGHATOMIC);
zone->nr_reserved_highatomic += 1 << order;
}
out_unlock:
spin_unlock_irqrestore(&zone->lock, flags);
}
/*
* Used when an allocation is about to fail under memory pressure. This
* potentially hurts the reliability of high-order allocations when under
* intense memory pressure but failed atomic allocations should be easier
* to recover from than an OOM.
*
* If @force is true, try to unreserve pageblocks even though highatomic
* pageblock is exhausted.
*/
static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
bool force)
{
struct zonelist *zonelist = ac->zonelist;
unsigned long flags;
struct zoneref *z;
struct zone *zone;
struct page *page;
int order;
bool skip_unreserve_highatomic = false;
int ret;
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
ac->nodemask) {
/*
* Preserve at least one pageblock unless memory pressure
* is really high.
*/
if (!force && zone->nr_reserved_highatomic <=
pageblock_nr_pages)
continue;
trace_android_vh_unreserve_highatomic_bypass(force, zone,
&skip_unreserve_highatomic);
if (skip_unreserve_highatomic)
continue;
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < NR_PAGE_ORDERS; order++) {
struct free_area *area = &(zone->free_area[order]);
unsigned long size;
page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
if (!page)
continue;
/*
* It should never happen but changes to
* locking could inadvertently allow a per-cpu
* drain to add pages to MIGRATE_HIGHATOMIC
* while unreserving so be safe and watch for
* underflows.
*/
size = max(pageblock_nr_pages, 1UL << order);
size = min(size, zone->nr_reserved_highatomic);
zone->nr_reserved_highatomic -= size;
/*
* Convert to ac->migratetype and avoid the normal
* pageblock stealing heuristics. Minimally, the caller
* is doing the work and needs the pages. More
* importantly, if the block was always converted to
* MIGRATE_UNMOVABLE or another type then the number
* of pageblocks that cannot be completely freed
* may increase.
*/
if (order < pageblock_order)
ret = move_freepages_block(zone, page,
MIGRATE_HIGHATOMIC,
ac->migratetype);
else {
move_to_free_list(page, zone, order,
MIGRATE_HIGHATOMIC,
ac->migratetype);
change_pageblock_range(page, order,
ac->migratetype);
ret = 1;
}
/*
* Reserving the block(s) already succeeded,
* so this should not fail on zone boundaries.
*/
WARN_ON_ONCE(ret == -1);
if (ret > 0) {
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
}
}
spin_unlock_irqrestore(&zone->lock, flags);
}
return false;
}
static inline long __zone_watermark_unusable_free(struct zone *z,
unsigned int order, unsigned int alloc_flags)
{