From a63eadb11dae237cf71e00071d92171e4b067190 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 30 Aug 2024 11:03:38 +0100 Subject: [PATCH] BACKPORT: mm: introduce a pageflag for partially mapped folios Currently folio->_deferred_list is used to keep track of partially_mapped folios that are going to be split under memory pressure. In the next patch, all THPs that are faulted in and collapsed by khugepaged are also going to be tracked using _deferred_list. This patch introduces a pageflag to be able to distinguish between partially mapped folios and others in the deferred_list at split time in deferred_split_scan. It's needed as __folio_remove_rmap decrements _mapcount, _large_mapcount and _entire_mapcount, hence it won't be possible to distinguish between partially mapped folios and others in deferred_split_scan. Even though it introduces an extra flag to track if the folio is partially mapped, there is no functional change intended with this patch and the flag is not useful in this patch itself; it will become useful in the next patch when _deferred_list has non-partially mapped folios. 
Link: https://lkml.kernel.org/r/20240830100438.3623486-5-usamaarif642@gmail.com Change-Id: I8c96fb673742dcb05244048e4f9f50395f237ea5 Signed-off-by: Usama Arif Cc: Alexander Zhu Cc: Barry Song Cc: David Hildenbrand Cc: Domenico Cerasuolo Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kairui Song Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Nico Pache Cc: Rik van Riel Cc: Roman Gushchin Cc: Ryan Roberts Cc: Shakeel Butt Cc: Shuang Zhai Cc: Yu Zhao Cc: Shuang Zhai Cc: Hugh Dickins Signed-off-by: Andrew Morton (cherry picked from commit 8422acdc97ed5839692b45f800dbfb78abe65a94) [ Fix conflicts due to TAO; THP Shatter OOT patches - Kalesh Singh ] Bug: 419599659 Signed-off-by: Kalesh Singh --- include/linux/huge_mm.h | 4 ++-- include/linux/page-flags.h | 13 ++++++++++- mm/huge_memory.c | 46 +++++++++++++++++++++++++++++--------- mm/memcontrol.c | 3 ++- mm/migrate.c | 5 +++-- mm/page_alloc.c | 5 +++-- mm/rmap.c | 4 ++-- 7 files changed, 60 insertions(+), 20 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b9d642d626e4..a9bcef250958 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -338,7 +338,7 @@ static inline int split_huge_page(struct page *page) { return split_huge_page_to_list(page, NULL); } -void deferred_split_folio(struct folio *folio); +void deferred_split_folio(struct folio *folio, bool partially_mapped); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); @@ -498,7 +498,7 @@ static inline int split_huge_page(struct page *page) { return 0; } -static inline void deferred_split_folio(struct folio *folio) {} +static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5f348fd5836b..a52fe91f78ad 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -197,6 +197,7 @@ 
enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, PG_large_rmappable = PG_workingset, /* anon or file-backed */ + PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -858,8 +859,18 @@ static inline void ClearPageCompound(struct page *page) ClearPageHead(page); } PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND) +FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +/* + * PG_partially_mapped is protected by deferred_split split_queue_lock, + * so its safe to use non-atomic set/clear. + */ +__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE) #else TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) +FOLIO_TEST_FLAG_FALSE(partially_mapped) +__FOLIO_SET_FLAG_NOOP(partially_mapped) +__FOLIO_CLEAR_FLAG_NOOP(partially_mapped) #endif #define PG_head_mask ((1UL << PG_head)) @@ -1127,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable | 1UL << PG_partially_mapped) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2d1dd704652d..2cf234a2c9ae 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3501,7 +3501,17 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (folio_order(folio) > 1 && !list_empty(&folio->_deferred_list)) { ds_queue->split_queue_len--; - mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); + if (folio_test_partially_mapped(folio)) { + __folio_clear_partially_mapped(folio); + mod_mthp_stat(folio_order(folio), + MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); + } + /* + * Reinitialize page_deferred_list after removing the + * page from the split_queue, otherwise a 
subsequent + * split will see list corruption when checking the + * page_deferred_list. + */ list_del_init(&folio->_deferred_list); } spin_unlock(&ds_queue->split_queue_lock); @@ -3566,13 +3576,18 @@ void __folio_undo_large_rmappable(struct folio *folio) spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if (!list_empty(&folio->_deferred_list)) { ds_queue->split_queue_len--; - mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); + if (folio_test_partially_mapped(folio)) { + __folio_clear_partially_mapped(folio); + mod_mthp_stat(folio_order(folio), + MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); + } list_del_init(&folio->_deferred_list); } spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); } -void deferred_split_folio(struct folio *folio) +/* partially_mapped=false won't clear PG_partially_mapped folio flag */ +void deferred_split_folio(struct folio *folio, bool partially_mapped) { struct deferred_split *ds_queue = get_deferred_split_queue(folio); #ifdef CONFIG_MEMCG @@ -3600,14 +3615,21 @@ void deferred_split_folio(struct folio *folio) if (folio_test_swapcache(folio)) return; - if (!list_empty(&folio->_deferred_list)) - return; - spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + if (partially_mapped) { + if (!folio_test_partially_mapped(folio)) { + __folio_set_partially_mapped(folio); + if (folio_test_pmd_mappable(folio)) + count_vm_event(THP_DEFERRED_SPLIT_PAGE); + count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED); + mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, 1); + + } + } else { + /* partially mapped folios cannot become non-partially mapped */ + VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio); + } if (list_empty(&folio->_deferred_list)) { - count_vm_event(THP_DEFERRED_SPLIT_PAGE); - count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED); - mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, 1); list_add_tail(&folio->_deferred_list, 
&ds_queue->split_queue); ds_queue->split_queue_len++; #ifdef CONFIG_MEMCG @@ -3659,7 +3681,11 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, list_move(&folio->_deferred_list, &list); } else { /* We lost race with folio_put() */ - mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); + if (folio_test_partially_mapped(folio)) { + __folio_clear_partially_mapped(folio); + mod_mthp_stat(folio_order(folio), + MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); + } list_del_init(&folio->_deferred_list); ds_queue->split_queue_len--; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7d2d04e79fa0..db6f2a735b98 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7311,7 +7311,8 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); VM_BUG_ON_FOLIO(folio_order(folio) > 1 && !folio_test_hugetlb(folio) && - !list_empty(&folio->_deferred_list), folio); + !list_empty(&folio->_deferred_list) && + folio_test_partially_mapped(folio), folio); /* * Nobody should be changing or seriously looking at diff --git a/mm/migrate.c b/mm/migrate.c index 2522829f6c19..4a8cca5b2d1a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1776,8 +1776,9 @@ static int migrate_pages_batch(struct list_head *from, * use _deferred_list. 
*/ if (nr_pages > 2 && - !list_empty(&folio->_deferred_list)) { - if (try_split_folio(folio, split_folios, reason) == 0) { + !list_empty(&folio->_deferred_list) && + folio_test_partially_mapped(folio)) { + if (!try_split_folio(folio, split_folios, mode)) { nr_failed++; stats->nr_thp_split += is_thp; continue; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c0e79aaa5c45..2992f365c6b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1101,8 +1101,9 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) break; case 2: /* the second tail page: deferred_list overlaps ->mapping */ - if (unlikely(!list_empty(&folio->_deferred_list))) { - bad_page(page, "on deferred list"); + if (unlikely(!list_empty(&folio->_deferred_list) && + folio_test_partially_mapped(folio))) { + bad_page(page, "partially mapped folio on deferred list"); goto out; } break; diff --git a/mm/rmap.c b/mm/rmap.c index 674f130a455c..1fd7dae3a255 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1602,8 +1602,8 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, * Check partially_mapped first to ensure it is a large folio. */ if (folio_test_anon(folio) && partially_mapped && - list_empty(&folio->_deferred_list)) - deferred_split_folio(folio); + !folio_test_partially_mapped(folio)) + deferred_split_folio(folio, true); } /*