diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5787f5e7b1ed..aebcc6d8dcbc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3546,38 +3546,18 @@ out:
 	return ret;
 }
 
-/*
- * __folio_unqueue_deferred_split() is not to be called directly:
- * the folio_unqueue_deferred_split() inline wrapper in mm/internal.h
- * limits its calls to those folios which may have a _deferred_list for
- * queueing THP splits, and that list is (racily observed to be) non-empty.
- *
- * It is unsafe to call folio_unqueue_deferred_split() until folio refcount is
- * zero: because even when split_queue_lock is held, a non-empty _deferred_list
- * might be in use on deferred_split_scan()'s unlocked on-stack list.
- *
- * If memory cgroups are enabled, split_queue_lock is in the mem_cgroup: it is
- * therefore important to unqueue deferred split before changing folio memcg.
- */
-bool __folio_unqueue_deferred_split(struct folio *folio)
+void __folio_undo_large_rmappable(struct folio *folio)
 {
 	struct deferred_split *ds_queue;
 	unsigned long flags;
-	bool unqueued = false;
-
-	WARN_ON_ONCE(folio_ref_count(folio));
-	WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg(folio));
 
 	ds_queue = get_deferred_split_queue(folio);
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	if (!list_empty(&folio->_deferred_list)) {
 		ds_queue->split_queue_len--;
 		list_del_init(&folio->_deferred_list);
-		unqueued = true;
 	}
 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
-
-	return unqueued;	/* useful for debug warnings */
 }
 
 void deferred_split_folio(struct folio *folio)
@@ -3596,11 +3576,14 @@ void deferred_split_folio(struct folio *folio)
 		return;
 
 	/*
-	 * Exclude swapcache: originally to avoid a corrupt deferred split
-	 * queue. Nowadays that is fully prevented by mem_cgroup_swapout();
-	 * but if page reclaim is already handling the same folio, it is
-	 * unnecessary to handle it again in the shrinker, so excluding
-	 * swapcache here may still be a useful optimization.
+	 * The try_to_unmap() in page reclaim path might reach here too,
+	 * this may cause a race condition to corrupt deferred split queue.
+	 * And, if page reclaim is already handling the same folio, it is
+	 * unnecessary to handle it again in shrinker.
+	 *
+	 * Check the swapcache flag to determine if the folio is being
+	 * handled by page reclaim since THP swap would add the folio into
+	 * swap cache before calling try_to_unmap().
 	 */
 	if (folio_test_swapcache(folio))
 		return;
diff --git a/mm/internal.h b/mm/internal.h
index da8bd4bfbb3e..fa628fc1000f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -669,11 +669,11 @@ static inline void folio_set_order(struct folio *folio, unsigned int order)
 #endif
 }
 
-bool __folio_unqueue_deferred_split(struct folio *folio);
-static inline bool folio_unqueue_deferred_split(struct folio *folio)
+void __folio_undo_large_rmappable(struct folio *folio);
+static inline void folio_undo_large_rmappable(struct folio *folio)
 {
 	if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio))
-		return false;
+		return;
 
 	/*
 	 * At this point, there is no one trying to add the folio to
@@ -681,9 +681,9 @@ static inline bool folio_unqueue_deferred_split(struct folio *folio)
 	 * to check without acquiring the split_queue_lock.
 	 */
 	if (data_race(list_empty(&folio->_deferred_list)))
-		return false;
+		return;
 
-	return __folio_unqueue_deferred_split(folio);
+	__folio_undo_large_rmappable(folio);
 }
 
 static inline struct folio *page_rmappable_folio(struct page *page)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2694ddf0f99f..7d2d04e79fa0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6019,8 +6019,6 @@ int mem_cgroup_move_account(struct folio *folio,
 	css_get(&to->css);
 	css_put(&from->css);
 
-	/* Warning should never happen, so don't worry about refcount non-0 */
-	WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
 	folio->memcg_data = (unsigned long)to;
 
 	__folio_memcg_unlock(from);
@@ -6391,9 +6389,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	enum mc_target_type target_type;
 	union mc_target target;
 	struct folio *folio;
-	bool tried_split_before = false;
 
-retry_pmd:
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		if (mc.precharge < HPAGE_PMD_NR) {
@@ -6403,27 +6399,6 @@ retry_pmd:
 		target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
 		if (target_type == MC_TARGET_PAGE) {
 			folio = target.folio;
-			/*
-			 * Deferred split queue locking depends on memcg,
-			 * and unqueue is unsafe unless folio refcount is 0:
-			 * split or skip if on the queue? first try to split.
-			 */
-			if (!list_empty(&folio->_deferred_list)) {
-				spin_unlock(ptl);
-				if (!tried_split_before)
-					split_folio(folio);
-				folio_unlock(folio);
-				folio_put(folio);
-				if (tried_split_before)
-					return 0;
-				tried_split_before = true;
-				goto retry_pmd;
-			}
-			/*
-			 * So long as that pmd lock is held, the folio cannot
-			 * be racily added to the _deferred_list, because
-			 * __folio_remove_rmap() will find !partially_mapped.
-			 */
 			if (folio_isolate_lru(folio)) {
 				if (!mem_cgroup_move_account(folio, true,
 							     mc.from, mc.to)) {
@@ -7334,6 +7309,9 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
 	struct obj_cgroup *objcg;
 
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+	VM_BUG_ON_FOLIO(folio_order(folio) > 1 &&
+			!folio_test_hugetlb(folio) &&
+			!list_empty(&folio->_deferred_list), folio);
 
 	/*
 	 * Nobody should be changing or seriously looking at
@@ -7380,7 +7358,6 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
 		ug->nr_memory += nr_pages;
 	ug->pgpgout++;
 
-	WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
 	folio->memcg_data = 0;
 }
 
@@ -7499,9 +7476,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
 
 	/* Transfer the charge and the css ref */
 	commit_charge(new, memcg);
-
-	/* Warning should never happen, so don't worry about refcount non-0 */
-	WARN_ON_ONCE(folio_unqueue_deferred_split(old));
 	old->memcg_data = 0;
 }
 
@@ -7711,7 +7685,6 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
 	VM_BUG_ON_FOLIO(oldid, folio);
 	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
 
-	folio_unqueue_deferred_split(folio);
 	folio->memcg_data = 0;
 
 	if (!mem_cgroup_is_root(memcg))
diff --git a/mm/migrate.c b/mm/migrate.c
index 6e8293a4d1cf..bc02aaa554e0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -438,7 +438,7 @@ int folio_migrate_mapping(struct address_space *mapping,
 		    folio_test_large_rmappable(folio)) {
 			if (!folio_ref_freeze(folio, expected_count))
 				return -EAGAIN;
-			folio_unqueue_deferred_split(folio);
+			folio_undo_large_rmappable(folio);
 			folio_ref_unfreeze(folio, expected_count);
 		}
 
@@ -461,7 +461,9 @@ int folio_migrate_mapping(struct address_space *mapping,
 	}
 
 	/* Take off deferred split queue while frozen and memcg set */
-	folio_unqueue_deferred_split(folio);
+	if (folio_test_large(folio) &&
+	    folio_test_large_rmappable(folio))
+		folio_undo_large_rmappable(folio);
 
 	/*
 	 * Now we know that no one else is looking at the folio:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 419e9bbd2c0c..3c3aa1c37f12 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -698,7 +698,7 @@ void destroy_large_folio(struct folio *folio)
 		return;
 	}
 
-	folio_unqueue_deferred_split(folio);
+	folio_undo_large_rmappable(folio);
 	mem_cgroup_uncharge(folio);
 	free_the_page(&folio->page, folio_order(folio));
 }
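
For reference, below is the post-patch shape of the renamed helper pair,
assembled from the context and "+" lines of the mm/internal.h and
mm/huge_memory.c hunks above. This is a reading aid, not part of the patch:
the middle line of the wrapper's comment falls between the two mm/internal.h
hunks and is filled in from the surrounding tree, and the single-line
comments are added annotations.

/* mm/huge_memory.c */
void __folio_undo_large_rmappable(struct folio *folio)
{
	struct deferred_split *ds_queue;
	unsigned long flags;

	/* Per-memcg split queue if memcg is enabled, else the per-node one */
	ds_queue = get_deferred_split_queue(folio);
	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
	if (!list_empty(&folio->_deferred_list)) {
		ds_queue->split_queue_len--;
		list_del_init(&folio->_deferred_list);
	}
	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
}

/* mm/internal.h */
void __folio_undo_large_rmappable(struct folio *folio);
static inline void folio_undo_large_rmappable(struct folio *folio)
{
	/* Only large rmappable folios can be on a deferred split queue */
	if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio))
		return;

	/*
	 * At this point, there is no one trying to add the folio to
	 * deferred_list. If folio is not in deferred_list, it is safe
	 * to check without acquiring the split_queue_lock.
	 */
	if (data_race(list_empty(&folio->_deferred_list)))
		return;

	__folio_undo_large_rmappable(folio);
}

Relative to the folio_unqueue_deferred_split() variant being removed, the
helper returns void instead of reporting whether anything was unqueued, the
refcount and memcg WARN_ON_ONCE checks are dropped, and the memcontrol.c
call sites no longer unqueue the folio before clearing folio->memcg_data;
instead, uncharge_folio() asserts with VM_BUG_ON_FOLIO that a large
non-hugetlb folio has an empty _deferred_list by uncharge time.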