Revert "BACKPORT: mm/thp: fix deferred split unqueue naming and locking"

This reverts commit f5f3d8adea.

Change-Id: I59066f7c0e24ee6eebe57ea5fa1197c185df4fe3
Bug: 419599659
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Author: Kalesh Singh
Date:   2025-05-28 14:54:53 -07:00
Parent: c06fa3b5cd
Commit: bc9f1a0f43

5 changed files with 21 additions and 64 deletions

mm/huge_memory.c

@@ -3546,38 +3546,18 @@ out:
return ret;
}
-/*
- * __folio_unqueue_deferred_split() is not to be called directly:
- * the folio_unqueue_deferred_split() inline wrapper in mm/internal.h
- * limits its calls to those folios which may have a _deferred_list for
- * queueing THP splits, and that list is (racily observed to be) non-empty.
- *
- * It is unsafe to call folio_unqueue_deferred_split() until folio refcount is
- * zero: because even when split_queue_lock is held, a non-empty _deferred_list
- * might be in use on deferred_split_scan()'s unlocked on-stack list.
- *
- * If memory cgroups are enabled, split_queue_lock is in the mem_cgroup: it is
- * therefore important to unqueue deferred split before changing folio memcg.
- */
-bool __folio_unqueue_deferred_split(struct folio *folio)
+void __folio_undo_large_rmappable(struct folio *folio)
{
struct deferred_split *ds_queue;
unsigned long flags;
-bool unqueued = false;
-WARN_ON_ONCE(folio_ref_count(folio));
-WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg(folio));
ds_queue = get_deferred_split_queue(folio);
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (!list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--;
list_del_init(&folio->_deferred_list);
-unqueued = true;
}
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
-return unqueued; /* useful for debug warnings */
}
void deferred_split_folio(struct folio *folio)
@@ -3596,11 +3576,14 @@ void deferred_split_folio(struct folio *folio)
return;
/*
- * Exclude swapcache: originally to avoid a corrupt deferred split
- * queue. Nowadays that is fully prevented by mem_cgroup_swapout();
- * but if page reclaim is already handling the same folio, it is
- * unnecessary to handle it again in the shrinker, so excluding
- * swapcache here may still be a useful optimization.
+ * The try_to_unmap() in page reclaim path might reach here too,
+ * this may cause a race condition to corrupt deferred split queue.
+ * And, if page reclaim is already handling the same folio, it is
+ * unnecessary to handle it again in shrinker.
+ *
+ * Check the swapcache flag to determine if the folio is being
+ * handled by page reclaim since THP swap would add the folio into
+ * swap cache before calling try_to_unmap().
*/
if (folio_test_swapcache(folio))
return;

mm/internal.h

@@ -669,11 +669,11 @@ static inline void folio_set_order(struct folio *folio, unsigned int order)
#endif
}
-bool __folio_unqueue_deferred_split(struct folio *folio);
-static inline bool folio_unqueue_deferred_split(struct folio *folio)
+void __folio_undo_large_rmappable(struct folio *folio);
+static inline void folio_undo_large_rmappable(struct folio *folio)
{
if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio))
-return false;
+return;
/*
* At this point, there is no one trying to add the folio to
@@ -681,9 +681,9 @@ static inline bool folio_unqueue_deferred_split(struct folio *folio)
* to check without acquiring the split_queue_lock.
*/
if (data_race(list_empty(&folio->_deferred_list)))
-return false;
+return;
-return __folio_unqueue_deferred_split(folio);
+__folio_undo_large_rmappable(folio);
}
static inline struct folio *page_rmappable_folio(struct page *page)

mm/memcontrol.c

@@ -6019,8 +6019,6 @@ int mem_cgroup_move_account(struct folio *folio,
css_get(&to->css);
css_put(&from->css);
-/* Warning should never happen, so don't worry about refcount non-0 */
-WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
folio->memcg_data = (unsigned long)to;
__folio_memcg_unlock(from);
@@ -6391,9 +6389,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
enum mc_target_type target_type;
union mc_target target;
struct folio *folio;
-bool tried_split_before = false;
-retry_pmd:
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
if (mc.precharge < HPAGE_PMD_NR) {
@@ -6403,27 +6399,6 @@ retry_pmd:
target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
if (target_type == MC_TARGET_PAGE) {
folio = target.folio;
-/*
- * Deferred split queue locking depends on memcg,
- * and unqueue is unsafe unless folio refcount is 0:
- * split or skip if on the queue? first try to split.
- */
-if (!list_empty(&folio->_deferred_list)) {
-spin_unlock(ptl);
-if (!tried_split_before)
-split_folio(folio);
-folio_unlock(folio);
-folio_put(folio);
-if (tried_split_before)
-return 0;
-tried_split_before = true;
-goto retry_pmd;
-}
-/*
- * So long as that pmd lock is held, the folio cannot
- * be racily added to the _deferred_list, because
- * __folio_remove_rmap() will find !partially_mapped.
- */
if (folio_isolate_lru(folio)) {
if (!mem_cgroup_move_account(folio, true,
mc.from, mc.to)) {
@@ -7334,6 +7309,9 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
struct obj_cgroup *objcg;
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+VM_BUG_ON_FOLIO(folio_order(folio) > 1 &&
+!folio_test_hugetlb(folio) &&
+!list_empty(&folio->_deferred_list), folio);
/*
* Nobody should be changing or seriously looking at
@@ -7380,7 +7358,6 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
ug->nr_memory += nr_pages;
ug->pgpgout++;
-WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
folio->memcg_data = 0;
}
@@ -7499,9 +7476,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
/* Transfer the charge and the css ref */
commit_charge(new, memcg);
-/* Warning should never happen, so don't worry about refcount non-0 */
-WARN_ON_ONCE(folio_unqueue_deferred_split(old));
old->memcg_data = 0;
}
@@ -7711,7 +7685,6 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
VM_BUG_ON_FOLIO(oldid, folio);
mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
-folio_unqueue_deferred_split(folio);
folio->memcg_data = 0;
if (!mem_cgroup_is_root(memcg))

mm/migrate.c

@@ -438,7 +438,7 @@ int folio_migrate_mapping(struct address_space *mapping,
folio_test_large_rmappable(folio)) {
if (!folio_ref_freeze(folio, expected_count))
return -EAGAIN;
-folio_unqueue_deferred_split(folio);
+folio_undo_large_rmappable(folio);
folio_ref_unfreeze(folio, expected_count);
}
@@ -461,7 +461,8 @@ int folio_migrate_mapping(struct address_space *mapping,
}
/* Take off deferred split queue while frozen and memcg set */
-folio_unqueue_deferred_split(folio);
+if (folio_test_large(folio) && folio_test_large_rmappable(folio))
+folio_undo_large_rmappable(folio);
/*
* Now we know that no one else is looking at the folio:

mm/page_alloc.c

@@ -698,7 +698,7 @@ void destroy_large_folio(struct folio *folio)
return;
}
-folio_unqueue_deferred_split(folio);
+folio_undo_large_rmappable(folio);
mem_cgroup_uncharge(folio);
free_the_page(&folio->page, folio_order(folio));
}