diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0e35b72e7cb1..dfabbcb8e7ac 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1155,6 +1155,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 }
 
 void split_page_memcg(struct page *head, unsigned int nr);
+void folio_copy_memcg(struct folio *folio);
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
@@ -1584,6 +1585,10 @@ static inline void split_page_memcg(struct page *head, unsigned int nr)
 {
 }
 
+static inline void folio_copy_memcg(struct folio *folio)
+{
+}
+
 static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat,
 						int order, gfp_t gfp_mask,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index daeef147501b..66af3d07d94a 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -220,6 +220,25 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli
 	VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) &&
 			!lru_gen_is_active(lruvec, new_gen));
 }
 
+static inline bool lru_gen_add_dst(struct lruvec *lruvec, struct folio *dst)
+{
+	int gen = folio_lru_gen(dst);
+	int type = folio_is_file_lru(dst);
+	int zone = folio_zonenum(dst);
+	struct lru_gen_folio *lrugen = &lruvec->lrugen;
+
+	if (gen < 0)
+		return false;
+
+	lockdep_assert_held(&lruvec->lru_lock);
+	VM_WARN_ON_ONCE_FOLIO(folio_lruvec(dst) != lruvec, dst);
+
+	list_add_tail(&dst->lru, &lrugen->folios[gen][type][zone]);
+	lru_gen_update_size(lruvec, dst, -1, gen);
+
+	return true;
+}
+
 static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
 {
 	unsigned long seq;
@@ -305,6 +324,11 @@ static inline bool lru_gen_in_fault(void)
 	return false;
 }
 
+static inline bool lru_gen_add_dst(struct lruvec *lruvec, struct folio *dst)
+{
+	return false;
+}
+
 static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
 {
 	return false;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 36765081b8c1..937ef6c33501 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -352,14 +352,19 @@ struct folio {
 		struct {
 			unsigned long _flags_1;
 			unsigned long _head_1;
-			unsigned long _folio_avail;
 	/* public: */
 			atomic_t _entire_mapcount;
 			atomic_t _nr_pages_mapped;
 			atomic_t _pincount;
 #ifdef CONFIG_64BIT
+			unsigned int __padding;
 			unsigned int _folio_nr_pages;
 #endif
+			union {
+				unsigned long _private_1;
+				unsigned long *_dst_ul;
+				struct page **_dst_pp;
+			};
 	/* private: the union with struct page is transitional */
 		};
 		struct page __page_1;
@@ -405,6 +410,7 @@ FOLIO_MATCH(memcg_data, memcg_data);
 			offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
 FOLIO_MATCH(compound_head, _head_1);
+FOLIO_MATCH(private, _private_1);
 #undef FOLIO_MATCH
 #define FOLIO_MATCH(pg, fl)						\
 	static_assert(offsetof(struct folio, fl) ==			\
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 86a33075dfd5..faa993839c22 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -108,6 +108,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PAGE_FAILED,
 		THP_DEFERRED_SPLIT_PAGE,
 		THP_SPLIT_PMD,
+		THP_SHATTER_PAGE,
+		THP_SHATTER_PAGE_FAILED,
+		THP_SHATTER_PAGE_DISCARDED,
 		THP_SCAN_EXCEED_NONE_PTE,
 		THP_SCAN_EXCEED_SWAP_PTE,
 		THP_SCAN_EXCEED_SHARED_PTE,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index dffa327edbcd..0596313f4e5e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2629,6 +2629,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pte_swp_mksoft_dirty(entry);
 			if (uffd_wp)
 				entry = pte_swp_mkuffd_wp(entry);
+			if (vma->vm_flags & VM_LOCKED)
+				set_src_usage(page + i, SRC_PAGE_MLOCKED);
+			else
+				set_src_usage(page + i, SRC_PAGE_MAPPED);
 
 			VM_WARN_ON(!pte_none(ptep_get(pte + i)));
 			set_pte_at(mm, addr, pte + i, entry);
@@ -2780,6 +2784,156 @@ static void remap_page(struct folio *folio, unsigned long nr)
 	}
 }
 
+static int prep_to_unmap(struct folio *src)
+{
+	int nr_pages = folio_nr_pages(src);
+
+	if (folio_can_split(src))
+		return 0;
+
+	WARN_ON_ONCE(src->_dst_pp);
+
+	src->_dst_pp = kcalloc(nr_pages, sizeof(struct page *), GFP_ATOMIC);
+
+	return src->_dst_pp ? 0 : -ENOMEM;
+}
+
+static bool try_to_discard(struct folio *src, int i)
+{
+	int usage;
+	void *addr;
+	struct page *page = folio_page(src, i);
+
+	if (!folio_test_anon(src))
+		return false;
+
+	if (folio_test_swapcache(src))
+		return false;
+
+	usage = src_page_usage(page);
+	if (usage & SRC_PAGE_MLOCKED)
+		return false;
+
+	if (!(usage & SRC_PAGE_MAPPED))
+		return true;
+
+	addr = kmap_local_page(page);
+	if (!memchr_inv(addr, 0, PAGE_SIZE))
+		set_src_usage(page, SRC_PAGE_CLEAN);
+	kunmap_local(addr);
+
+	return can_discard_src(page);
+}
+
+static int prep_dst_pages(struct folio *src)
+{
+	int i;
+	int nr_pages = folio_nr_pages(src);
+
+	if (folio_can_split(src))
+		return 0;
+
+	if (WARN_ON_ONCE(!src->_dst_pp))
+		return -ENOMEM;
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *dst = NULL;
+
+		if (try_to_discard(src, i)) {
+			count_vm_event(THP_SHATTER_PAGE_DISCARDED);
+			continue;
+		}
+
+		do {
+			int nid = folio_nid(src);
+			gfp_t gfp = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+				    GFP_NOWAIT | __GFP_THISNODE;
+
+			if (dst)
+				__free_page(dst);
+
+			dst = alloc_pages_node(nid, gfp, 0);
+			if (!dst)
+				return -ENOMEM;
+		} while (!page_ref_freeze(dst, 1));
+
+		copy_highpage(dst, folio_page(src, i));
+		src->_dst_ul[i] |= (unsigned long)dst;
+
+		cond_resched();
+	}
+
+	return 0;
+}
+
+static void free_dst_pages(struct folio *src)
+{
+	int i;
+	int nr_pages = folio_nr_pages(src);
+
+	if (folio_can_split(src))
+		return;
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *dst = folio_dst_page(src, i);
+
+		if (!dst)
+			continue;
+
+		page_ref_unfreeze(dst, 1);
+		__free_page(dst);
+	}
+
+	kfree(src->_dst_pp);
+	src->_dst_pp = NULL;
+}
+
+static void reset_src_folio(struct folio *src)
+{
+	if (folio_can_split(src))
+		return;
+
+	if (WARN_ON_ONCE(!src->_dst_pp))
+		return;
+
+	if (!folio_mapping_flags(src))
+		src->mapping = NULL;
+
+	if (folio_test_anon(src) && folio_test_swapcache(src)) {
+		folio_clear_swapcache(src);
+		src->swap.val = 0;
+	}
+
+	kfree(src->_dst_pp);
+	src->_dst_pp = NULL;
+}
+
+static bool lru_add_dst(struct lruvec *lruvec, struct folio *src, struct folio *dst)
+{
+	if (folio_can_split(src))
+		return false;
+
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_lru(src), src);
+	VM_WARN_ON_ONCE_FOLIO(folio_test_lru(dst), dst);
+	VM_WARN_ON_ONCE_FOLIO(folio_lruvec(dst) != folio_lruvec(src), dst);
+
+	if (!lru_gen_add_dst(lruvec, dst)) {
+		enum lru_list lru = folio_lru_list(dst);
+		int zone = folio_zonenum(dst);
+		int delta = folio_nr_pages(dst);
+
+		if (folio_test_unevictable(dst))
+			dst->mlock_count = 0;
+		else
+			list_add_tail(&dst->lru, &src->lru);
+		update_lru_size(lruvec, lru, zone, delta);
+	}
+
+	folio_set_lru(dst);
+
+	return true;
+}
+
 static void lru_add_page_tail(struct page *head, struct page *tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
@@ -2793,7 +2947,7 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
 		VM_WARN_ON(PageLRU(head));
 		get_page(tail);
 		list_add_tail(&tail->lru, list);
-	} else {
+	} else if (!lru_add_dst(lruvec, page_folio(head), page_folio(tail))) {
 		/* head is still on lru (and we have it frozen) */
 		VM_WARN_ON(!PageLRU(head));
 		if (PageUnevictable(tail))
@@ -2808,7 +2962,7 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
 	struct page *head = &folio->page;
-	struct page *page_tail = head + tail;
+	struct page *page_tail = folio_dst_page(folio, tail);
 	/*
 	 * Careful: new_folio is not a "real" folio before we cleared PageTail.
 	 * Don't pass it around before clear_compound_head().
@@ -2849,8 +3003,8 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
 			 LRU_GEN_MASK | LRU_REFS_MASK));
 
 	/* ->mapping in first and second tail page is replaced by other uses */
-	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
-			page_tail);
+	VM_BUG_ON_PAGE(folio_can_split(folio) && tail > 2 &&
+		       page_tail->mapping != TAIL_MAPPING, page_tail);
 	page_tail->mapping = head->mapping;
 	page_tail->index = head->index + tail;
 
@@ -2905,9 +3059,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	unsigned long offset = 0;
 	unsigned int nr = thp_nr_pages(head);
 	int i, nr_dropped = 0;
+	bool can_split = folio_can_split(folio);
 
 	/* complete memcg works before add pages to LRU */
-	split_page_memcg(head, nr);
+	if (can_split)
+		split_page_memcg(head, nr);
+	else
+		folio_copy_memcg(folio);
 
 	if (folio_test_anon(folio) && folio_test_swapcache(folio)) {
 		offset = swp_offset(folio->swap);
@@ -2920,46 +3078,51 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 	ClearPageHasHWPoisoned(head);
 
-	for (i = nr - 1; i >= 1; i--) {
+	for (i = nr - 1; i >= can_split; i--) {
+		struct page *dst = folio_dst_page(folio, i);
+
+		if (!dst)
+			continue;
+
 		__split_huge_page_tail(folio, i, lruvec, list);
 		/* Some pages can be beyond EOF: drop them from page cache */
-		if (head[i].index >= end) {
-			struct folio *tail = page_folio(head + i);
+		if (dst->index >= end) {
+			struct folio *tail = page_folio(dst);
 
-			if (shmem_mapping(head->mapping))
+			if (shmem_mapping(tail->mapping))
 				nr_dropped++;
 			else if (folio_test_clear_dirty(tail))
 				folio_account_cleaned(tail,
-					inode_to_wb(folio->mapping->host));
+					inode_to_wb(tail->mapping->host));
 			__filemap_remove_folio(tail, NULL);
 			folio_put(tail);
-		} else if (!PageAnon(page)) {
-			__xa_store(&head->mapping->i_pages, head[i].index,
-					head + i, 0);
+		} else if (!PageAnon(dst)) {
+			__xa_store(&dst->mapping->i_pages, dst->index, dst, 0);
 		} else if (swap_cache) {
-			__xa_store(&swap_cache->i_pages, offset + i,
-					head + i, 0);
+			__xa_store(&swap_cache->i_pages, offset + i, dst, 0);
 		}
 	}
 
-	ClearPageCompound(head);
+	if (can_split)
+		ClearPageCompound(head);
 	unlock_page_lruvec(lruvec);
 	/* Caller disabled irqs, so they are still disabled here */
 
-	split_page_owner(head, nr);
+	if (can_split)
+		split_page_owner(head, nr);
 
 	/* See comment in __split_huge_page_tail() */
 	if (PageAnon(head)) {
 		/* Additional pin to swap cache */
 		if (PageSwapCache(head)) {
-			page_ref_add(head, 2);
+			page_ref_add(head, 2 - !can_split);
 			xa_unlock(&swap_cache->i_pages);
 		} else {
 			page_ref_inc(head);
 		}
 	} else {
 		/* Additional pin to page cache */
-		page_ref_add(head, 2);
+		page_ref_add(head, 2 - !can_split);
 		xa_unlock(&head->mapping->i_pages);
 	}
 	local_irq_enable();
@@ -2969,8 +3132,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	remap_page(folio, nr);
 
 	for (i = 0; i < nr; i++) {
-		struct page *subpage = head + i;
-		if (subpage == page)
+		struct page *subpage = folio_dst_page(folio, i);
+
+		if (!subpage || subpage == page)
 			continue;
 		unlock_page(subpage);
 
@@ -2983,6 +3147,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		 */
 		free_page_and_swap_cache(subpage);
 	}
+
+	reset_src_folio(folio);
 }
 
 /* Racy check whether the huge page can be split */
@@ -2990,9 +3156,6 @@ static bool can_split_folio(struct folio *folio, int *pextra_pins)
 {
 	int extra_pins;
 
-	if (!folio_can_split(folio))
-		return false;
-
 	/* Additional pins from page cache */
 	if (folio_test_anon(folio))
 		extra_pins = folio_test_swapcache(folio) ?
@@ -3112,8 +3275,21 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		goto out_unlock;
 	}
 
+	ret = prep_to_unmap(folio);
+	if (ret)
+		goto out_unlock;
+
 	unmap_folio(folio);
 
+	if (!folio_ref_freeze(folio, 1 + extra_pins)) {
+		ret = -EAGAIN;
+		goto remap;
+	}
+
+	ret = prep_dst_pages(folio);
+	if (ret)
+		goto unfreeze;
+
 	/* block interrupt reentry in xa_lock and spinlock */
 	local_irq_disable();
 	if (mapping) {
@@ -3123,44 +3299,44 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		 */
 		xas_lock(&xas);
 		xas_reset(&xas);
-		if (xas_load(&xas) != folio)
+		if (xas_load(&xas) != folio) {
+			ret = -EAGAIN;
 			goto fail;
+		}
 	}
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
 	spin_lock(&ds_queue->split_queue_lock);
-	if (folio_ref_freeze(folio, 1 + extra_pins)) {
-		if (!list_empty(&folio->_deferred_list)) {
-			ds_queue->split_queue_len--;
-			list_del(&folio->_deferred_list);
-		}
-		spin_unlock(&ds_queue->split_queue_lock);
-		if (mapping) {
-			int nr = folio_nr_pages(folio);
+	if (!list_empty(&folio->_deferred_list)) {
+		ds_queue->split_queue_len--;
+		list_del_init(&folio->_deferred_list);
+	}
+	spin_unlock(&ds_queue->split_queue_lock);
+	if (mapping) {
+		int nr = folio_nr_pages(folio);
 
-			xas_split(&xas, folio, folio_order(folio));
-			if (folio_test_pmd_mappable(folio)) {
-				if (folio_test_swapbacked(folio)) {
-					__lruvec_stat_mod_folio(folio,
-							NR_SHMEM_THPS, -nr);
-				} else {
-					__lruvec_stat_mod_folio(folio,
-							NR_FILE_THPS, -nr);
-					filemap_nr_thps_dec(mapping);
-				}
+		xas_split(&xas, folio, folio_order(folio));
+		if (folio_test_pmd_mappable(folio)) {
+			if (folio_test_swapbacked(folio)) {
+				__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr);
+			} else {
+				__lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr);
+				filemap_nr_thps_dec(mapping);
 			}
 		}
+	}
 
-		__split_huge_page(page, list, end);
-		ret = 0;
-	} else {
-		spin_unlock(&ds_queue->split_queue_lock);
+	__split_huge_page(page, list, end);
+	if (ret) {
 fail:
 		if (mapping)
 			xas_unlock(&xas);
 		local_irq_enable();
+unfreeze:
+		folio_ref_unfreeze(folio, 1 + extra_pins);
+remap:
+		free_dst_pages(folio);
 		remap_page(folio, folio_nr_pages(folio));
-		ret = -EAGAIN;
 	}
 
 out_unlock:
@@ -3172,6 +3348,12 @@ out_unlock:
 		i_mmap_unlock_read(mapping);
 out:
 	xas_destroy(&xas);
+
+	if (!folio_can_split(folio)) {
+		count_vm_event(!ret ? THP_SHATTER_PAGE : THP_SHATTER_PAGE_FAILED);
+		return ret ? : 1;
+	}
+
 	count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
 	return ret;
 }
diff --git a/mm/internal.h b/mm/internal.h
index 5389197209a3..2cf481d89907 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1416,4 +1416,43 @@ struct vma_prepare {
 	struct vm_area_struct *remove;
 	struct vm_area_struct *remove2;
 };
+
+#define SRC_PAGE_MAPPED		BIT(0)
+#define SRC_PAGE_MLOCKED	BIT(1)
+#define SRC_PAGE_CLEAN		BIT(2)
+#define SRC_PAGE_USAGE_MASK	(BIT(3) - 1)
+
+static inline unsigned long src_page_usage(struct page *page)
+{
+	struct folio *src = page_folio(page);
+	int i = folio_page_idx(src, page);
+
+	if (folio_can_split(src) || !src->_dst_ul)
+		return 0;
+
+	return src->_dst_ul[i] & SRC_PAGE_USAGE_MASK;
+}
+
+static inline bool can_discard_src(struct page *page)
+{
+	return src_page_usage(page) & SRC_PAGE_CLEAN;
+}
+
+static inline void set_src_usage(struct page *page, unsigned long usage)
+{
+	struct folio *src = page_folio(page);
+	int i = folio_page_idx(src, page);
+
+	if (!folio_can_split(src) && src->_dst_ul)
+		src->_dst_ul[i] |= usage;
+}
+
+static inline struct page *folio_dst_page(struct folio *src, int i)
+{
+	if (folio_can_split(src) || !src->_dst_ul)
+		return folio_page(src, i);
+
+	return (void *)(src->_dst_ul[i] & ~SRC_PAGE_USAGE_MASK);
+}
+
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/madvise.c b/mm/madvise.c
index 0102183578ae..d32a3a8f16d7 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -418,7 +418,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 			err = split_folio(folio);
 			folio_unlock(folio);
 			folio_put(folio);
-			if (!err)
+			if (err >= 0)
 				goto regular_folio;
 			return 0;
 		}
@@ -516,7 +516,7 @@ regular_folio:
 			if (!start_pte)
 				break;
 			arch_enter_lazy_mmu_mode();
-			if (!err)
+			if (err >= 0)
 				nr = 0;
 			continue;
 		}
@@ -749,7 +749,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 			if (!start_pte)
 				break;
 			arch_enter_lazy_mmu_mode();
-			if (!err)
+			if (err >= 0)
 				nr = 0;
 			continue;
 		}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7e9337b1ee3f..5515f4909ad8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3476,6 +3476,53 @@ void split_page_memcg(struct page *head, unsigned int nr)
 		css_get_many(&memcg->css, nr - 1);
 }
 
+void folio_copy_memcg(struct folio *src)
+{
+	int i;
+	unsigned long flags;
+	int delta = 0;
+	int nr_pages = folio_nr_pages(src);
+	struct mem_cgroup *memcg = folio_memcg(src);
+
+	if (folio_can_split(src))
+		return;
+
+	if (WARN_ON_ONCE(!src->_dst_pp))
+		return;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	if (WARN_ON_ONCE(!memcg))
+		return;
+
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(src), src);
+	VM_WARN_ON_ONCE_FOLIO(folio_ref_count(src), src);
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *dst = folio_dst_page(src, i);
+
+		if (!dst)
+			continue;
+
+		commit_charge(page_folio(dst), memcg);
+		delta++;
+	}
+
+	if (!mem_cgroup_is_root(memcg)) {
+		page_counter_charge(&memcg->memory, delta);
+		if (do_memsw_account())
+			page_counter_charge(&memcg->memsw, delta);
+	}
+
+	css_get_many(&memcg->css, delta);
+
+	local_irq_save(flags);
+	mem_cgroup_charge_statistics(memcg, delta);
+	memcg_check_events(memcg, folio_nid(src));
+	local_irq_restore(flags);
+}
+
 #ifdef CONFIG_SWAP
 /**
  * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index aed6517b8aa0..c6e2ce7b8237 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2286,7 +2286,7 @@ try_again:
 		 * page is a valid handlable page.
 		 */
 		SetPageHasHWPoisoned(hpage);
-		if (try_to_split_thp_page(p) < 0) {
+		if (try_to_split_thp_page(p)) {
 			res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
 			goto unlock_mutex;
 		}
diff --git a/mm/migrate.c b/mm/migrate.c
index 44516d933e7d..2de7dffe1b39 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -185,36 +185,52 @@ EXPORT_SYMBOL_GPL(putback_movable_pages);
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static bool remove_migration_pte(struct folio *folio,
-		struct vm_area_struct *vma, unsigned long addr, void *old)
+static bool remove_migration_pte(struct folio *dst,
+		struct vm_area_struct *vma, unsigned long addr, void *arg)
 {
-	DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+	struct folio *src = arg;
+	DEFINE_FOLIO_VMA_WALK(pvmw, src, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		rmap_t rmap_flags = RMAP_NONE;
 		pte_t old_pte;
 		pte_t pte;
 		swp_entry_t entry;
-		struct page *new;
+		struct page *page;
+		struct folio *folio;
 		unsigned long idx = 0;
 
 		/* pgoff is invalid for ksm pages, but they are never large */
-		if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+		if (folio_test_large(dst) && !folio_test_hugetlb(dst))
 			idx = linear_page_index(vma, pvmw.address) - pvmw.pgoff;
-		new = folio_page(folio, idx);
+		page = folio_page(dst, idx);
+
+		if (src == dst) {
+			if (can_discard_src(page)) {
+				VM_WARN_ON_ONCE_FOLIO(!folio_test_anon(src), src);
+
+				pte_clear_not_present_full(pvmw.vma->vm_mm, pvmw.address,
+							   pvmw.pte, false);
+				dec_mm_counter(pvmw.vma->vm_mm, MM_ANONPAGES);
+				continue;
+			}
+			page = folio_dst_page(src, idx);
+		}
+
+		folio = page_folio(page);
 
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
 		/* PMD-mapped THP migration entry */
 		if (!pvmw.pte) {
 			VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
 					!folio_test_pmd_mappable(folio), folio);
-			remove_migration_pmd(&pvmw, new);
+			remove_migration_pmd(&pvmw, page);
 			continue;
 		}
 #endif
 
 		folio_get(folio);
-		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
+		pte = mk_pte(page, READ_ONCE(vma->vm_page_prot));
 		old_pte = ptep_get(pvmw.pte);
 		if (pte_swp_soft_dirty(old_pte))
 			pte = pte_mksoft_dirty(pte);
@@ -232,13 +248,13 @@ static bool remove_migration_pte(struct folio *folio,
 		if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
 			rmap_flags |= RMAP_EXCLUSIVE;
 
-		if (unlikely(is_device_private_page(new))) {
+		if (unlikely(is_device_private_page(page))) {
 			if (pte_write(pte))
 				entry = make_writable_device_private_entry(
-							page_to_pfn(new));
+							page_to_pfn(page));
 			else
 				entry = make_readable_device_private_entry(
-							page_to_pfn(new));
+							page_to_pfn(page));
 			pte = swp_entry_to_pte(entry);
 			if (pte_swp_soft_dirty(old_pte))
 				pte = pte_swp_mksoft_dirty(pte);
@@ -264,17 +280,17 @@ static bool remove_migration_pte(struct folio *folio,
 #endif
 		{
 			if (folio_test_anon(folio))
-				folio_add_anon_rmap_pte(folio, new, vma,
+				folio_add_anon_rmap_pte(folio, page, vma,
 							pvmw.address, rmap_flags);
 			else
-				folio_add_file_rmap_pte(folio, new, vma);
+				folio_add_file_rmap_pte(folio, page, vma);
 			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 		}
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_drain_local();
 
 		trace_remove_migration_pte(pvmw.address, pte_val(pte),
-					   compound_order(new));
+					   compound_order(page));
 
 		/* No need to invalidate - it was non-present before */
 		update_mmu_cache(vma, pvmw.address, pvmw.pte);
@@ -1482,10 +1498,30 @@ out:
 	return rc;
 }
 
-static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
+static inline int try_split_folio(struct folio *folio, struct list_head *split_folios,
+				  int reason)
 {
 	int rc;
 
+	if (!folio_can_split(folio)) {
+		LIST_HEAD(head);
+
+		if (reason != MR_CONTIG_RANGE)
+			return -EBUSY;
+
+		folio_lock(folio);
+		rc = split_folio_to_list(folio, &head);
+		folio_unlock(folio);
+
+		if (rc > 0) {
+			putback_movable_pages(&head);
+			return 0;
+		}
+
+		VM_WARN_ON_ONCE_FOLIO(!rc, folio);
+		return rc;
+	}
+
 	folio_lock(folio);
 	rc = split_folio_to_list(folio, split_folios);
 	folio_unlock(folio);
@@ -1665,7 +1701,7 @@ static int migrate_pages_batch(struct list_head *from,
 		if (!thp_migration_supported() && is_thp) {
 			nr_failed++;
 			stats->nr_thp_failed++;
-			if (!try_split_folio(folio, split_folios)) {
+			if (!try_split_folio(folio, split_folios, reason)) {
 				stats->nr_thp_split++;
 				continue;
 			}
@@ -1696,7 +1732,7 @@ static int migrate_pages_batch(struct list_head *from,
 			stats->nr_thp_failed += is_thp;
 			/* Large folio NUMA faulting doesn't split to retry. */
 			if (folio_test_large(folio) && !nosplit) {
-				int ret = try_split_folio(folio, split_folios);
+				int ret = try_split_folio(folio, split_folios, reason);
 
 				if (!ret) {
 					stats->nr_thp_split += is_thp;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eca9cb56df4c..120a317d0938 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1080,6 +1080,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 			bad_page(page, "nonzero pincount");
 			goto out;
 		}
+		if (unlikely(folio->_private_1)) {
+			bad_page(page, "nonzero _private_1");
+			goto out;
+		}
 		break;
 	case 2:
 		/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 79a0d1917099..9f76e1a17f26 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2274,6 +2274,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 						hsz);
 			else
 				set_pte_at(mm, address, pvmw.pte, swp_pte);
+			if (vma->vm_flags & VM_LOCKED)
+				set_src_usage(subpage, SRC_PAGE_MLOCKED);
+			else
+				set_src_usage(subpage, SRC_PAGE_MAPPED);
 			trace_set_migration_pte(address, pte_val(swp_pte),
 						compound_order(&folio->page));
 			/*
diff --git a/mm/shmem.c b/mm/shmem.c
index fb2e23434509..12d4490a7e76 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -701,7 +701,7 @@ next:
 		folio_put(folio);
 
 		/* If split failed move the inode on the list back to shrinklist */
-		if (ret)
+		if (ret < 0)
 			goto move_back;
 
 		split++;
@@ -1469,7 +1469,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	if (folio_test_large(folio)) {
 		/* Ensure the subpages are still dirty */
 		folio_test_set_dirty(folio);
-		if (split_huge_page(page) < 0)
+		if (split_huge_page(page))
 			goto redirty;
 		folio = page_folio(page);
 		folio_clear_dirty(folio);
diff --git a/mm/truncate.c b/mm/truncate.c
index 21dc202519c2..b7671d151d06 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -210,6 +210,7 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
  */
 bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 {
+	int err;
 	loff_t pos = folio_pos(folio);
 	unsigned int offset, length;
 
@@ -241,8 +242,11 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 	folio_invalidate(folio, offset, length);
 	if (!folio_test_large(folio))
 		return true;
-	if (split_folio(folio) == 0)
+	err = split_folio(folio);
+	if (!err)
 		return true;
+	if (err > 0)
+		return false;
 	if (folio_test_dirty(folio))
 		return false;
 	truncate_inode_folio(folio->mapping, folio);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e610baa18413..2d8caa7c5c84 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1950,6 +1950,15 @@ retry:
 			goto keep_locked;
 		}
 
+		if (folio_ref_count(folio) == 1) {
+			folio_unlock(folio);
+			if (folio_put_testzero(folio))
+				goto free_it;
+
+			nr_reclaimed += nr_pages;
+			continue;
+		}
+
 		/*
 		 * If the folio was split above, the tail pages will make
 		 * their own pass through this function and be accounted
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4bcb6ec8a5b9..e49e774296b2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1358,6 +1358,9 @@ const char * const vmstat_text[] = {
 	"thp_split_page_failed",
 	"thp_deferred_split_page",
 	"thp_split_pmd",
+	"thp_shatter_page",
+	"thp_shatter_page_failed",
+	"thp_shatter_page_discarded",
 	"thp_scan_exceed_none_pte",
 	"thp_scan_exceed_swap_pte",
 	"thp_scan_exceed_share_pte",
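
The shattering path above threads per-subpage state through the new folio->_dst_ul[] array: try_to_migrate_one() and __split_huge_pmd_locked() record how each source subpage was mapped, prep_dst_pages() stores the replacement page in the same slot, and folio_dst_page() recovers it in __split_huge_page() and remove_migration_pte(). The standalone sketch below is illustrative only (struct fake_page, slot_usage() and slot_page() are not part of the patch); it shows the pointer-tagging idea behind src_page_usage(), set_src_usage() and folio_dst_page(): struct page addresses are at least 8-byte aligned, so the low three bits of each slot are free to carry the SRC_PAGE_* flags alongside the destination page pointer.

/*
 * Illustration only: pack per-subpage usage flags into the low bits of an
 * aligned pointer, the way the patch packs SRC_PAGE_* into _dst_ul[].
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define SRC_PAGE_MAPPED		(1UL << 0)
#define SRC_PAGE_MLOCKED	(1UL << 1)
#define SRC_PAGE_CLEAN		(1UL << 2)
#define SRC_PAGE_USAGE_MASK	((1UL << 3) - 1)

struct fake_page { char payload[64]; };	/* stand-in for struct page */

static unsigned long slot_usage(unsigned long slot)
{
	return slot & SRC_PAGE_USAGE_MASK;		/* mirrors src_page_usage() */
}

static struct fake_page *slot_page(unsigned long slot)
{
	return (struct fake_page *)(slot & ~SRC_PAGE_USAGE_MASK);	/* mirrors folio_dst_page() */
}

int main(void)
{
	unsigned long slot = 0;
	struct fake_page *dst = aligned_alloc(8, sizeof(*dst));

	slot |= SRC_PAGE_MAPPED;	/* rmap side: how the source subpage was mapped */
	slot |= (unsigned long)dst;	/* allocation side: attach the destination page */

	assert(slot_usage(slot) == SRC_PAGE_MAPPED);
	assert(slot_page(slot) == dst);
	printf("usage=%#lx page=%p\n", slot_usage(slot), (void *)slot_page(slot));

	free(dst);
	return 0;
}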
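
The three new counters are exported through /proc/vmstat under the names added to vmstat_text[] above. A minimal reader, assuming a kernel with this series applied (on other kernels the thp_shatter_* fields are simply absent and nothing is printed):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char name[128];
	unsigned long long val;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}

	/* prints thp_shatter_page, thp_shatter_page_failed, thp_shatter_page_discarded */
	while (fscanf(f, "%127s %llu", name, &val) == 2) {
		if (!strncmp(name, "thp_shatter_page", strlen("thp_shatter_page")))
			printf("%s %llu\n", name, val);
	}

	fclose(f);
	return 0;
}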