FROMLIST: BACKPORT: THP shattering: the reverse of collapsing
In contrast to split, shatter migrates occupied pages in a partially
mapped THP to a bunch of base folios. IOW, unlike split done in place,
shatter is the exact opposite of collapse.

The advantage of shattering is that it keeps the original THP intact.
The cost of copying during the migration is not a side effect, but
rather by design, since splitting is considered a discouraged behavior.
In retail terms, the return of a purchase is charged with a restocking
fee and the original goods can be resold.

THPs from ZONE_NOMERGE can only be shattered, since they cannot be
split or merged. THPs from ZONE_NOSPLIT can be shattered or split (the
latter requires [1]), if they are above the minimum order.

[1] https://lore.kernel.org/20240226205534.1603748-1-zi.yan@sent.com/

Change-Id: I7637124bb1ede775dba7b1d363d53256f337851f
Signed-off-by: Yu Zhao <yuzhao@google.com>
Link: https://lore.kernel.org/r/20240229183436.4110845-3-yuzhao@google.com/
Bug: 313807618
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
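For orientation only, here is a purely illustrative userspace analogy (a hedged sketch, not kernel code and not part of this patch): treat the THP as a 16-subpage buffer with an in-use bitmap; a split would carve it up in place, whereas a shatter copies just the in-use subpages to freshly allocated base pages and hands the original block back intact. Names such as NR_SUBPAGES and the bitmap are invented for the demo.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_SUBPAGES	16
#define SUBPAGE_SIZE	4096

int main(void)
{
	char *thp = calloc(NR_SUBPAGES, SUBPAGE_SIZE);	/* the toy "huge page" */
	char *dst[NR_SUBPAGES] = { NULL };		/* destination base pages */
	bool in_use[NR_SUBPAGES] = { [0] = true, [3] = true, [7] = true };
	int i, copied = 0;

	if (!thp)
		return 1;

	/* "Shatter": migrate only the occupied subpages to new base pages. */
	for (i = 0; i < NR_SUBPAGES; i++) {
		if (!in_use[i])
			continue;	/* unused subpages are simply discarded */
		dst[i] = malloc(SUBPAGE_SIZE);
		if (!dst[i])
			return 1;
		memcpy(dst[i], thp + (size_t)i * SUBPAGE_SIZE, SUBPAGE_SIZE);
		copied++;
	}

	printf("copied %d of %d subpages; the original %d-subpage block stays whole\n",
	       copied, NR_SUBPAGES, NR_SUBPAGES);

	for (i = 0; i < NR_SUBPAGES; i++)
		free(dst[i]);
	free(thp);
	return 0;
}

The copy cost is the deliberate "restocking fee" the commit message describes; the payoff is that the original contiguous block survives.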
include/linux/memcontrol.h
@@ -1155,6 +1155,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 }
 
 void split_page_memcg(struct page *head, unsigned int nr);
+void folio_copy_memcg(struct folio *folio);
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
@@ -1584,6 +1585,10 @@ static inline void split_page_memcg(struct page *head, unsigned int nr)
 {
 }
 
+static inline void folio_copy_memcg(struct folio *folio)
+{
+}
+
 static inline
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					    gfp_t gfp_mask,
include/linux/mm_inline.h
@@ -220,6 +220,25 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli
 	VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
 }
 
+static inline bool lru_gen_add_dst(struct lruvec *lruvec, struct folio *dst)
+{
+	int gen = folio_lru_gen(dst);
+	int type = folio_is_file_lru(dst);
+	int zone = folio_zonenum(dst);
+	struct lru_gen_folio *lrugen = &lruvec->lrugen;
+
+	if (gen < 0)
+		return false;
+
+	lockdep_assert_held(&lruvec->lru_lock);
+	VM_WARN_ON_ONCE_FOLIO(folio_lruvec(dst) != lruvec, dst);
+
+	list_add_tail(&dst->lru, &lrugen->folios[gen][type][zone]);
+	lru_gen_update_size(lruvec, dst, -1, gen);
+
+	return true;
+}
+
 static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
 {
 	unsigned long seq;
@@ -305,6 +324,11 @@ static inline bool lru_gen_in_fault(void)
 	return false;
 }
 
+static inline bool lru_gen_add_dst(struct lruvec *lruvec, struct folio *dst)
+{
+	return false;
+}
+
 static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
 {
 	return false;
include/linux/mm_types.h
@@ -352,14 +352,19 @@ struct folio {
 		struct {
 			unsigned long _flags_1;
 			unsigned long _head_1;
 			unsigned long _folio_avail;
 	/* public: */
 			atomic_t _entire_mapcount;
 			atomic_t _nr_pages_mapped;
 			atomic_t _pincount;
 #ifdef CONFIG_64BIT
+			unsigned int __padding;
 			unsigned int _folio_nr_pages;
 #endif
+			union {
 				unsigned long _private_1;
+				unsigned long *_dst_ul;
+				struct page **_dst_pp;
+			};
 	/* private: the union with struct page is transitional */
 		};
 		struct page __page_1;
@@ -405,6 +410,7 @@ FOLIO_MATCH(memcg_data, memcg_data);
 	offsetof(struct page, pg) + sizeof(struct page))
FOLIO_MATCH(flags, _flags_1);
FOLIO_MATCH(compound_head, _head_1);
+FOLIO_MATCH(private, _private_1);
#undef FOLIO_MATCH
#define FOLIO_MATCH(pg, fl) \
	static_assert(offsetof(struct folio, fl) == \
include/linux/vm_event_item.h
@@ -108,6 +108,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PAGE_FAILED,
 		THP_DEFERRED_SPLIT_PAGE,
 		THP_SPLIT_PMD,
+		THP_SHATTER_PAGE,
+		THP_SHATTER_PAGE_FAILED,
+		THP_SHATTER_PAGE_DISCARDED,
 		THP_SCAN_EXCEED_NONE_PTE,
 		THP_SCAN_EXCEED_SWAP_PTE,
 		THP_SCAN_EXCEED_SHARED_PTE,
mm/huge_memory.c
@@ -2629,6 +2629,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pte_swp_mksoft_dirty(entry);
 			if (uffd_wp)
 				entry = pte_swp_mkuffd_wp(entry);
+			if (vma->vm_flags & VM_LOCKED)
+				set_src_usage(page + i, SRC_PAGE_MLOCKED);
+			else
+				set_src_usage(page + i, SRC_PAGE_MAPPED);
 
 			VM_WARN_ON(!pte_none(ptep_get(pte + i)));
 			set_pte_at(mm, addr, pte + i, entry);
@@ -2780,6 +2784,156 @@ static void remap_page(struct folio *folio, unsigned long nr)
 	}
 }
 
+static int prep_to_unmap(struct folio *src)
+{
+	int nr_pages = folio_nr_pages(src);
+
+	if (folio_can_split(src))
+		return 0;
+
+	WARN_ON_ONCE(src->_dst_pp);
+
+	src->_dst_pp = kcalloc(nr_pages, sizeof(struct page *), GFP_ATOMIC);
+
+	return src->_dst_pp ? 0 : -ENOMEM;
+}
+
+static bool try_to_discard(struct folio *src, int i)
+{
+	int usage;
+	void *addr;
+	struct page *page = folio_page(src, i);
+
+	if (!folio_test_anon(src))
+		return false;
+
+	if (folio_test_swapcache(src))
+		return false;
+
+	usage = src_page_usage(page);
+	if (usage & SRC_PAGE_MLOCKED)
+		return false;
+
+	if (!(usage & SRC_PAGE_MAPPED))
+		return true;
+
+	addr = kmap_local_page(page);
+	if (!memchr_inv(addr, 0, PAGE_SIZE))
+		set_src_usage(page, SRC_PAGE_CLEAN);
+	kunmap_local(addr);
+
+	return can_discard_src(page);
+}
+
+static int prep_dst_pages(struct folio *src)
+{
+	int i;
+	int nr_pages = folio_nr_pages(src);
+
+	if (folio_can_split(src))
+		return 0;
+
+	if (WARN_ON_ONCE(!src->_dst_pp))
+		return -ENOMEM;
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *dst = NULL;
+
+		if (try_to_discard(src, i)) {
+			count_vm_event(THP_SHATTER_PAGE_DISCARDED);
+			continue;
+		}
+
+		do {
+			int nid = folio_nid(src);
+			gfp_t gfp = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+				    GFP_NOWAIT | __GFP_THISNODE;
+
+			if (dst)
+				__free_page(dst);
+
+			dst = alloc_pages_node(nid, gfp, 0);
+			if (!dst)
+				return -ENOMEM;
+		} while (!page_ref_freeze(dst, 1));
+
+		copy_highpage(dst, folio_page(src, i));
+		src->_dst_ul[i] |= (unsigned long)dst;
+
+		cond_resched();
+	}
+
+	return 0;
+}
+
+static void free_dst_pages(struct folio *src)
+{
+	int i;
+	int nr_pages = folio_nr_pages(src);
+
+	if (folio_can_split(src))
+		return;
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *dst = folio_dst_page(src, i);
+
+		if (!dst)
+			continue;
+
+		page_ref_unfreeze(dst, 1);
+		__free_page(dst);
+	}
+
+	kfree(src->_dst_pp);
+	src->_dst_pp = NULL;
+}
+
+static void reset_src_folio(struct folio *src)
+{
+	if (folio_can_split(src))
+		return;
+
+	if (WARN_ON_ONCE(!src->_dst_pp))
+		return;
+
+	if (!folio_mapping_flags(src))
+		src->mapping = NULL;
+
+	if (folio_test_anon(src) && folio_test_swapcache(src)) {
+		folio_clear_swapcache(src);
+		src->swap.val = 0;
+	}
+
+	kfree(src->_dst_pp);
+	src->_dst_pp = NULL;
+}
+
+static bool lru_add_dst(struct lruvec *lruvec, struct folio *src, struct folio *dst)
+{
+	if (folio_can_split(src))
+		return false;
+
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_lru(src), src);
+	VM_WARN_ON_ONCE_FOLIO(folio_test_lru(dst), dst);
+	VM_WARN_ON_ONCE_FOLIO(folio_lruvec(dst) != folio_lruvec(src), dst);
+
+	if (!lru_gen_add_dst(lruvec, dst)) {
+		enum lru_list lru = folio_lru_list(dst);
+		int zone = folio_zonenum(dst);
+		int delta = folio_nr_pages(dst);
+
+		if (folio_test_unevictable(dst))
+			dst->mlock_count = 0;
+		else
+			list_add_tail(&dst->lru, &src->lru);
+		update_lru_size(lruvec, lru, zone, delta);
+	}
+
+	folio_set_lru(dst);
+
+	return true;
+}
+
 static void lru_add_page_tail(struct page *head, struct page *tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
@@ -2793,7 +2947,7 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
 		VM_WARN_ON(PageLRU(head));
 		get_page(tail);
 		list_add_tail(&tail->lru, list);
-	} else {
+	} else if (!lru_add_dst(lruvec, page_folio(head), page_folio(tail))) {
 		/* head is still on lru (and we have it frozen) */
 		VM_WARN_ON(!PageLRU(head));
 		if (PageUnevictable(tail))
@@ -2808,7 +2962,7 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
 	struct page *head = &folio->page;
-	struct page *page_tail = head + tail;
+	struct page *page_tail = folio_dst_page(folio, tail);
 	/*
 	 * Careful: new_folio is not a "real" folio before we cleared PageTail.
	 * Don't pass it around before clear_compound_head().
@@ -2849,8 +3003,8 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
 			 LRU_GEN_MASK | LRU_REFS_MASK));
 
 	/* ->mapping in first and second tail page is replaced by other uses */
-	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
-			page_tail);
+	VM_BUG_ON_PAGE(folio_can_split(folio) && tail > 2 &&
+		       page_tail->mapping != TAIL_MAPPING, page_tail);
 	page_tail->mapping = head->mapping;
 	page_tail->index = head->index + tail;
 
@@ -2905,9 +3059,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	unsigned long offset = 0;
 	unsigned int nr = thp_nr_pages(head);
 	int i, nr_dropped = 0;
+	bool can_split = folio_can_split(folio);
 
 	/* complete memcg works before add pages to LRU */
-	split_page_memcg(head, nr);
+	if (can_split)
+		split_page_memcg(head, nr);
+	else
+		folio_copy_memcg(folio);
 
 	if (folio_test_anon(folio) && folio_test_swapcache(folio)) {
 		offset = swp_offset(folio->swap);
@@ -2920,46 +3078,51 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 	ClearPageHasHWPoisoned(head);
 
-	for (i = nr - 1; i >= 1; i--) {
+	for (i = nr - 1; i >= can_split; i--) {
+		struct page *dst = folio_dst_page(folio, i);
+
+		if (!dst)
+			continue;
+
 		__split_huge_page_tail(folio, i, lruvec, list);
 		/* Some pages can be beyond EOF: drop them from page cache */
-		if (head[i].index >= end) {
-			struct folio *tail = page_folio(head + i);
+		if (dst->index >= end) {
+			struct folio *tail = page_folio(dst);
 
-			if (shmem_mapping(head->mapping))
+			if (shmem_mapping(tail->mapping))
 				nr_dropped++;
 			else if (folio_test_clear_dirty(tail))
 				folio_account_cleaned(tail,
-					inode_to_wb(folio->mapping->host));
+					inode_to_wb(tail->mapping->host));
 			__filemap_remove_folio(tail, NULL);
 			folio_put(tail);
-		} else if (!PageAnon(page)) {
-			__xa_store(&head->mapping->i_pages, head[i].index,
-					head + i, 0);
+		} else if (!PageAnon(dst)) {
+			__xa_store(&dst->mapping->i_pages, dst->index, dst, 0);
 		} else if (swap_cache) {
-			__xa_store(&swap_cache->i_pages, offset + i,
-					head + i, 0);
+			__xa_store(&swap_cache->i_pages, offset + i, dst, 0);
 		}
 	}
 
-	ClearPageCompound(head);
+	if (can_split)
+		ClearPageCompound(head);
 	unlock_page_lruvec(lruvec);
 	/* Caller disabled irqs, so they are still disabled here */
 
-	split_page_owner(head, nr);
+	if (can_split)
+		split_page_owner(head, nr);
 
 	/* See comment in __split_huge_page_tail() */
 	if (PageAnon(head)) {
 		/* Additional pin to swap cache */
 		if (PageSwapCache(head)) {
-			page_ref_add(head, 2);
+			page_ref_add(head, 2 - !can_split);
 			xa_unlock(&swap_cache->i_pages);
 		} else {
 			page_ref_inc(head);
 		}
 	} else {
 		/* Additional pin to page cache */
-		page_ref_add(head, 2);
+		page_ref_add(head, 2 - !can_split);
 		xa_unlock(&head->mapping->i_pages);
 	}
 	local_irq_enable();
@@ -2969,8 +3132,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	remap_page(folio, nr);
 
 	for (i = 0; i < nr; i++) {
-		struct page *subpage = head + i;
-		if (subpage == page)
+		struct page *subpage = folio_dst_page(folio, i);
+
+		if (!subpage || subpage == page)
 			continue;
 		unlock_page(subpage);
 
@@ -2983,6 +3147,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		 */
 		free_page_and_swap_cache(subpage);
 	}
+
+	reset_src_folio(folio);
 }
 
 /* Racy check whether the huge page can be split */
@@ -2990,9 +3156,6 @@ static bool can_split_folio(struct folio *folio, int *pextra_pins)
 {
 	int extra_pins;
 
-	if (!folio_can_split(folio))
-		return false;
-
 	/* Additional pins from page cache */
 	if (folio_test_anon(folio))
 		extra_pins = folio_test_swapcache(folio) ?
@@ -3112,8 +3275,21 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		goto out_unlock;
 	}
 
+	ret = prep_to_unmap(folio);
+	if (ret)
+		goto out_unlock;
+
 	unmap_folio(folio);
 
+	if (!folio_ref_freeze(folio, 1 + extra_pins)) {
+		ret = -EAGAIN;
+		goto remap;
+	}
+
+	ret = prep_dst_pages(folio);
+	if (ret)
+		goto unfreeze;
+
 	/* block interrupt reentry in xa_lock and spinlock */
 	local_irq_disable();
 	if (mapping) {
@@ -3123,44 +3299,44 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		 */
 		xas_lock(&xas);
 		xas_reset(&xas);
-		if (xas_load(&xas) != folio)
+		if (xas_load(&xas) != folio) {
+			ret = -EAGAIN;
 			goto fail;
+		}
 	}
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
 	spin_lock(&ds_queue->split_queue_lock);
-	if (folio_ref_freeze(folio, 1 + extra_pins)) {
-		if (!list_empty(&folio->_deferred_list)) {
-			ds_queue->split_queue_len--;
-			list_del(&folio->_deferred_list);
-		}
-		spin_unlock(&ds_queue->split_queue_lock);
-		if (mapping) {
-			int nr = folio_nr_pages(folio);
+	if (!list_empty(&folio->_deferred_list)) {
+		ds_queue->split_queue_len--;
+		list_del_init(&folio->_deferred_list);
+	}
+	spin_unlock(&ds_queue->split_queue_lock);
+	if (mapping) {
+		int nr = folio_nr_pages(folio);
 
-			xas_split(&xas, folio, folio_order(folio));
-			if (folio_test_pmd_mappable(folio)) {
-				if (folio_test_swapbacked(folio)) {
-					__lruvec_stat_mod_folio(folio,
-							NR_SHMEM_THPS, -nr);
-				} else {
-					__lruvec_stat_mod_folio(folio,
-							NR_FILE_THPS, -nr);
-					filemap_nr_thps_dec(mapping);
-				}
+		xas_split(&xas, folio, folio_order(folio));
+		if (folio_test_pmd_mappable(folio)) {
+			if (folio_test_swapbacked(folio)) {
+				__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr);
+			} else {
+				__lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr);
+				filemap_nr_thps_dec(mapping);
+			}
 		}
 	}
 
-		__split_huge_page(page, list, end);
-		ret = 0;
-	} else {
-		spin_unlock(&ds_queue->split_queue_lock);
+	__split_huge_page(page, list, end);
+	if (ret) {
 fail:
 		if (mapping)
 			xas_unlock(&xas);
 		local_irq_enable();
+unfreeze:
+		folio_ref_unfreeze(folio, 1 + extra_pins);
+remap:
+		free_dst_pages(folio);
 		remap_page(folio, folio_nr_pages(folio));
-		ret = -EAGAIN;
 	}
 
 out_unlock:
@@ -3172,6 +3348,12 @@ out_unlock:
 		i_mmap_unlock_read(mapping);
 out:
 	xas_destroy(&xas);
+
+	if (!folio_can_split(folio)) {
+		count_vm_event(!ret ? THP_SHATTER_PAGE : THP_SHATTER_PAGE_FAILED);
+		return ret ? : 1;
+	}
+
 	count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
 	return ret;
 }
mm/internal.h
@@ -1416,4 +1416,43 @@ struct vma_prepare {
 	struct vm_area_struct *remove;
 	struct vm_area_struct *remove2;
 };
+
+#define SRC_PAGE_MAPPED		BIT(0)
+#define SRC_PAGE_MLOCKED	BIT(1)
+#define SRC_PAGE_CLEAN		BIT(2)
+#define SRC_PAGE_USAGE_MASK	(BIT(3) - 1)
+
+static inline unsigned long src_page_usage(struct page *page)
+{
+	struct folio *src = page_folio(page);
+	int i = folio_page_idx(src, page);
+
+	if (folio_can_split(src) || !src->_dst_ul)
+		return 0;
+
+	return src->_dst_ul[i] & SRC_PAGE_USAGE_MASK;
+}
+
+static inline bool can_discard_src(struct page *page)
+{
+	return src_page_usage(page) & SRC_PAGE_CLEAN;
+}
+
+static inline void set_src_usage(struct page *page, unsigned long usage)
+{
+	struct folio *src = page_folio(page);
+	int i = folio_page_idx(src, page);
+
+	if (!folio_can_split(src) && src->_dst_ul)
+		src->_dst_ul[i] |= usage;
+}
+
+static inline struct page *folio_dst_page(struct folio *src, int i)
+{
+	if (folio_can_split(src) || !src->_dst_ul)
+		return folio_page(src, i);
+
+	return (void *)(src->_dst_ul[i] & ~SRC_PAGE_USAGE_MASK);
+}
+
 #endif /* __MM_INTERNAL_H */
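As a side note on the helpers above: a single _dst_ul slot carries both the destination page pointer and the SRC_PAGE_* usage bits, which works because page pointers are aligned far beyond 8 bytes, leaving the low bits free. Below is a minimal userspace sketch of that packing scheme (illustrative only, not kernel code; the 4 KiB alignment stands in for the alignment of a real struct page pointer).

#include <stdio.h>
#include <stdlib.h>

#define SRC_PAGE_MAPPED		(1UL << 0)
#define SRC_PAGE_MLOCKED	(1UL << 1)
#define SRC_PAGE_CLEAN		(1UL << 2)
#define SRC_PAGE_USAGE_MASK	((1UL << 3) - 1)

int main(void)
{
	unsigned long slot = 0;
	/* Stand-in for a freshly allocated base page; alignment keeps the low bits zero. */
	void *dst = aligned_alloc(4096, 4096);

	if (!dst)
		return 1;

	slot |= SRC_PAGE_MAPPED;	/* recorded while the source PTEs are unmapped */
	slot |= (unsigned long)dst;	/* recorded when the destination page is allocated */

	printf("usage bits: %#lx\n", slot & SRC_PAGE_USAGE_MASK);
	printf("dst pointer: %p\n", (void *)(slot & ~SRC_PAGE_USAGE_MASK));

	free(dst);
	return 0;
}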
mm/madvise.c
@@ -418,7 +418,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 		err = split_folio(folio);
 		folio_unlock(folio);
 		folio_put(folio);
-		if (!err)
+		if (err >= 0)
 			goto regular_folio;
 		return 0;
 	}
@@ -516,7 +516,7 @@ regular_folio:
 				if (!start_pte)
 					break;
 				arch_enter_lazy_mmu_mode();
-				if (!err)
+				if (err >= 0)
 					nr = 0;
 				continue;
 			}
@@ -749,7 +749,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 				if (!start_pte)
 					break;
 				arch_enter_lazy_mmu_mode();
-				if (!err)
+				if (err >= 0)
 					nr = 0;
 				continue;
 			}
mm/memcontrol.c
@@ -3476,6 +3476,53 @@ void split_page_memcg(struct page *head, unsigned int nr)
 		css_get_many(&memcg->css, nr - 1);
 }
 
+void folio_copy_memcg(struct folio *src)
+{
+	int i;
+	unsigned long flags;
+	int delta = 0;
+	int nr_pages = folio_nr_pages(src);
+	struct mem_cgroup *memcg = folio_memcg(src);
+
+	if (folio_can_split(src))
+		return;
+
+	if (WARN_ON_ONCE(!src->_dst_pp))
+		return;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	if (WARN_ON_ONCE(!memcg))
+		return;
+
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(src), src);
+	VM_WARN_ON_ONCE_FOLIO(folio_ref_count(src), src);
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *dst = folio_dst_page(src, i);
+
+		if (!dst)
+			continue;
+
+		commit_charge(page_folio(dst), memcg);
+		delta++;
+	}
+
+	if (!mem_cgroup_is_root(memcg)) {
+		page_counter_charge(&memcg->memory, delta);
+		if (do_memsw_account())
+			page_counter_charge(&memcg->memsw, delta);
+	}
+
+	css_get_many(&memcg->css, delta);
+
+	local_irq_save(flags);
+	mem_cgroup_charge_statistics(memcg, delta);
+	memcg_check_events(memcg, folio_nid(src));
+	local_irq_restore(flags);
+}
+
 #ifdef CONFIG_SWAP
 /**
  * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
mm/memory-failure.c
@@ -2286,7 +2286,7 @@ try_again:
 		 * page is a valid handlable page.
 		 */
 		SetPageHasHWPoisoned(hpage);
-		if (try_to_split_thp_page(p) < 0) {
+		if (try_to_split_thp_page(p)) {
 			res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
 			goto unlock_mutex;
 		}
mm/migrate.c
@@ -185,36 +185,52 @@ EXPORT_SYMBOL_GPL(putback_movable_pages);
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static bool remove_migration_pte(struct folio *folio,
-		struct vm_area_struct *vma, unsigned long addr, void *old)
+static bool remove_migration_pte(struct folio *dst,
+		struct vm_area_struct *vma, unsigned long addr, void *arg)
 {
-	DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+	struct folio *src = arg;
+	DEFINE_FOLIO_VMA_WALK(pvmw, src, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		rmap_t rmap_flags = RMAP_NONE;
 		pte_t old_pte;
 		pte_t pte;
 		swp_entry_t entry;
-		struct page *new;
+		struct page *page;
+		struct folio *folio;
 		unsigned long idx = 0;
 
 		/* pgoff is invalid for ksm pages, but they are never large */
-		if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+		if (folio_test_large(dst) && !folio_test_hugetlb(dst))
 			idx = linear_page_index(vma, pvmw.address) - pvmw.pgoff;
-		new = folio_page(folio, idx);
+		page = folio_page(dst, idx);
+
+		if (src == dst) {
+			if (can_discard_src(page)) {
+				VM_WARN_ON_ONCE_FOLIO(!folio_test_anon(src), src);
+
+				pte_clear_not_present_full(pvmw.vma->vm_mm, pvmw.address,
+							   pvmw.pte, false);
+				dec_mm_counter(pvmw.vma->vm_mm, MM_ANONPAGES);
+				continue;
+			}
+			page = folio_dst_page(src, idx);
+		}
+
+		folio = page_folio(page);
 
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
 		/* PMD-mapped THP migration entry */
 		if (!pvmw.pte) {
 			VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
 					!folio_test_pmd_mappable(folio), folio);
-			remove_migration_pmd(&pvmw, new);
+			remove_migration_pmd(&pvmw, page);
 			continue;
 		}
 #endif
 
 		folio_get(folio);
-		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
+		pte = mk_pte(page, READ_ONCE(vma->vm_page_prot));
 		old_pte = ptep_get(pvmw.pte);
 		if (pte_swp_soft_dirty(old_pte))
 			pte = pte_mksoft_dirty(pte);
@@ -232,13 +248,13 @@ static bool remove_migration_pte(struct folio *folio,
 		if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
 			rmap_flags |= RMAP_EXCLUSIVE;
 
-		if (unlikely(is_device_private_page(new))) {
+		if (unlikely(is_device_private_page(page))) {
 			if (pte_write(pte))
 				entry = make_writable_device_private_entry(
-							page_to_pfn(new));
+							page_to_pfn(page));
 			else
 				entry = make_readable_device_private_entry(
-							page_to_pfn(new));
+							page_to_pfn(page));
 			pte = swp_entry_to_pte(entry);
 			if (pte_swp_soft_dirty(old_pte))
 				pte = pte_swp_mksoft_dirty(pte);
@@ -264,17 +280,17 @@ static bool remove_migration_pte(struct folio *folio,
 #endif
 		{
 			if (folio_test_anon(folio))
-				folio_add_anon_rmap_pte(folio, new, vma,
+				folio_add_anon_rmap_pte(folio, page, vma,
 							pvmw.address, rmap_flags);
 			else
-				folio_add_file_rmap_pte(folio, new, vma);
+				folio_add_file_rmap_pte(folio, page, vma);
 			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 		}
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_drain_local();
 
 		trace_remove_migration_pte(pvmw.address, pte_val(pte),
-					   compound_order(new));
+					   compound_order(page));
 
 		/* No need to invalidate - it was non-present before */
 		update_mmu_cache(vma, pvmw.address, pvmw.pte);
@@ -1482,10 +1498,30 @@ out:
 	return rc;
 }
 
-static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
+static inline int try_split_folio(struct folio *folio, struct list_head *split_folios,
+				  int reason)
 {
 	int rc;
 
+	if (!folio_can_split(folio)) {
+		LIST_HEAD(head);
+
+		if (reason != MR_CONTIG_RANGE)
+			return -EBUSY;
+
+		folio_lock(folio);
+		rc = split_folio_to_list(folio, &head);
+		folio_unlock(folio);
+
+		if (rc > 0) {
+			putback_movable_pages(&head);
+			return 0;
+		}
+
+		VM_WARN_ON_ONCE_FOLIO(!rc, folio);
+		return rc;
+	}
+
 	folio_lock(folio);
 	rc = split_folio_to_list(folio, split_folios);
 	folio_unlock(folio);
@@ -1665,7 +1701,7 @@ static int migrate_pages_batch(struct list_head *from,
 		if (!thp_migration_supported() && is_thp) {
 			nr_failed++;
 			stats->nr_thp_failed++;
-			if (!try_split_folio(folio, split_folios)) {
+			if (!try_split_folio(folio, split_folios, reason)) {
 				stats->nr_thp_split++;
 				continue;
 			}
@@ -1696,7 +1732,7 @@ static int migrate_pages_batch(struct list_head *from,
 			stats->nr_thp_failed += is_thp;
 			/* Large folio NUMA faulting doesn't split to retry. */
 			if (folio_test_large(folio) && !nosplit) {
-				int ret = try_split_folio(folio, split_folios);
+				int ret = try_split_folio(folio, split_folios, reason);
 
 				if (!ret) {
 					stats->nr_thp_split += is_thp;
mm/page_alloc.c
@@ -1080,6 +1080,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 			bad_page(page, "nonzero pincount");
 			goto out;
 		}
+		if (unlikely(folio->_private_1)) {
+			bad_page(page, "nonzero _private_1");
+			goto out;
+		}
 		break;
 	case 2:
 		/*
mm/rmap.c
@@ -2274,6 +2274,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 						hsz);
 			else
 				set_pte_at(mm, address, pvmw.pte, swp_pte);
+			if (vma->vm_flags & VM_LOCKED)
+				set_src_usage(subpage, SRC_PAGE_MLOCKED);
+			else
+				set_src_usage(subpage, SRC_PAGE_MAPPED);
 			trace_set_migration_pte(address, pte_val(swp_pte),
 						compound_order(&folio->page));
 			/*
mm/shmem.c
@@ -701,7 +701,7 @@ next:
 		folio_put(folio);
 
 		/* If split failed move the inode on the list back to shrinklist */
-		if (ret)
+		if (ret < 0)
 			goto move_back;
 
 		split++;
@@ -1469,7 +1469,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	if (folio_test_large(folio)) {
 		/* Ensure the subpages are still dirty */
 		folio_test_set_dirty(folio);
-		if (split_huge_page(page) < 0)
+		if (split_huge_page(page))
 			goto redirty;
 		folio = page_folio(page);
 		folio_clear_dirty(folio);
mm/truncate.c
@@ -210,6 +210,7 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
  */
 bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 {
+	int err;
 	loff_t pos = folio_pos(folio);
 	unsigned int offset, length;
 
@@ -241,8 +242,11 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 	folio_invalidate(folio, offset, length);
 	if (!folio_test_large(folio))
 		return true;
-	if (split_folio(folio) == 0)
+	err = split_folio(folio);
+	if (!err)
 		return true;
+	if (err > 0)
+		return false;
 	if (folio_test_dirty(folio))
 		return false;
 	truncate_inode_folio(folio->mapping, folio);
mm/vmscan.c
@@ -1950,6 +1950,15 @@ retry:
 				goto keep_locked;
 		}
 
+		if (folio_ref_count(folio) == 1) {
+			folio_unlock(folio);
+			if (folio_put_testzero(folio))
+				goto free_it;
+
+			nr_reclaimed += nr_pages;
+			continue;
+		}
+
 		/*
 		 * If the folio was split above, the tail pages will make
 		 * their own pass through this function and be accounted
mm/vmstat.c
@@ -1358,6 +1358,9 @@ const char * const vmstat_text[] = {
 	"thp_split_page_failed",
 	"thp_deferred_split_page",
 	"thp_split_pmd",
+	"thp_shatter_page",
+	"thp_shatter_page_failed",
+	"thp_shatter_page_discarded",
 	"thp_scan_exceed_none_pte",
 	"thp_scan_exceed_swap_pte",
 	"thp_scan_exceed_share_pte",