Merge android15-6.6 into android15-6.6-lts

This merges the android15-6.6 branch into the -lts branch, catching
it up with the latest changes in there.

It contains the following commits:

* 3a0107a38e ANDROID: KVM: arm64: Ensure SVE initialization precedes PSCI for protected VCPUs
* 3b75103301 ANDROID: 16K: Use vma_area slab cache for pad VMA
* a213abada8 UPSTREAM: af_unix: Fix uninit-value in __unix_walk_scc()
* 5156d49ed9 UPSTREAM: af_unix: Fix garbage collection of embryos carrying OOB with SCM_RIGHTS
* fbd783363d ANDROID: af_unix: Provide ABI fixes for recently introduced 'struct scm_fp_list' attribute 'dead'
* ddd6979a15 UPSTREAM: af_unix: Add dead flag to struct scm_fp_list.
* 95a397ac6b UPSTREAM: af_unix: Don't access successor in unix_del_edges() during GC.
* a130d07d24 UPSTREAM: af_unix: Try not to hold unix_gc_lock during accept().
* 5ada288086 UPSTREAM: af_unix: Remove lock dance in unix_peek_fds().
* 11d208f893 UPSTREAM: af_unix: Replace garbage collection algorithm.
* 67a3a58da1 UPSTREAM: af_unix: Detect dead SCC.
* b9f8dfdb54 UPSTREAM: af_unix: Assign a unique index to SCC.
* b22b0a7597 UPSTREAM: af_unix: Avoid Tarjan's algorithm if unnecessary.
* 1e4d62adeb UPSTREAM: af_unix: Skip GC if no cycle exists.
* 250c362acd UPSTREAM: af_unix: Save O(n) setup of Tarjan's algo.
* 0c40a05117 UPSTREAM: af_unix: Fix up unix_edge.successor for embryo socket.
* f5ea8b439d UPSTREAM: af_unix: Save listener for embryo socket.
* 279ed20d5f UPSTREAM: af_unix: Detect Strongly Connected Components.
* 16dca90335 UPSTREAM: af_unix: Iterate all vertices by DFS.
* 80df4d17af UPSTREAM: af_unix: Bulk update unix_tot_inflight/unix_inflight when queuing skb.
* 40549e6976 ANDROID: af_unix: Provide ABI fixes for recently introduced 'struct scm_fp_list' attribute 'inflight'
* 769fc01f23 UPSTREAM: af_unix: Link struct unix_edge when queuing skb.
* de6b1e85b9 ANDROID: af_unix: Provide ABI fixes for recently introduced 'struct scm_fp_list' attribute 'edges'
* 844c9666eb UPSTREAM: af_unix: Allocate struct unix_edge for each inflight AF_UNIX fd.
* c93b3ba51e ANDROID: af_unix: Provide ABI fixes for recently introduced 'struct scm_fp_list' attribute 'vertices'
* ffef32ddaf UPSTREAM: af_unix: Allocate struct unix_vertex for each inflight AF_UNIX fd.
* f972f2d7b1 ANDROID: af_unix: Allocate memory for the largest possible size of 'struct scm_fp_list'
* b077571da9 UPSTREAM: af_unix: Remove CONFIG_UNIX_SCM.
* a390e62751 ANDROID: Align x86-64 microdroid cgroup support with aarch64 microdroid
* 6dbb3c2e90 BACKPORT: mm: remove folio from deferred split list before uncharging it
* a8553b4e2a BACKPORT: mm: use __page_cache_release() in folios_put()
* 4d61851d14 UPSTREAM: mm: fix list corruption in put_pages_list
* f61f355bdc UPSTREAM: mm: use free_unref_folios() in put_pages_list()
* 316b2e6e4b BACKPORT: mm: remove use of folio list from folios_put()
* f9c6fb1b82 BACKPORT: memcg: add mem_cgroup_uncharge_folios()
* 3bc695b2be Merge tag 'android15-6.6.92_r00' into android15-6.6
* 0813441033 FROMGIT: scsi: core: ufs: Fix a hang in the error handler
* a74f052176 FROMGIT: genirq/cpuhotplug: Restore affinity even for suspended IRQ
* fc6844d9d2 FROMGIT: genirq/cpuhotplug: Rebalance managed interrupts across multi-CPU hotplug
* 0bc63a98d9 ANDROID: abi_gki_aarch64_vivo: Update symbol list
* 8fb77f6f9d ANDROID: mm: Reset unused page flag bits on free
* f0bd864fe0 Revert "ANDROID: mm: Set PAGE_BLOCK_ORDER to 7 when ARM64_16K_PAGES"
* 97f5b70ad3 ANDROID: GKI: Update symbol list for xiaomi
* 2bc7bc937c BACKPORT: erofs: lazily initialize per-CPU workers and CPU hotplug hooks
* 434940a426 FROMGIT: scsi: ufs: mcq: Delete ufshcd_release_scsi_cmd() in ufshcd_mcq_abort()
* 0ac9aa9b62 ANDROID: GKI: Rename xring's symbol list.
* f56b0532df BACKPORT: mm: set pageblock_order to HPAGE_PMD_ORDER in case with !CONFIG_HUGETLB_PAGE but THP enabled
* f19494634f ANDROID: GKI: Update symbol list for vivo
* 68191d9c7a ANDROID: vendor_hooks: add hook to retry mempool allocation without delay
* 45afa56280 ANDROID: mm: Set PAGE_BLOCK_ORDER to 7 when ARM64_16K_PAGES
* 3148030c78 ANDROID: KVM: arm64: Fix hyp_alloc(0)
* 4ec55296c6 ANDROID: fix out-of-bounds error when trace_create_new_event
* d9ec0e18f4 ANDROID: CONFIG_CRYPTO_SHA1_ARM64_CE=y to GKI and Microdroid kernel
* 0272a2ffdc BACKPORT: FROMGIT: mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order
* 86ba3f3eb2 BACKPORT: binder: Create safe versions of binder log files
* 8a55e7a02a UPSTREAM: binder: Refactor binder_node print synchronization
* fe02cfa135 ANDROID: iommu/arm-smmu-v3-kvm: Fix accidental domain ID freeing in free()
* 9733cd1fa2 ANDROID: GKI: Update xiaomi symbol list.
* 125f87a148 UPSTREAM: mm/memcg: use kmem_cache when alloc memcg pernode info
* 78e6a3d422 UPSTREAM: mm/memcg: use kmem_cache when alloc memcg
* b6bde4b648 UPSTREAM: mm/memcg: move mem_cgroup_init() ahead of cgroup_init()
* 476cb9bc9b UPSTREAM: af_unix: Remove io_uring code for GC.
* fb219cbb0b UPSTREAM: af_unix: Replace BUG_ON() with WARN_ON_ONCE().
* 3c39219343 ANDROID: Enable memory controller for microdroid
* c6325b075d ANDROID: cgroup: Fix cgroup_root backport padding calculation
* 452d899d2f ANDROID: GKI: Fix up abi issue in struct scm_fp_list
* cec9cb02ce UPSTREAM: af_unix: Try to run GC async.
* 93c2d24134 BACKPORT: FROMGIT: usb: typec: tcpm: move tcpm_queue_vdm_unlocked to asynchronous work
* ee016b98b7 BACKPORT: usb: typec: tcpm: enforce ready state when queueing alt mode vdm
* 4be94a6b03 ANDROID: ABI: Update pixel symbol list
* 6af2e78f07 ANDROID: fix ABI breakage for trace_array extensions
* 6f62c0d0fb UPSTREAM: tracing: Allow creating instances with specified system events
* f8d73c6178 UPSTREAM: af_unix: Run GC on only one CPU.
* a70bd568b1 UPSTREAM: af_unix: Return struct unix_sock from unix_get_socket().
* c1b974e51d UPSTREAM: iommu: Handle race with default domain setup
* 315fdde476 ANDROID: ABI: Update pixel symbol list
* 32288ce2f2 ANDROID: vendor_hooks: Add hooks for xhci reset
* dd8fcb5398 ANDROID: GKI: deferred split queue corruption - ABI fixup
* 374babecde UPSTREAM: mm/thp: fix deferred split queue not partially_mapped: fix
* 3a8faa5b25 BACKPORT: mm/thp: fix deferred split unqueue naming and locking
* 84cc354617 UPSTREAM: mm/thp: fix deferred split queue not partially_mapped
* dd46964f3e BACKPORT: mm: add sysfs entry to disable splitting underused THPs
* 40ffd525e5 UPSTREAM: mm: split underused THPs
* a63eadb11d BACKPORT: mm: introduce a pageflag for partially mapped folios
* f1b73b0513 UPSTREAM: mm/migrate: fix kernel BUG at mm/compaction.c:2761!
* cbbd153073 BACKPORT: mm/migrate: split source folio if it is on deferred split list
* c6f085c328 BACKPORT: mm: count the number of partially mapped anonymous THPs per size
* 545db6094c BACKPORT: mm: count the number of anonymous THPs per size
* 6ee860d0d4 UPSTREAM: mm: separate out FOLIO_FLAGS from PAGEFLAGS
* f052bbc24d UPSTREAM: mm: selftest to verify zero-filled pages are mapped to zeropage
* d826c84482 BACKPORT: mm: remap unused subpages to shared zeropage when splitting isolated thp
* bc9f1a0f43 Revert "BACKPORT: mm/thp: fix deferred split unqueue naming and locking"
* c06fa3b5cd ANDROID: GKI: page_alloc ABI fixup
* 819bdc71dc BACKPORT: mm: page_alloc: batch vmstat updates in expand()
* c97dfdfac0 UPSTREAM: mm/page_alloc: keep track of free highatomic
* cdff4faf2b UPSTREAM: mm: remove unused has_isolate_pageblock
* 5b5902fcf6 UPSTREAM: mm: page_alloc: fix highatomic typing in multi-block buddies
* 48e8763a95 BACKPORT: mm: page_alloc: consolidate free page accounting
* a4f7bd4b3d BACKPORT: mm: page_isolation: prepare for hygienic freelists
* a8dcfbc68b UPSTREAM: mm: page_alloc: set migratetype inside move_freepages()
* 209c219a0f BACKPORT: mm: page_alloc: close migratetype race between freeing and stealing
* 1a3654f59a BACKPORT: mm: page_alloc: fix freelist movement during block conversion
* 861e9d3c44 UPSTREAM: mm: page_alloc: fix move_freepages_block() range error
* 350c3b1d61 UPSTREAM: mm: page_alloc: move free pages when converting block during isolation
* f76299151c UPSTREAM: mm: page_alloc: fix up block types when merging compatible blocks
* cb610236ed UPSTREAM: mm: page_alloc: optimize free_unref_folios()
* 606130dacb BACKPORT: mm: page_alloc: remove pcppage migratetype caching
* a7a880e6de UPSTREAM: mm: allow non-hugetlb large folios to be batch processed
* f17c4db9cf BACKPORT: mm: handle large folios in free_unref_folios()
* c7f67cfb85 UPSTREAM: mm: use folios_put() in __folio_batch_release()
* 445fa9a71a BACKPORT: mm: add free_unref_folios()
* cc058410b3 BACKPORT: mm: convert free_unref_page_list() to use folios
* 980cb4e2ba BACKPORT: mm: make folios_put() the basis of release_pages()
* 5f4ed005d7 Revert "BACKPORT: mm: page_alloc: remove pcppage migratetype caching"
* bab99c1b7e Revert "UPSTREAM: mm: page_alloc: fix up block types when merging compatible blocks"
* 94e3afbb3d Revert "UPSTREAM: mm: page_alloc: move free pages when converting block during isolation"
* 13aa15180a Revert "UPSTREAM: mm: page_alloc: fix move_freepages_block() range error"
* d47518de38 Revert "UPSTREAM: mm: page_alloc: fix freelist movement during block conversion"
* 135ab7374e Revert "BACKPORT: mm: page_alloc: close migratetype race between freeing and stealing"
* 9ed2d2fba2 Revert "UPSTREAM: mm: page_alloc: set migratetype inside move_freepages()"
* efbdb11ac1 Revert "BACKPORT: mm: page_isolation: prepare for hygienic freelists"
* 7d424e0f80 Revert "BACKPORT: mm: page_alloc: consolidate free page accounting"
* 8a91cd1d26 Revert "BACKPORT: mm: page_alloc: batch vmstat updates in expand()"
* be6d3cc085 Revert "UPSTREAM: mm: page_alloc: fix highatomic typing in multi-block buddies"
* bbc65a78d2 Revert "BACKPORT: mm/page_alloc: keep track of free highatomic"
* a7a0d95bca Revert "BACKPORT: mm: page_alloc: optimize free_unref_folios()"
* 8b5d78fb5c Revert "ANDROID: fuse-bpf: fix wrong logic in read backing"
* c1488e58c3 ANDROID: GKI: Update symbol list for Nvidia
* 1e3d640b05 ANDROID: GKI: Add initial Nvidia symbol list
* 5fa476bd0b ANDROID: Add ufs headers to aarch64 allowlist
* 17daf81bcc ANDROID: KVM: arm64: Allow relinqush for p-guest with huge-mappings
* 297e1ff805 ANDROID: KVM: arm64: Use unmap for pKVM guests memory relinquish
* 7c95a219c0 ANDROID: KVM: arm64: Add hyp request SPLIT
* e56d181356 ANDROID: KVM: arm64: Convert kvm_pinned_pages to an interval-tree
* 390699f93d ANDROID: KVM: arm64: Add host_split_guest for pKVM
* 16df80ab9c ANDROID: KVM: arm64: Disable relinquish for p-guest huge-mappings
* 549ac47ca0 FROMGIT: PM: runtime: fix denying of auto suspend in pm_suspend_timer_fn()
* 4cdfd02ff2 ANDROID: Enable SHA1 for microdroid
* ab0ad8d198 BACKPORT: mm: page_alloc: optimize free_unref_folios()

Change-Id: Ic5571553dd22417e2ff66c8e99c114b8d79476f2
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Committed by Greg Kroah-Hartman on 2025-06-25 09:43:52 +00:00; 87 changed files with 3280 additions and 989 deletions.

@@ -126,6 +126,7 @@ filegroup(
         "android/abi_gki_aarch64_mtk",
         "android/abi_gki_aarch64_mtktv",
         "android/abi_gki_aarch64_nothing",
+        "android/abi_gki_aarch64_nvidia",
         "android/abi_gki_aarch64_oplus",
         "android/abi_gki_aarch64_paragon",
         "android/abi_gki_aarch64_pixel",
@@ -140,7 +141,7 @@ filegroup(
         "android/abi_gki_aarch64_virtual_device",
         "android/abi_gki_aarch64_vivo",
         "android/abi_gki_aarch64_xiaomi",
-        "android/abi_gki_aarch64_xiaomi2",
+        "android/abi_gki_aarch64_xiaomi_xring",
     ],
     visibility = ["//visibility:public"],
 )
@@ -1028,6 +1029,9 @@ ddk_headers(
         "drivers/pci/controller/dwc/pcie-designware.h",
         "drivers/thermal/thermal_core.h",
         "drivers/thermal/thermal_netlink.h",
+        "drivers/ufs/core/ufshcd-crypto.h",
+        "drivers/ufs/core/ufshcd-priv.h",
+        "drivers/ufs/host/ufshcd-pltfrm.h",
         "drivers/usb/dwc3/core.h",
         "sound/usb/card.h",
         "sound/usb/usbaudio.h",
@@ -1045,6 +1049,7 @@ ddk_headers(
         "drivers/extcon",
         "drivers/pci/controller/dwc",
         "drivers/thermal",
+        "drivers/ufs",
         "drivers/usb",
         "sound/usb",
         "include",

@@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::

 	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size

+All THPs at fault and collapse time will be added to _deferred_list,
+and will therefore be split under memory pressure if they are considered
+"underused". A THP is underused if the number of zero-filled pages in
+the THP is above max_ptes_none (see below). It is possible to disable
+this behaviour by writing 0 to shrink_underused, and enable it by writing
+1 to it::
+
+	echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+	echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+
 khugepaged will be automatically started when one or more hugepage
 sizes are enabled (either by directly setting "always" or "madvise",
 or by setting "inherit" while the top-level enabled is set to "always"
@@ -443,6 +453,12 @@ thp_deferred_split_page
 	splitting it would free up some memory. Pages on split queue are
 	going to be split under memory pressure.

+thp_underused_split_page
+	is incremented when a huge page on the split queue was split
+	because it was underused. A THP is underused if the number of
+	zero pages in the THP is above a certain threshold
+	(/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
+
 thp_split_pmd
 	is incremented every time a PMD split into table of PTEs.
 	This can happen, for instance, when application calls mprotect() or
@@ -510,6 +526,18 @@ split_deferred
 	it would free up some memory. Pages on split queue are going to
 	be split under memory pressure, if splitting is possible.

+nr_anon
+	the number of anonymous THP we have in the whole system. These THPs
+	might be currently entirely mapped or have partially unmapped/unused
+	subpages.
+
+nr_anon_partially_mapped
+	the number of anonymous THP which are likely partially mapped, possibly
+	wasting memory, and have been queued for deferred memory reclamation.
+	Note that in some corner cases (e.g., failed migration), we might detect
+	an anonymous THP as "partially mapped" and count it here, even though it
+	is not actually partially mapped anymore.
+
 As the system ages, allocating huge pages may be expensive as the
 system uses memory compaction to copy data around memory to free a
 huge page for use. There are some counters in ``/proc/vmstat`` to help
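The knob and counters documented above can be read back from userspace. Below is a minimal, illustrative C snippet (not part of the patch) that dumps the two sysfs files quoted verbatim in the text; writing 0 or 1 to shrink_underused, as in the echo examples, typically requires root.

/*
 * Minimal sketch: read the THP knobs referenced in the documentation hunk
 * above. Only the two paths that appear verbatim in the text are used;
 * error handling is reduced to perror() for brevity.
 */
#include <stdio.h>

static void dump_file(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	fclose(f);
}

int main(void)
{
	dump_file("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
	dump_file("/sys/kernel/mm/transparent_hugepage/shrink_underused");
	return 0;
}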

File diff suppressed because it is too large.

@@ -132,3 +132,84 @@ type 'struct io_ring_ctx' changed
 1 variable symbol(s) removed
   'struct tracepoint __tracepoint_android_vh_filemap_fault_before_folio_locked'

+type 'struct kvm_protected_vm' changed
+  member 'struct maple_tree pinned_pages' was removed
+  member 'union { struct rb_root_cached pinned_pages; struct { struct maple_tree __unused; }; union { }; }' was added
+
+type 'struct kvm_hyp_req' changed
+  member changed from 'union { struct { u8 dest; int nr_pages; int sz_alloc; } mem; struct { unsigned long guest_ipa; size_t size; } map; }' to 'union { struct { u8 dest; int nr_pages; int sz_alloc; } mem; struct { unsigned long guest_ipa; size_t size; } map; struct { unsigned long guest_ipa; size_t size; } split; }'
+    type changed from 'union { struct { u8 dest; int nr_pages; int sz_alloc; } mem; struct { unsigned long guest_ipa; size_t size; } map; }' to 'union { struct { u8 dest; int nr_pages; int sz_alloc; } mem; struct { unsigned long guest_ipa; size_t size; } map; struct { unsigned long guest_ipa; size_t size; } split; }'
+      member 'struct { unsigned long guest_ipa; size_t size; } split' was added
+
+type 'struct scm_fp_list' changed
+  byte size changed from 2040 to 2048
+  member 'short count_unix' was added
+
+type 'struct scm_fp_list' changed
+  byte size changed from 2048 to 2064
+  member 'struct list_head vertices' was added
+  member 'short count_unix' changed
+    offset changed by 128
+
+type 'struct scm_fp_list' changed
+  byte size changed from 2064 to 2072
+  member 'struct unix_edge* edges' was added
+  member 'short count_unix' changed
+    offset changed by 64
+
+type 'struct scm_fp_list' changed
+  byte size changed from 2072 to 2080
+  member 'bool inflight' was added
+  3 members ('struct list_head vertices' .. 'short count_unix') changed
+    offset changed by 64
+
+type 'struct unix_edge' changed
+  byte size changed from 32 to 48
+  member 'struct list_head stack_entry' was added
+
+type 'struct unix_vertex' changed
+  byte size changed from 40 to 48
+  member 'unsigned long index' was added
+
+type 'struct unix_vertex' changed
+  byte size changed from 48 to 80
+  member 'struct list_head scc_entry' was added
+  2 members ('unsigned long out_degree' .. 'unsigned long index') changed
+    offset changed by 128
+  member 'unsigned long lowlink' was added
+  member 'bool on_stack' was added
+
+type 'struct unix_sock' changed
+  member 'struct sock* listener' was added
+  4 members ('struct list_head link' .. 'unsigned long gc_flags') changed
+    offset changed by 64
+
+type 'struct unix_vertex' changed
+  byte size changed from 80 to 72
+  member 'bool on_stack' was removed
+
+type 'struct unix_vertex' changed
+  member 'unsigned long lowlink' was removed
+  member 'unsigned long scc_index' was added
+
+type 'struct unix_sock' changed
+  byte size changed from 1216 to 1152
+  member 'struct list_head link' was removed
+  member 'unsigned long inflight' was removed
+  member 'spinlock_t lock' changed
+    offset changed by -192
+  member 'unsigned long gc_flags' was removed
+  4 members ('struct socket_wq peer_wq' .. 'struct sk_buff* oob_skb') changed
+    offset changed by -512
+
+type 'struct unix_sock' changed
+  member 'struct sk_buff* oob_skb' changed
+    offset changed by 64
+
+type 'struct scm_stat' changed
+  byte size changed from 4 to 16
+  member 'unsigned long nr_unix_fds' was added
+
+type 'struct scm_fp_list' changed
+  member 'bool dead' was added
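The unix_vertex churn above (index, lowlink and on_stack appearing, then lowlink/on_stack giving way to scc_index) is the bookkeeping used by the SCC-based af_unix garbage collector pulled in by this merge. For orientation only, here is a small self-contained userspace sketch of textbook Tarjan SCC detection over a fixed graph; the kernel implementation differs (it is non-recursive, works on unix_vertex/unix_edge objects, and skips the algorithm entirely when no cycle is possible), so treat this purely as an illustration of what those fields track.

/*
 * Textbook recursive Tarjan SCC over a tiny fixed graph. Illustrative only:
 * the kernel's af_unix GC uses an iterative variant over unix_vertex objects.
 */
#include <stdio.h>

#define NV 5

static const int adj[NV][NV] = {	/* adjacency matrix: edge i -> j */
	{0, 1, 0, 0, 0},
	{0, 0, 1, 0, 0},
	{1, 0, 0, 1, 0},
	{0, 0, 0, 0, 1},
	{0, 0, 0, 0, 0},
};

static int idx[NV], low[NV], on_stack[NV], visited[NV];
static int stack[NV], top = -1, counter;

static void strongconnect(int v)
{
	int w;

	visited[v] = 1;
	idx[v] = low[v] = counter++;
	stack[++top] = v;
	on_stack[v] = 1;

	for (w = 0; w < NV; w++) {
		if (!adj[v][w])
			continue;
		if (!visited[w]) {
			strongconnect(w);
			if (low[w] < low[v])
				low[v] = low[w];
		} else if (on_stack[w] && idx[w] < low[v]) {
			low[v] = idx[w];
		}
	}

	if (low[v] == idx[v]) {		/* v is the root of an SCC */
		printf("SCC:");
		do {
			w = stack[top--];
			on_stack[w] = 0;
			printf(" %d", w);
		} while (w != v);
		printf("\n");
	}
}

int main(void)
{
	int v;

	for (v = 0; v < NV; v++)
		if (!visited[v])
			strongconnect(v);
	return 0;
}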


@@ -0,0 +1,232 @@
[abi_symbol_list]
# commonly used symbols
alloc_chrdev_region
alt_cb_patch_nops
__arch_copy_from_user
__arch_copy_to_user
cdev_add
cdev_del
cdev_init
__check_object_size
class_create
class_destroy
complete
dev_driver_string
_dev_err
device_create
device_destroy
_dev_info
devm_kfree
devm_kmalloc
devm_memremap
devm_request_threaded_irq
_dev_warn
fortify_panic
free_irq
__init_swait_queue_head
init_timer_key
__init_waitqueue_head
jiffies_to_usecs
kfree
__kmalloc
kmalloc_caches
kmalloc_trace
kstrtouint
log_post_read_mmio
log_read_mmio
memcpy
__memcpy_fromio
memset
module_layout
__mutex_init
mutex_lock
mutex_unlock
of_find_property
of_property_read_u32_index
of_property_read_variable_u32_array
panic
pid_task
__platform_driver_register
platform_driver_unregister
_printk
__put_task_struct
_raw_spin_lock
_raw_spin_unlock
request_threaded_irq
schedule_timeout
snprintf
__stack_chk_fail
strlen
strncmp
strnlen
strscpy
sysfs_create_group
sysfs_remove_group
system_cpucaps
system_wq
tegra_ivc_notified
tegra_ivc_read_advance
tegra_ivc_read_get_next_frame
tegra_ivc_reset
tegra_ivc_write_advance
tegra_ivc_write_get_next_frame
__traceiter_rwmmio_post_read
__traceiter_rwmmio_read
__tracepoint_rwmmio_post_read
__tracepoint_rwmmio_read
unregister_chrdev_region
__wake_up
__warn_printk
# required by ivc-cdev.ko
device_del
devm_free_irq
noop_llseek
remap_pfn_range
# required by ivc_ext.ko
dma_sync_single_for_cpu
__memcpy_toio
# required by nvsciipc.ko
_dev_notice
__fdget
find_get_pid
fput
platform_device_register_full
platform_device_unregister
sprintf
# required by tegra_bpmp.ko
clk_hw_determine_rate_no_reparent
clk_hw_get_name
clk_hw_unregister
debugfs_create_dir
debugfs_create_file
debugfs_remove
dentry_path_raw
devm_clk_hw_register
devm_reset_controller_register
dma_alloc_attrs
dma_free_attrs
_find_next_bit
kmalloc_large
kstrdup
ktime_get
of_clk_add_hw_provider
of_device_get_match_data
of_genpd_add_provider_onecell
__of_parse_phandle_with_args
of_platform_default_populate
pm_genpd_init
pm_genpd_remove
seq_lseek
seq_read
seq_write
single_open_size
single_release
strncpy
tegra_bpmp_free_mrq
tegra_bpmp_mrq_is_supported
tegra_bpmp_mrq_return
tegra_bpmp_request_mrq
tegra_bpmp_transfer
tegra_bpmp_transfer_atomic
tegra_sku_info
# required by tegra_hv.ko
arm64_use_ng_mappings
class_create_file_ns
ioremap_prot
iounmap
irq_get_irq_data
memstart_addr
of_add_property
of_chosen
of_find_compatible_node
of_irq_get
pfn_is_map_memory
tegra_ivc_init
# required by tegra_hv_pm_ctl.ko
__alloc_skb
find_vpid
finish_wait
init_net
init_wait_entry
msleep
__netlink_kernel_create
netlink_unicast
__nlmsg_put
prepare_to_wait_event
register_pm_notifier
schedule
strcmp
wait_for_completion_timeout
# required by tegra_hv_vblk_oops.ko
delayed_work_timer_fn
dma_map_page_attrs
__get_free_pages
is_vmalloc_addr
queue_delayed_work_on
# required by tegra_vblk.ko
blk_execute_rq
blk_mq_alloc_disk_for_queue
blk_mq_alloc_request
blk_mq_alloc_tag_set
blk_mq_destroy_queue
blk_mq_end_request
blk_mq_free_request
blk_mq_free_tag_set
blk_mq_init_queue
blk_mq_start_hw_queues
blk_mq_start_request
blk_mq_stop_hw_queues
blk_queue_flag_set
blk_queue_logical_block_size
blk_queue_max_discard_sectors
blk_queue_max_hw_sectors
blk_queue_max_secure_erase_sectors
blk_queue_physical_block_size
blk_queue_write_cache
__blk_rq_map_sg
capable
__cpu_possible_mask
del_gendisk
device_add_disk
device_create_file
disable_irq
disk_check_media_change
dma_map_sg_attrs
dma_unmap_sg_attrs
enable_irq
_find_first_zero_bit
jiffies
kasan_flag_enabled
kthread_create_on_cpu
kthread_create_on_node
__list_add_valid_or_report
__list_del_entry_valid_or_report
mod_timer
__num_online_cpus
of_find_node_by_name
put_disk
queue_work_on
_raw_spin_lock_irqsave
_raw_spin_unlock_irqrestore
__register_blkdev
sched_setattr_nocheck
set_capacity
set_disk_ro
sg_init_table
sg_nents
__sw_hweight64
timer_delete
unregister_blkdev
vfree
vzalloc
wait_for_completion
wait_for_completion_interruptible
wake_up_process

@@ -883,6 +883,7 @@
   drm_mode_duplicate
   drm_mode_equal
   drm_mode_equal_no_clocks
+  drm_mode_is_420_only
   drm_mode_object_find
   drm_mode_object_get
   drm_mode_object_put
@@ -2620,6 +2621,7 @@
   touch_softlockup_watchdog
   trace_array_destroy
   trace_array_get_by_name
+  trace_array_get_by_name_ext
   trace_array_put
   trace_array_set_clr_event
   trace_event_buffer_commit
@@ -2731,6 +2733,7 @@
   __traceiter_android_vh_ufs_update_sysfs
   __traceiter_android_vh_usb_dev_resume
   __traceiter_android_vh_use_amu_fie
+  __traceiter_android_vh_xhci_full_reset_on_remove
   __traceiter_clock_set_rate
   __traceiter_cma_alloc_finish
   __traceiter_cma_alloc_start
@@ -2869,6 +2872,7 @@
   __tracepoint_android_vh_ufs_update_sysfs
   __tracepoint_android_vh_usb_dev_resume
   __tracepoint_android_vh_use_amu_fie
+  __tracepoint_android_vh_xhci_full_reset_on_remove
   __tracepoint_clock_set_rate
   __tracepoint_cma_alloc_finish
   __tracepoint_cma_alloc_start

@@ -154,6 +154,8 @@
   __traceiter_android_vh_look_around_migrate_folio
   __traceiter_android_vh_lruvec_add_folio
   __traceiter_android_vh_lruvec_del_folio
+  __traceiter_android_vh_mempool_alloc_skip_wait
+  __traceiter_android_vh_mm_free_page
   __traceiter_android_vh_mmap_region
   __traceiter_android_vh_mutex_init
   __traceiter_android_vh_mutex_unlock_slowpath
@@ -284,6 +286,8 @@
   __tracepoint_android_vh_look_around_migrate_folio
   __tracepoint_android_vh_lruvec_add_folio
   __tracepoint_android_vh_lruvec_del_folio
+  __tracepoint_android_vh_mempool_alloc_skip_wait
+  __tracepoint_android_vh_mm_free_page
   __tracepoint_android_vh_mmap_region
   __tracepoint_android_vh_mutex_init
   __tracepoint_android_vh_mutex_unlock_slowpath

@@ -23,6 +23,8 @@
   __tracepoint_android_vh_tune_swappiness
   __traceiter_android_vh_do_shrink_slab_ex
   __tracepoint_android_vh_do_shrink_slab_ex
+  __traceiter_android_vh_migration_target_bypass
+  __tracepoint_android_vh_migration_target_bypass
 # required by lz4 decompress module
   __tracepoint_android_vh_lz4_decompress_bypass

@@ -1911,6 +1911,7 @@
   scsi_report_bus_reset
   scsi_scan_host
   scsi_unblock_requests
+  scsi_host_busy
   sdev_prefix_printk
   security_file_ioctl
   select_fallback_rq

@@ -737,6 +737,7 @@ CONFIG_CRYPTO_LZ4=y
 CONFIG_CRYPTO_ZSTD=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
+CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_SHA512_ARM64_CE=y
 CONFIG_CRYPTO_POLYVAL_ARM64_CE=y

@@ -8,6 +8,8 @@ CONFIG_RCU_EXPERT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
 # CONFIG_RD_GZIP is not set
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
@@ -136,8 +138,10 @@ CONFIG_STATIC_USERMODEHELPER_PATH=""
 CONFIG_SECURITY_SELINUX=y
 CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_CRYPTO_SHA1=y
 CONFIG_CRYPTO_HCTR2=y
 CONFIG_CRYPTO_LZO=y
+CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_POLYVAL_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y

@@ -83,6 +83,7 @@ enum __kvm_host_smccc_func {
	__KVM_HOST_SMCCC_FUNC___pkvm_relax_perms,
	__KVM_HOST_SMCCC_FUNC___pkvm_wrprotect,
	__KVM_HOST_SMCCC_FUNC___pkvm_dirty_log,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_split_guest,
	__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
	__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
	__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,

@@ -224,20 +224,36 @@ struct kvm_smccc_features {
 };

 struct kvm_pinned_page {
+	union {
+		struct rb_node node;
+		struct list_head list_node;
+	};
	struct page *page;
	u64 ipa;
+	u64 __subtree_last;
	u8 order;
	u16 pins;
 };

-#define KVM_DUMMY_PPAGE ((struct kvm_pinned_page *)-1)
+struct kvm_pinned_page
+*kvm_pinned_pages_iter_first(struct rb_root_cached *root, u64 start, u64 end);
+struct kvm_pinned_page
+*kvm_pinned_pages_iter_next(struct kvm_pinned_page *ppage, u64 start, u64 end);
+
+#define for_ppage_node_in_range(kvm, start, end, __ppage, __tmp) \
+	for (__ppage = kvm_pinned_pages_iter_first(&(kvm)->arch.pkvm.pinned_pages, start, end - 1);\
+	     __ppage && ({ __tmp = kvm_pinned_pages_iter_next(__ppage, start, end - 1); 1; }); \
+	     __ppage = __tmp)
+
+void kvm_pinned_pages_remove(struct kvm_pinned_page *ppage,
+			     struct rb_root_cached *root);

 typedef unsigned int pkvm_handle_t;

 struct kvm_protected_vm {
	pkvm_handle_t handle;
	struct kvm_hyp_memcache stage2_teardown_mc;
-	struct maple_tree pinned_pages;
+	_ANDROID_KABI_REPLACE(struct maple_tree __unused, struct rb_root_cached pinned_pages);
	gpa_t pvmfw_load_addr;
	bool enabled;
 };
@@ -525,6 +541,7 @@ struct kvm_hyp_req {
 #define KVM_HYP_LAST_REQ	0
 #define KVM_HYP_REQ_TYPE_MEM	1
 #define KVM_HYP_REQ_TYPE_MAP	2
+#define KVM_HYP_REQ_TYPE_SPLIT	3
	u8 type;
	union {
		struct {
@@ -539,6 +556,12 @@ struct kvm_hyp_req {
			unsigned long guest_ipa;
			size_t size;
		} map;
+#ifndef __GENKSYMS__
+		struct {
+			unsigned long guest_ipa;
+			size_t size;
+		} split;
+#endif
	};
 };
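The for_ppage_node_in_range() macro above saves the next interval-tree node in __tmp before the loop body runs, which is what lets callers such as pkvm_unmap_range() free the current kvm_pinned_page mid-walk. A minimal userspace sketch of the same prefetch-next idiom on a plain singly linked list follows; all names in it are illustrative, not kernel API.

/*
 * Prefetch-next iteration: grab the successor before the body may free the
 * current node. Same idea as for_ppage_node_in_range()/__tmp above, shown
 * on a plain singly linked list. Allocation error handling omitted.
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	int val;
	struct node *next;
};

#define for_each_node_safe(pos, tmp, head) \
	for (pos = (head); pos && (tmp = pos->next, 1); pos = tmp)

int main(void)
{
	struct node *head = NULL, *pos, *tmp;
	int i;

	/* Build a small list: 4 -> 3 -> 2 -> 1 -> 0 */
	for (i = 0; i < 5; i++) {
		struct node *n = malloc(sizeof(*n));

		n->val = i;
		n->next = head;
		head = n;
	}

	/* Safe to free the current node: its successor was already saved. */
	for_each_node_safe(pos, tmp, head) {
		printf("freeing %d\n", pos->val);
		free(pos);
	}
	return 0;
}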

@@ -184,6 +184,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
 int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t size);
+int __pkvm_pgtable_stage2_split(struct kvm_vcpu *vcpu, phys_addr_t ipa, size_t size);

 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);

@@ -862,8 +862,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
  * kvm_pgtable_stage2_split() is best effort: it tries to break as many
  * blocks in the input range as allowed by @mc_capacity.
  */
-int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
-			     struct kvm_mmu_memory_cache *mc);
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, void *mc);

 /**
  * kvm_pgtable_walk() - Walk a page-table.

@@ -363,6 +363,11 @@ static int handle_hyp_req_map(struct kvm_vcpu *vcpu,
	return pkvm_mem_abort_range(vcpu, req->map.guest_ipa, req->map.size);
 }

+static int handle_hyp_req_split(struct kvm_vcpu *vcpu, struct kvm_hyp_req *req)
+{
+	return __pkvm_pgtable_stage2_split(vcpu, req->split.guest_ipa, req->split.size);
+}
+
 static int handle_hyp_req(struct kvm_vcpu *vcpu)
 {
	struct kvm_hyp_req *hyp_req = vcpu->arch.hyp_reqs;
@@ -379,6 +384,9 @@ static int handle_hyp_req(struct kvm_vcpu *vcpu)
	case KVM_HYP_REQ_TYPE_MAP:
		ret = handle_hyp_req_map(vcpu, hyp_req);
		break;
+	case KVM_HYP_REQ_TYPE_SPLIT:
+		ret = handle_hyp_req_split(vcpu, hyp_req);
+		break;
	default:
		pr_warn("Unknown kvm_hyp_req type: %d\n", hyp_req->type);
		ret = -EINVAL;

@@ -63,6 +63,7 @@ int __pkvm_host_unuse_dma(u64 phys_addr, size_t size);
 int __pkvm_guest_stage2_snapshot(struct kvm_pgtable_snapshot *snap, struct pkvm_hyp_vm *vm);
 int __pkvm_host_stage2_snapshot(struct kvm_pgtable_snapshot *snap);
 int __pkvm_host_lazy_pte(u64 pfn, u64 nr_pages, bool enable);
+int __pkvm_host_split_guest(u64 pfn, u64 gfn, u64 size, struct pkvm_hyp_vcpu *vcpu);

 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size,

@@ -556,7 +556,7 @@ void *hyp_alloc(size_t size)
	unsigned long chunk_addr;
	int missing_map, ret = 0;

-	size = ALIGN(size, MIN_ALLOC);
+	size = ALIGN(size ?: MIN_ALLOC, MIN_ALLOC);

	hyp_spin_lock(&allocator->lock);
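The one-liner above matters because ALIGN() rounds up to a multiple of the alignment and leaves 0 at 0, so a zero-byte hyp_alloc() request previously bypassed the minimum-chunk rounding; "size ?: MIN_ALLOC" makes it behave like a MIN_ALLOC-byte request. A small userspace illustration, with ALIGN() and MIN_ALLOC redefined locally for the demo (the real MIN_ALLOC value lives in the hypervisor allocator):

/* Demonstrates why hyp_alloc(0) needed the "size ?: MIN_ALLOC" clamp. */
#include <stdio.h>

#define MIN_ALLOC 8UL
/* Same rounding the kernel's ALIGN() performs for power-of-two alignments. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long size = 0;

	/* "?:" is a GCC/Clang extension, the same one the patched line uses. */
	printf("old: ALIGN(%lu, %lu) = %lu\n", size, MIN_ALLOC,
	       ALIGN(size, MIN_ALLOC));			/* 0: nothing reserved */
	printf("new: ALIGN(size ?: MIN_ALLOC, %lu) = %lu\n", MIN_ALLOC,
	       ALIGN(size ?: MIN_ALLOC, MIN_ALLOC));	/* 8: at least one chunk */
	return 0;
}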

@@ -1073,6 +1073,27 @@ out:
	cpu_reg(host_ctxt, 1) = ret;
 }

+static void handle___pkvm_host_split_guest(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(u64, pfn, host_ctxt, 1);
+	DECLARE_REG(u64, gfn, host_ctxt, 2);
+	DECLARE_REG(u64, size, host_ctxt, 3);
+	struct pkvm_hyp_vcpu *hyp_vcpu;
+	int ret = -EINVAL;
+
+	if (!is_protected_kvm_enabled())
+		goto out;
+
+	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+	if (!hyp_vcpu)
+		goto out;
+
+	ret = __pkvm_host_split_guest(pfn, gfn, size, hyp_vcpu);
+out:
+	cpu_reg(host_ctxt, 1) = ret;
+}
+
 static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
 {
	struct pkvm_hyp_vcpu *hyp_vcpu;
@@ -1618,6 +1639,7 @@ static const hcall_t host_hcall[] = {
	HANDLE_FUNC(__pkvm_relax_perms),
	HANDLE_FUNC(__pkvm_wrprotect),
	HANDLE_FUNC(__pkvm_dirty_log),
+	HANDLE_FUNC(__pkvm_host_split_guest),
	HANDLE_FUNC(__pkvm_tlb_flush_vmid),
	HANDLE_FUNC(__kvm_adjust_pc),
	HANDLE_FUNC(__kvm_vcpu_run),

@@ -387,6 +387,10 @@ static int relinquish_walker(const struct kvm_pgtable_visit_ctx *ctx,
	if (!kvm_pte_valid(pte))
		return 0;

+	/* We don't support splitting non-leaf mappings */
+	if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
+		return -E2BIG;
+
	state = pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
	if (state != data->expected_state)
		return -EPERM;
@@ -433,8 +437,7 @@ int __pkvm_guest_relinquish_to_host(struct pkvm_hyp_vcpu *vcpu,
		goto end;

	/* Zap the guest stage2 pte and return ownership to the host */
-	ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE,
-					  &vcpu->vcpu.arch.stage2_mc, 0);
+	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
	if (ret)
		goto end;
@@ -2760,6 +2763,30 @@ unlock:
 }

+int __pkvm_host_split_guest(u64 pfn, u64 gfn, u64 size, struct pkvm_hyp_vcpu *vcpu)
+{
+	struct kvm_hyp_memcache *mc = &vcpu->vcpu.arch.stage2_mc;
+	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+	u64 ipa = hyp_pfn_to_phys(gfn);
+	int ret;
+
+	if (size != PMD_SIZE)
+		return -EINVAL;
+
+	guest_lock_component(vm);
+
+	/*
+	 * stage2_split() already checks the existing mapping is valid and PMD-level.
+	 * No other check is necessary.
+	 */
+	ret = kvm_pgtable_stage2_split(&vm->pgt, ipa, size, mc);
+
+	guest_unlock_component(vm);
+
+	return ret;
+}
+
 int __pkvm_host_donate_guest(struct pkvm_hyp_vcpu *vcpu, u64 pfn, u64 gfn,
			     u64 nr_pages)
 {

@@ -702,16 +702,13 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
	if (ret)
		goto done;

-	ret = pkvm_vcpu_init_psci(hyp_vcpu);
-	if (ret)
-		goto done;
-
	if (test_bit(KVM_ARM_VCPU_SVE, hyp_vcpu->vcpu.arch.features)) {
		ret = init_pkvm_hyp_vcpu_sve(hyp_vcpu, host_vcpu);
		if (ret)
			goto done;
	}

+	WARN_ON(pkvm_vcpu_init_psci(hyp_vcpu));
	pkvm_vcpu_init_traps(hyp_vcpu);
	kvm_reset_pvm_sys_regs(&hyp_vcpu->vcpu);
 done:
@@ -1588,9 +1585,19 @@ static bool pkvm_memrelinquish_call(struct pkvm_hyp_vcpu *hyp_vcpu,
		goto out_guest_err;

	ret = __pkvm_guest_relinquish_to_host(hyp_vcpu, ipa, &pa);
-	if (ret == -ENOMEM) {
-		if (pkvm_handle_empty_memcache(hyp_vcpu, exit_code))
+	if (ret == -E2BIG) {
+		struct kvm_hyp_req *req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_SPLIT);
+
+		if (!req) {
+			ret = -ENOMEM;
			goto out_guest_err;
+		}
+
+		req->split.guest_ipa = ALIGN_DOWN(ipa, PMD_SIZE);
+		req->split.size = PMD_SIZE;
+
+		write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
+		*exit_code = ARM_EXCEPTION_HYP_REQ;
		return false;
	} else if (ret) {

@@ -1769,13 +1769,49 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
	return 0;
 }

-int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
-			     struct kvm_mmu_memory_cache *mc)
+static int pkvm_stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
+				    enum kvm_pgtable_walk_flags visit)
 {
+	struct stage2_map_data *data = ctx->arg;
+	struct kvm_pgtable *pgt = data->mmu->pgt;
+	struct kvm_hyp_memcache *mc = data->memcache;
+	enum kvm_pgtable_prot prot;
+	kvm_pte_t pte = ctx->old;
+	kvm_pte_t *childp;
+
+	if (ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)
+		return 0;
+
+	/* We can only split PMD-level blocks */
+	if (!kvm_pte_valid(pte) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 2)
+		return -EINVAL;
+
+	prot = kvm_pgtable_stage2_pte_prot(pte);
+	childp = kvm_pgtable_stage2_create_unlinked(pgt, kvm_pte_to_phys(pte),
+						    ctx->level, prot, mc, true);
+	if (IS_ERR(childp))
+		return PTR_ERR(childp);
+
+	WARN_ON(!stage2_try_break_pte(ctx, data->mmu));
+	stage2_make_pte(ctx, kvm_init_table_pte(childp, ctx->mm_ops));
+	dsb(ishst);
+
+	return 0;
+}
+
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, void *mc)
+{
+	struct stage2_map_data data = {
+		.mmu = pgt->mmu,
+		.memcache = mc,
+	};
	struct kvm_pgtable_walker walker = {
-		.cb = stage2_split_walker,
+		.cb = static_branch_unlikely(&kvm_protected_mode_initialized) ?
+		      pkvm_stage2_split_walker : stage2_split_walker,
+		.arg = static_branch_unlikely(&kvm_protected_mode_initialized) ?
+		       &data : mc,
		.flags = KVM_PGTABLE_WALK_LEAF,
-		.arg = mc,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);

@@ -6,11 +6,11 @@
 #include <linux/cma.h>
 #include <linux/dma-map-ops.h>
-#include <linux/maple_tree.h>
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
 #include <linux/hugetlb.h>
+#include <linux/interval_tree_generic.h>
 #include <linux/sched/signal.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
@@ -291,6 +291,20 @@ static void invalidate_icache_guest_page(void *va, size_t size)
	__invalidate_icache_guest_page(va, size);
 }

+static u64 __pinned_page_start(struct kvm_pinned_page *ppage)
+{
+	return ppage->ipa;
+}
+
+static u64 __pinned_page_end(struct kvm_pinned_page *ppage)
+{
+	return ppage->ipa + (1 << (ppage->order + PAGE_SHIFT)) - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct kvm_pinned_page, node, u64, __subtree_last,
+		     __pinned_page_start, __pinned_page_end, /* empty */,
+		     kvm_pinned_pages);
+
 static int __pkvm_unmap_guest_call(u64 pfn, u64 gfn, u8 order, void *args)
 {
	struct kvm *kvm = args;
@@ -312,7 +326,7 @@ static int pkvm_unmap_guest(struct kvm *kvm, struct kvm_pinned_page *ppage)
	 * no update needed from here.
	 */
	unpin_user_pages(&ppage->page, 1);
-	mtree_erase(&kvm->arch.pkvm.pinned_pages, ppage->ipa);
+	kvm_pinned_pages_remove(ppage, &kvm->arch.pkvm.pinned_pages);
	kfree(ppage);

	return 0;
@@ -320,17 +334,12 @@ static int pkvm_unmap_guest(struct kvm *kvm, struct kvm_pinned_page *ppage)
 static int pkvm_unmap_range(struct kvm *kvm, u64 start, u64 end)
 {
+	struct kvm_pinned_page *ppage, *tmp;
	struct mm_struct *mm = kvm->mm;
-	unsigned long index = start;
	unsigned long cnt = 0;
-	void *entry;
	int ret = 0;

-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, end - 1) {
-		struct kvm_pinned_page *ppage = entry;
-
-		if (ppage == KVM_DUMMY_PPAGE)
-			continue;
+	for_ppage_node_in_range(kvm, start, end, ppage, tmp) {
		ret = pkvm_unmap_guest(kvm, ppage);
		if (ret)
			break;
@@ -418,8 +427,7 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
 static void pkvm_stage2_flush(struct kvm *kvm)
 {
-	unsigned long index = 0;
-	void *entry;
+	struct kvm_pinned_page *ppage, *tmp;

	/*
	 * Contrary to stage2_apply_range(), we don't need to check
@@ -427,11 +435,7 @@ static void pkvm_stage2_flush(struct kvm *kvm)
	 * from a vcpu thread, and the list is only ever freed on VM
	 * destroy (which only occurs when all vcpu are gone).
	 */
-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, ULONG_MAX) {
-		struct kvm_pinned_page *ppage = entry;
-
-		if (ppage == KVM_DUMMY_PPAGE)
-			continue;
+	for_ppage_node_in_range(kvm, 0, ULONG_MAX, ppage, tmp) {
		__clean_dcache_guest_page(page_address(ppage->page), PAGE_SIZE);
		cond_resched_rwlock_write(&kvm->mmu_lock);
	}
@@ -1014,7 +1018,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
-	mt_init_flags(&kvm->arch.pkvm.pinned_pages, MT_FLAGS_USE_RCU);
	mmu->arch = &kvm->arch;

	if (is_protected_kvm_enabled())
@@ -1293,18 +1296,13 @@ static int __pkvm_wrprotect_call(u64 pfn, u64 gfn, u8 order, void *args)
 static int pkvm_wp_range(struct kvm *kvm, u64 start, u64 end)
 {
-	unsigned long index = start;
-	void *entry;
+	struct kvm_pinned_page *ppage, *tmp;

-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, end - 1) {
-		struct kvm_pinned_page *ppage = entry;
+	for_ppage_node_in_range(kvm, start, end, ppage, tmp) {
		int ret;

-		if (ppage == KVM_DUMMY_PPAGE)
-			continue;
		ret = pkvm_call_hyp_nvhe_ppage(ppage, __pkvm_wrprotect_call,
					       kvm, false);
		if (ret)
			return ret;
	}
@@ -1630,28 +1628,9 @@ static int pkvm_host_map_guest(u64 pfn, u64 gfn, u64 nr_pages,
	return (ret == -EPERM) ? -EAGAIN : ret;
 }

-static struct kvm_pinned_page *
-find_ppage_or_above(struct kvm *kvm, phys_addr_t ipa)
-{
-	unsigned long index = ipa;
-	void *entry;
-
-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, ULONG_MAX) {
-		if (entry == KVM_DUMMY_PPAGE)
-			continue;
-		return entry;
-	}
-
-	return NULL;
-}
-
 static struct kvm_pinned_page *find_ppage(struct kvm *kvm, u64 ipa)
 {
-	struct kvm_pinned_page *ppage;
-	unsigned long index = ipa;
-
-	ppage = mt_find(&kvm->arch.pkvm.pinned_pages, &index, ipa + PAGE_SIZE - 1);
-
-	return ppage == KVM_DUMMY_PPAGE ? NULL : ppage;
+	return kvm_pinned_pages_iter_first(&kvm->arch.pkvm.pinned_pages, ipa, ipa + PAGE_SIZE - 1);
 }

 static int __pkvm_relax_perms_call(u64 pfn, u64 gfn, u8 order, void *args)
@@ -1707,11 +1686,10 @@ static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t *fault_ipa,
 {
	unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
	struct kvm_hyp_memcache *hyp_memcache = &vcpu->arch.stage2_mc;
-	unsigned long index, pmd_offset, page_size, end;
+	unsigned long page_size = PAGE_SIZE;
	struct mm_struct *mm = current->mm;
	struct kvm_pinned_page *ppage;
	struct kvm *kvm = vcpu->kvm;
-	struct maple_tree *mt = &kvm->arch.pkvm.pinned_pages;
	int ret, nr_pages;
	struct page *page;
	u64 pfn;
@@ -1760,66 +1738,49 @@ static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t *fault_ipa,
	}

	pfn = page_to_pfn(page);
-	pmd_offset = *fault_ipa & (PMD_SIZE - 1);
-	page_size = transparent_hugepage_adjust(kvm, memslot,
-						hva, &pfn,
-						fault_ipa);
-	page = pfn_to_page(pfn);
-retry:
-	if (size)
-		*size = page_size;
+	read_lock(&kvm->mmu_lock);
+	if (!kvm_pinned_pages_iter_first(&vcpu->kvm->arch.pkvm.pinned_pages,
+					 ALIGN_DOWN(*fault_ipa, PMD_SIZE),
+					 ALIGN(*fault_ipa + 1, PMD_SIZE) - 1))
+		page_size = transparent_hugepage_adjust(kvm, memslot, hva, &pfn, fault_ipa);
+	/*
+	 * We take the risk of racing with another vCPU, but sync will be restored by the
+	 * host_map_guest HVC
+	 */
+	read_unlock(&kvm->mmu_lock);
+	page = pfn_to_page(pfn);

	ret = account_locked_vm(mm, page_size >> PAGE_SHIFT, true);
	if (ret)
		goto unpin;

-	index = *fault_ipa;
-	end = index + page_size - 1;
	ppage->page = page;
	ppage->ipa = *fault_ipa;
	ppage->order = get_order(page_size);
	ppage->pins = 1 << ppage->order;

-	/*
-	 * If we already have a mapping in the middle of the THP, we have no
-	 * other choice than enforcing PAGE_SIZE for pkvm_host_map_guest() to
-	 * succeed.
-	 */
-	if (page_size > PAGE_SIZE && mt_find(mt, &index, end)) {
-		*fault_ipa += pmd_offset;
-		pfn += pmd_offset >> PAGE_SHIFT;
-		page = pfn_to_page(pfn);
-		account_locked_vm(mm, page_size >> PAGE_SHIFT, false);
-		page_size = PAGE_SIZE;
-		goto retry;
-	}
-
-	/* Reserve space in the mtree */
-	ret = mtree_insert_range(mt, index, end, KVM_DUMMY_PPAGE, GFP_KERNEL);
-	if (ret) {
-		if (ret == -EEXIST)
-			ret = 0;
-		goto dec_account;
-	}
-
	write_lock(&kvm->mmu_lock);
	ret = pkvm_host_map_guest(pfn, *fault_ipa >> PAGE_SHIFT,
				  page_size >> PAGE_SHIFT, KVM_PGTABLE_PROT_R);
	if (ret) {
-		if (WARN_ON(ret == -EAGAIN))
+		if (ret == -EAGAIN)
			ret = 0;
		goto err_unlock;
	}

-	WARN_ON(mtree_store_range(mt, index, end, ppage, GFP_ATOMIC));
+	kvm_pinned_pages_insert(ppage, &kvm->arch.pkvm.pinned_pages);
	write_unlock(&kvm->mmu_lock);

+	if (size)
+		*size = page_size;
+
	return 0;

 err_unlock:
	write_unlock(&kvm->mmu_lock);
-dec_account:
	account_locked_vm(mm, page_size >> PAGE_SHIFT, false);
 unpin:
	unpin_user_pages(&page, 1);
@@ -1847,13 +1808,13 @@ int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t si
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	read_lock(&vcpu->kvm->mmu_lock);

-	ppage = find_ppage_or_above(vcpu->kvm, fault_ipa);
+	ppage = kvm_pinned_pages_iter_first(&vcpu->kvm->arch.pkvm.pinned_pages,
+					    fault_ipa, ipa_end);

	while (fault_ipa < ipa_end) {
-		if (ppage && ppage != KVM_DUMMY_PPAGE && ppage->ipa == fault_ipa) {
+		if (ppage && ppage->ipa == fault_ipa) {
			page_size = PAGE_SIZE << ppage->order;
-			ppage = mt_next(&vcpu->kvm->arch.pkvm.pinned_pages,
-					ppage->ipa, ULONG_MAX);
+			ppage = kvm_pinned_pages_iter_next(ppage, fault_ipa, ipa_end);
		} else {
			gfn_t gfn = gpa_to_gfn(fault_ipa);
			struct kvm_memory_slot *memslot;
@@ -1877,7 +1838,8 @@ int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t si
			 * We had to release the mmu_lock so let's update the
			 * reference.
			 */
-			ppage = find_ppage_or_above(vcpu->kvm, fault_ipa + page_size);
+			ppage = kvm_pinned_pages_iter_first(&vcpu->kvm->arch.pkvm.pinned_pages,
+							    fault_ipa + PAGE_SIZE, ipa_end);
		}

		fault_ipa += page_size;
@@ -1889,6 +1851,162 @@ end:
	return err;
 }

+static int __pkvm_pin_user_pages(struct kvm *kvm, struct kvm_memory_slot *memslot,
+				 u64 gfn, u64 nr_pages, struct page ***__pages)
+{
+	unsigned long hva = gfn_to_hva_memslot_prot(memslot, gfn, NULL);
+	unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+	struct mm_struct *mm = current->mm;
+	struct page **pages;
+	long ret;
+	int p;
+
+	pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	mmap_read_lock(mm);
+	ret = pin_user_pages(hva, nr_pages, flags, pages);
+	mmap_read_unlock(mm);
+
+	if (ret == -EHWPOISON) {
+		kvm_send_hwpoison_signal(hva, PAGE_SHIFT);
+		goto err_free_pages;
+	} else if (ret == -EFAULT) {
+		/* Will try MMIO map */
+		ret = -EREMOTEIO;
+		goto err_free_pages;
+	} else if (ret < 0) {
+		ret = -EFAULT;
+		goto err_free_pages;
+	} else if (ret != nr_pages) {
+		nr_pages = ret;
+		ret = -EFAULT;
+		goto err_unpin_pages;
+	}
+
+	/* See PageSwapBacked() in pkvm_mem_abort() */
+	for (p = 0; p < nr_pages; p++) {
+		if (!folio_test_swapbacked(page_folio(pages[p]))) {
+			ret = -EIO;
+			goto err_unpin_pages;
+		}
+	}
+
+	*__pages = pages;
+
+	return 0;
+
+err_unpin_pages:
+	unpin_user_pages(pages, nr_pages);
+err_free_pages:
+	kfree(pages);
+
+	return ret;
+}
+
+/*
+ * Splitting is only expected on the back of a relinquish guest HVC in the pKVM case, while
+ * pkvm_pgtable_stage2_split() can be called with dirty logging.
+ */
+int __pkvm_pgtable_stage2_split(struct kvm_vcpu *vcpu, phys_addr_t ipa, size_t size)
+{
+	struct list_head ppage_prealloc = LIST_HEAD_INIT(ppage_prealloc);
+	struct kvm_hyp_memcache *hyp_memcache = &vcpu->arch.stage2_mc;
+	struct kvm_pinned_page *ppage, *tmp;
+	struct kvm_memory_slot *memslot;
+	struct kvm *kvm = vcpu->kvm;
+	int idx, p, ret, nr_pages;
+	struct page **pages;
+	kvm_pfn_t pfn;
+	gfn_t gfn;
+
+	if (!IS_ALIGNED(ipa, PMD_SIZE) || size != PMD_SIZE)
+		return -EINVAL;
+
+	if (!hyp_memcache->nr_pages) {
+		ret = topup_hyp_memcache(hyp_memcache, 1, 0);
+		if (ret)
+			return -ENOMEM;
+		atomic64_add(PAGE_SIZE, &kvm->stat.protected_hyp_mem);
+		atomic64_add(PAGE_SIZE, &kvm->stat.protected_pgtable_mem);
+	}
+
+	/* We already have 1 pin on the Huge Page */
+	nr_pages = (size >> PAGE_SHIFT) - 1;
+	gfn = (ipa >> PAGE_SHIFT) + 1;
+
+	/* Pre-allocate kvm_pinned_page before acquiring the mmu_lock */
+	for (p = 0; p < nr_pages; p++) {
+		ppage = kzalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+		if (!ppage) {
+			ret = -ENOMEM;
+			goto free_pinned_pages;
+		}
+		list_add(&ppage->list_node, &ppage_prealloc);
+	}
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	ret = __pkvm_pin_user_pages(kvm, memslot, gfn, nr_pages, &pages);
+	if (ret)
+		goto unlock_srcu;
+
+	write_lock(&kvm->mmu_lock);
+
+	ppage = find_ppage(kvm, ipa);
+	if (!ppage) {
+		ret = -EPERM;
+		goto end;
+	} else if (!ppage->order) {
+		ret = 0;
+		goto end;
+	}
+
+	ret = kvm_call_hyp_nvhe(__pkvm_host_split_guest, page_to_pfn(ppage->page),
+				ipa >> PAGE_SHIFT, size);
+	if (ret)
+		goto end;
+
+	ppage->order = 0;
+	ppage->pins = 1;
+	pfn = page_to_pfn(ppage->page) + 1;
+	ipa = ipa + PAGE_SIZE;
+
+	while (nr_pages--) {
+		/* Pop a ppage from the pre-allocated list */
+		ppage = list_first_entry(&ppage_prealloc, struct kvm_pinned_page, list_node);
+		list_del_init(&ppage->list_node);
+
+		ppage->page = pfn_to_page(pfn);
+		ppage->ipa = ipa;
+		ppage->order = 0;
+		ppage->pins = 1;
+		kvm_pinned_pages_insert(ppage, &kvm->arch.pkvm.pinned_pages);
+
+		pfn += 1;
+		ipa += PAGE_SIZE;
+	}
+
+end:
+	write_unlock(&kvm->mmu_lock);
+	if (ret)
+		unpin_user_pages(pages, nr_pages);
+	kfree(pages);
+unlock_srcu:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+free_pinned_pages:
+	/* Free unused pre-allocated kvm_pinned_page */
+	list_for_each_entry_safe(ppage, tmp, &ppage_prealloc, list_node) {
+		list_del(&ppage->list_node);
+		kfree(ppage);
+	}
+
+	return ret;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)

@@ -319,21 +319,17 @@ static int __reclaim_dying_guest_page_call(u64 pfn, u64 gfn, u8 order, void *arg
 static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 {
+	struct kvm_pinned_page *tmp, *ppage;
	struct mm_struct *mm = current->mm;
-	struct kvm_pinned_page *ppage;
	struct kvm_vcpu *host_vcpu;
-	unsigned long idx, ipa = 0;
+	unsigned long idx;

	if (!host_kvm->arch.pkvm.handle)
		goto out_free;

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));

-	mt_clear_in_rcu(&host_kvm->arch.pkvm.pinned_pages);
-	mt_for_each(&host_kvm->arch.pkvm.pinned_pages, ppage, ipa, ULONG_MAX) {
-		if (WARN_ON(ppage == KVM_DUMMY_PPAGE))
-			continue;
+	for_ppage_node_in_range(host_kvm, 0, ULONG_MAX, ppage, tmp) {
		WARN_ON(pkvm_call_hyp_nvhe_ppage(ppage,
						 __reclaim_dying_guest_page_call,
						 host_kvm, true));
@@ -341,9 +337,9 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
		account_locked_vm(mm, 1, false);
		unpin_user_pages_dirty_lock(&ppage->page, 1, host_kvm->arch.pkvm.enabled);
+		kvm_pinned_pages_remove(ppage, &host_kvm->arch.pkvm.pinned_pages);
		kfree(ppage);
	}
-	mtree_destroy(&host_kvm->arch.pkvm.pinned_pages);

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));
@@ -538,21 +534,21 @@ void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
 {
	struct mm_struct *mm = current->mm;
	struct kvm_pinned_page *ppage;
-	unsigned long index = ipa;
	u16 pins;

	write_lock(&host_kvm->mmu_lock);

-	ppage = mt_find(&host_kvm->arch.pkvm.pinned_pages, &index,
-			index + PAGE_SIZE - 1);
-	if (ppage && ppage != KVM_DUMMY_PPAGE) {
+	ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages,
+					    ipa, ipa + PAGE_SIZE - 1);
+	if (ppage) {
+		WARN_ON_ONCE(ppage->pins != 1);
		if (ppage->pins)
			ppage->pins--;
-		else
-			WARN_ON(1);

		pins = ppage->pins;
		if (!pins)
-			mtree_erase(&host_kvm->arch.pkvm.pinned_pages, ipa);
+			kvm_pinned_pages_remove(ppage,
						&host_kvm->arch.pkvm.pinned_pages);
	}

	write_unlock(&host_kvm->mmu_lock);

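Note on the pinned-pages rework above: the kvm_pinned_pages_insert/remove/iter_first helpers that replace the old maple-tree lookups behave like the accessors generated by INTERVAL_TREE_DEFINE(). The sketch below is illustrative only; the field names (node, __subtree_last) and the exact struct layout are assumptions, not the tree's actual definition.

    #include <linux/interval_tree_generic.h>

    /* Hypothetical layout; field names are assumptions for illustration. */
    struct kvm_pinned_page {
    	struct rb_node	node;
    	u64		__subtree_last;
    	struct page	*page;
    	u64		ipa;
    	u8		order;
    	u16		pins;
    };

    #define PPAGE_START(p)	((p)->ipa)
    #define PPAGE_END(p)	((p)->ipa + (PAGE_SIZE << (p)->order) - 1)

    /* Generates kvm_pinned_pages_insert/remove/iter_first/iter_next. */
    INTERVAL_TREE_DEFINE(struct kvm_pinned_page, node, u64, __subtree_last,
    		     PPAGE_START, PPAGE_END, static, kvm_pinned_pages);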

@@ -672,6 +672,7 @@ CONFIG_CRYPTO_ZSTD=y
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_CRYPTO_AES_NI_INTEL=y
CONFIG_CRYPTO_POLYVAL_CLMUL_NI=y
+CONFIG_CRYPTO_SHA1_SSSE3=y
CONFIG_CRYPTO_SHA256_SSSE3=y
CONFIG_CRYPTO_SHA512_SSSE3=y
CONFIG_CRC_CCITT=y


@@ -14,12 +14,6 @@ CONFIG_UCLAMP_TASK=y
CONFIG_UCLAMP_BUCKETS_COUNT=20
CONFIG_CGROUPS=y
CONFIG_MEMCG=y
-CONFIG_BLK_CGROUP=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_UCLAMP_TASK_GROUP=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
# CONFIG_RD_BZIP2 is not set
# CONFIG_RD_LZMA is not set
# CONFIG_RD_XZ is not set
@@ -47,7 +41,6 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_JUMP_LABEL=y
# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
-CONFIG_BLK_CGROUP_IOCOST=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
# CONFIG_MQ_IOSCHED_DEADLINE is not set
@@ -209,6 +202,7 @@ CONFIG_CRYPTO_HCTR2=y
CONFIG_CRYPTO_LZO=y
CONFIG_CRYPTO_AES_NI_INTEL=y
CONFIG_CRYPTO_POLYVAL_CLMUL_NI=y
+CONFIG_CRYPTO_SHA1_SSSE3=y
CONFIG_CRYPTO_SHA256_SSSE3=y
CONFIG_CRYPTO_SHA512_SSSE3=y
CONFIG_PRINTK_TIME=y


@@ -6645,7 +6645,7 @@ static void print_binder_work_ilocked(struct seq_file *m,
struct binder_proc *proc,
const char *prefix,
const char *transaction_prefix,
-struct binder_work *w)
+struct binder_work *w, bool hash_ptrs)
{
struct binder_node *node;
struct binder_transaction *t;
@@ -6668,6 +6668,12 @@ static void print_binder_work_ilocked(struct seq_file *m,
break;
case BINDER_WORK_NODE:
node = container_of(w, struct binder_node, work);
+if (hash_ptrs)
+seq_printf(m, "%snode work %d: u%p c%p\n",
+prefix, node->debug_id,
+(void *)(long)node->ptr,
+(void *)(long)node->cookie);
+else
seq_printf(m, "%snode work %d: u%016llx c%016llx\n",
prefix, node->debug_id,
(u64)node->ptr, (u64)node->cookie);
@@ -6695,7 +6701,7 @@ static void print_binder_work_ilocked(struct seq_file *m,
static void print_binder_thread_ilocked(struct seq_file *m,
struct binder_thread *thread,
-int print_always)
+bool print_always, bool hash_ptrs)
{
struct binder_transaction *t;
struct binder_work *w;
@@ -6725,14 +6731,16 @@ static void print_binder_thread_ilocked(struct seq_file *m,
}
list_for_each_entry(w, &thread->todo, entry) {
print_binder_work_ilocked(m, thread->proc, " ",
-" pending transaction", w);
+" pending transaction",
+w, hash_ptrs);
}
if (!print_always && m->count == header_pos)
m->count = start_pos;
}
static void print_binder_node_nilocked(struct seq_file *m,
-struct binder_node *node)
+struct binder_node *node,
+bool hash_ptrs)
{
struct binder_ref *ref;
struct binder_work *w;
@@ -6742,8 +6750,13 @@ static void print_binder_node_nilocked(struct seq_file *m,
hlist_for_each_entry(ref, &node->refs, node_entry)
count++;
-seq_printf(m, " node %d: u%016llx c%016llx pri %d:%d hs %d hw %d ls %d lw %d is %d iw %d tr %d",
-node->debug_id, (u64)node->ptr, (u64)node->cookie,
+if (hash_ptrs)
+seq_printf(m, " node %d: u%p c%p", node->debug_id,
+(void *)(long)node->ptr, (void *)(long)node->cookie);
+else
+seq_printf(m, " node %d: u%016llx c%016llx", node->debug_id,
+(u64)node->ptr, (u64)node->cookie);
+seq_printf(m, " pri %d:%d hs %d hw %d ls %d lw %d is %d iw %d tr %d",
node->sched_policy, node->min_priority,
node->has_strong_ref, node->has_weak_ref,
node->local_strong_refs, node->local_weak_refs,
@@ -6757,7 +6770,8 @@ static void print_binder_node_nilocked(struct seq_file *m,
if (node->proc) {
list_for_each_entry(w, &node->async_todo, entry)
print_binder_work_ilocked(m, node->proc, " ",
-" pending async transaction", w);
+" pending async transaction",
+w, hash_ptrs);
}
}
@@ -6773,8 +6787,54 @@ static void print_binder_ref_olocked(struct seq_file *m,
binder_node_unlock(ref->node);
}
-static void print_binder_proc(struct seq_file *m,
-struct binder_proc *proc, int print_all)
+/**
+* print_next_binder_node_ilocked() - Print binder_node from a locked list
+* @m: struct seq_file for output via seq_printf()
+* @proc: struct binder_proc we hold the inner_proc_lock to (if any)
+* @node: struct binder_node to print fields of
+* @prev_node: struct binder_node we hold a temporary reference to (if any)
+* @hash_ptrs: whether to hash @node's binder_uintptr_t fields
+*
+* Helper function to handle synchronization around printing a struct
+* binder_node while iterating through @proc->nodes or the dead nodes list.
+* Caller must hold either @proc->inner_lock (for live nodes) or
+* binder_dead_nodes_lock. This lock will be released during the body of this
+* function, but it will be reacquired before returning to the caller.
+*
+* Return: pointer to the struct binder_node we hold a tmpref on
+*/
+static struct binder_node *
+print_next_binder_node_ilocked(struct seq_file *m, struct binder_proc *proc,
+struct binder_node *node,
+struct binder_node *prev_node, bool hash_ptrs)
+{
+/*
+* Take a temporary reference on the node so that isn't freed while
+* we print it.
+*/
+binder_inc_node_tmpref_ilocked(node);
+/*
+* Live nodes need to drop the inner proc lock and dead nodes need to
+* drop the binder_dead_nodes_lock before trying to take the node lock.
+*/
+if (proc)
+binder_inner_proc_unlock(proc);
+else
+spin_unlock(&binder_dead_nodes_lock);
+if (prev_node)
+binder_put_node(prev_node);
+binder_node_inner_lock(node);
+print_binder_node_nilocked(m, node, hash_ptrs);
+binder_node_inner_unlock(node);
+if (proc)
+binder_inner_proc_lock(proc);
+else
+spin_lock(&binder_dead_nodes_lock);
+return node;
+}
+
+static void print_binder_proc(struct seq_file *m, struct binder_proc *proc,
+bool print_all, bool hash_ptrs)
{
struct binder_work *w;
struct rb_node *n;
@@ -6787,31 +6847,19 @@ static void print_binder_proc(struct seq_file *m,
header_pos = m->count;
binder_inner_proc_lock(proc);
-for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n))
+for (n = rb_first(&proc->threads); n; n = rb_next(n))
print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread,
-rb_node), print_all);
+rb_node), print_all, hash_ptrs);
-for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) {
+for (n = rb_first(&proc->nodes); n; n = rb_next(n)) {
struct binder_node *node = rb_entry(n, struct binder_node,
rb_node);
if (!print_all && !node->has_async_transaction)
continue;
-/*
-* take a temporary reference on the node so it
-* survives and isn't removed from the tree
-* while we print it.
-*/
-binder_inc_node_tmpref_ilocked(node);
-/* Need to drop inner lock to take node lock */
-binder_inner_proc_unlock(proc);
-if (last_node)
-binder_put_node(last_node);
-binder_node_inner_lock(node);
-print_binder_node_nilocked(m, node);
-binder_node_inner_unlock(node);
-last_node = node;
-binder_inner_proc_lock(proc);
+last_node = print_next_binder_node_ilocked(m, proc, node,
+last_node,
+hash_ptrs);
}
binder_inner_proc_unlock(proc);
if (last_node)
@@ -6819,9 +6867,7 @@ static void print_binder_proc(struct seq_file *m,
if (print_all) {
binder_proc_lock(proc);
-for (n = rb_first(&proc->refs_by_desc);
-n != NULL;
-n = rb_next(n))
+for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n))
print_binder_ref_olocked(m, rb_entry(n,
struct binder_ref,
rb_node_desc));
@@ -6831,7 +6877,8 @@ static void print_binder_proc(struct seq_file *m,
binder_inner_proc_lock(proc);
list_for_each_entry(w, &proc->todo, entry)
print_binder_work_ilocked(m, proc, " ",
-" pending transaction", w);
+" pending transaction", w,
+hash_ptrs);
list_for_each_entry(w, &proc->delivered_death, entry) {
seq_puts(m, " has delivered dead binder\n");
break;
@@ -6958,7 +7005,7 @@ static void print_binder_proc_stats(struct seq_file *m,
count = 0;
ready_threads = 0;
binder_inner_proc_lock(proc);
-for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n))
+for (n = rb_first(&proc->threads); n; n = rb_next(n))
count++;
list_for_each_entry(thread, &proc->waiting_threads, waiting_thread_node)
@@ -6972,7 +7019,7 @@ static void print_binder_proc_stats(struct seq_file *m,
ready_threads,
free_async_space);
count = 0;
-for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n))
+for (n = rb_first(&proc->nodes); n; n = rb_next(n))
count++;
binder_inner_proc_unlock(proc);
seq_printf(m, " nodes: %d\n", count);
@@ -6980,7 +7027,7 @@ static void print_binder_proc_stats(struct seq_file *m,
strong = 0;
weak = 0;
binder_proc_lock(proc);
-for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) {
+for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) {
struct binder_ref *ref = rb_entry(n, struct binder_ref,
rb_node_desc);
count++;
@@ -7007,7 +7054,7 @@ static void print_binder_proc_stats(struct seq_file *m,
print_binder_stats(m, " ", &proc->stats);
}
-static int state_show(struct seq_file *m, void *unused)
+static void print_binder_state(struct seq_file *m, bool hash_ptrs)
{
struct binder_proc *proc;
struct binder_node *node;
@@ -7018,31 +7065,40 @@ static int state_show(struct seq_file *m, void *unused)
spin_lock(&binder_dead_nodes_lock);
if (!hlist_empty(&binder_dead_nodes))
seq_puts(m, "dead nodes:\n");
-hlist_for_each_entry(node, &binder_dead_nodes, dead_node) {
-/*
-* take a temporary reference on the node so it
-* survives and isn't removed from the list
-* while we print it.
-*/
-node->tmp_refs++;
-spin_unlock(&binder_dead_nodes_lock);
-if (last_node)
-binder_put_node(last_node);
-binder_node_lock(node);
-print_binder_node_nilocked(m, node);
-binder_node_unlock(node);
-last_node = node;
-spin_lock(&binder_dead_nodes_lock);
-}
+hlist_for_each_entry(node, &binder_dead_nodes, dead_node)
+last_node = print_next_binder_node_ilocked(m, NULL, node,
+last_node,
+hash_ptrs);
spin_unlock(&binder_dead_nodes_lock);
if (last_node)
binder_put_node(last_node);
mutex_lock(&binder_procs_lock);
hlist_for_each_entry(proc, &binder_procs, proc_node)
-print_binder_proc(m, proc, 1);
+print_binder_proc(m, proc, true, hash_ptrs);
mutex_unlock(&binder_procs_lock);
+}
+
+static void print_binder_transactions(struct seq_file *m, bool hash_ptrs)
+{
+struct binder_proc *proc;
+
+seq_puts(m, "binder transactions:\n");
+mutex_lock(&binder_procs_lock);
+hlist_for_each_entry(proc, &binder_procs, proc_node)
+print_binder_proc(m, proc, false, hash_ptrs);
+mutex_unlock(&binder_procs_lock);
+}
+
+static int state_show(struct seq_file *m, void *unused)
+{
+print_binder_state(m, false);
+return 0;
+}
+
+static int state_hashed_show(struct seq_file *m, void *unused)
+{
+print_binder_state(m, true);
return 0;
}
@@ -7064,14 +7120,13 @@ static int stats_show(struct seq_file *m, void *unused)
static int transactions_show(struct seq_file *m, void *unused)
{
-struct binder_proc *proc;
+print_binder_transactions(m, false);
+return 0;
+}
-seq_puts(m, "binder transactions:\n");
-mutex_lock(&binder_procs_lock);
-hlist_for_each_entry(proc, &binder_procs, proc_node)
-print_binder_proc(m, proc, 0);
-mutex_unlock(&binder_procs_lock);
+static int transactions_hashed_show(struct seq_file *m, void *unused)
+{
+print_binder_transactions(m, true);
return 0;
}
@@ -7084,7 +7139,7 @@ static int proc_show(struct seq_file *m, void *unused)
hlist_for_each_entry(itr, &binder_procs, proc_node) {
if (itr->pid == pid) {
seq_puts(m, "binder proc state:\n");
-print_binder_proc(m, itr, 1);
+print_binder_proc(m, itr, true, false);
}
}
mutex_unlock(&binder_procs_lock);
@@ -7151,8 +7206,10 @@ const struct file_operations binder_fops = {
};
DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(state_hashed);
DEFINE_SHOW_ATTRIBUTE(stats);
DEFINE_SHOW_ATTRIBUTE(transactions);
+DEFINE_SHOW_ATTRIBUTE(transactions_hashed);
DEFINE_SHOW_ATTRIBUTE(transaction_log);
const struct binder_debugfs_entry binder_debugfs_entries[] = {
@@ -7162,6 +7219,12 @@ const struct binder_debugfs_entry binder_debugfs_entries[] = {
.fops = &state_fops,
.data = NULL,
},
+{
+.name = "state_hashed",
+.mode = 0444,
+.fops = &state_hashed_fops,
+.data = NULL,
+},
{
.name = "stats",
.mode = 0444,
@@ -7174,6 +7237,12 @@ const struct binder_debugfs_entry binder_debugfs_entries[] = {
.fops = &transactions_fops,
.data = NULL,
},
+{
+.name = "transactions_hashed",
+.mode = 0444,
+.fops = &transactions_hashed_fops,
+.data = NULL,
+},
{
.name = "transaction_log",
.mode = 0444,

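For context on the hashed variants added above: with hash_ptrs set, node addresses are printed with %p, which the kernel hashes by default, while the existing files keep the raw %016llx format. A minimal illustration of the difference (not the driver's code):

    /* Illustration only: %p prints a hashed value, %016llx the raw cookie. */
    pr_info("node %d: u%p c%p\n", debug_id, (void *)(long)ptr, (void *)(long)cookie);
    pr_info("node %d: u%016llx c%016llx\n", debug_id, (u64)ptr, (u64)cookie);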

@@ -490,6 +490,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_lruvec_add_folio);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_lruvec_del_folio);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_add_lazyfree_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_do_async_mmap_readahead);
+EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mm_free_page);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_update_page_mapcount);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_add_page_to_lrulist);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_del_page_from_lrulist);
@@ -676,3 +677,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_filemap_fault_pre_folio_locked);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_filemap_folio_mapped);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_folio_remove_rmap_ptes);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_pageset_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_xhci_full_reset_on_remove);
+EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mempool_alloc_skip_wait);


@@ -1002,7 +1002,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer)
* If 'expires' is after the current time, we've been called
* too early.
*/
-if (expires > 0 && expires < ktime_get_mono_fast_ns()) {
+if (expires > 0 && expires <= ktime_get_mono_fast_ns()) {
dev->power.timer_expires = 0;
rpm_suspend(dev, dev->power.timer_autosuspends ?
(RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);


@@ -284,15 +284,13 @@ static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_dom
return 0;
}
-kvm_smmu_domain->smmu = smmu;
if (kvm_smmu_domain->domain.type == IOMMU_DOMAIN_IDENTITY) {
kvm_smmu_domain->id = KVM_IOMMU_DOMAIN_IDMAP_ID;
/*
* Identity domains doesn't use the DMA API, so no need to
* set the domain aperture.
*/
-return 0;
+goto out;
}
/* Default to stage-1. */
@@ -325,7 +323,13 @@ static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_dom
ret = kvm_call_hyp_nvhe_mc(__pkvm_host_iommu_alloc_domain,
kvm_smmu_domain->id, kvm_smmu_domain->type);
+if (ret) {
+ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
+return ret;
+}
+out:
+kvm_smmu_domain->smmu = smmu;
return ret;
}


@@ -629,7 +629,6 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
int tag = scsi_cmd_to_rq(cmd)->tag;
struct ufshcd_lrb *lrbp = &hba->lrb[tag];
struct ufs_hw_queue *hwq;
-unsigned long flags;
int err;
/* Skip task abort in case previous aborts failed and report failure */
@@ -668,10 +667,5 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
return FAILED;
}
-spin_lock_irqsave(&hwq->cq_lock, flags);
-if (ufshcd_cmd_inflight(lrbp->cmd))
-ufshcd_release_scsi_cmd(hba, lrbp);
-spin_unlock_irqrestore(&hwq->cq_lock, flags);
return SUCCESS;
}


@@ -6545,9 +6545,14 @@ static void ufshcd_err_handler(struct work_struct *work)
up(&hba->host_sem);
return;
}
+spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+ufshcd_err_handling_prepare(hba);
+
+spin_lock_irqsave(hba->host->host_lock, flags);
ufshcd_set_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags);
-ufshcd_err_handling_prepare(hba);
/* Complete requests that have door-bell cleared by h/w */
ufshcd_complete_requests(hba, false);
spin_lock_irqsave(hba->host->host_lock, flags);


@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/dmi.h>
#include <linux/dma-mapping.h>
+#include <trace/hooks/usb.h>
#include "xhci.h"
#include "xhci-trace.h"
@@ -196,6 +197,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us)
u32 command;
u32 state;
int ret;
+bool full_reset = 0;
state = readl(&xhci->op_regs->status);
@@ -224,8 +226,11 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us)
if (xhci->quirks & XHCI_INTEL_HOST)
udelay(1000);
+trace_android_vh_xhci_full_reset_on_remove(&full_reset);
+
ret = xhci_handshake_check_state(xhci, &xhci->op_regs->command,
-CMD_RESET, 0, timeout_us, XHCI_STATE_REMOVING);
+CMD_RESET, 0, timeout_us,
+full_reset ? 0 : XHCI_STATE_REMOVING);
if (ret)
return ret;


@@ -547,6 +547,14 @@ struct pd_rx_event {
struct pd_message msg;
};
+struct altmode_vdm_event {
+struct kthread_work work;
+struct tcpm_port *port;
+u32 header;
+u32 *data;
+int cnt;
+};
+
static const char * const pd_rev[] = {
[PD_REV10] = "rev1",
[PD_REV20] = "rev2",
@@ -1531,12 +1539,64 @@ static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header,
mod_vdm_delayed_work(port, 0);
}
-static void tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header,
+static void tcpm_queue_vdm_work(struct kthread_work *work)
+{
+struct altmode_vdm_event *event = container_of(work,
+struct altmode_vdm_event,
+work);
+struct tcpm_port *port = event->port;
+
+mutex_lock(&port->lock);
+if (port->state != SRC_READY && port->state != SNK_READY) {
+tcpm_log_force(port, "dropping altmode_vdm_event");
+goto port_unlock;
+}
+
+tcpm_queue_vdm(port, event->header, event->data, event->cnt);
+
+port_unlock:
+kfree(event->data);
+kfree(event);
+mutex_unlock(&port->lock);
+}
+
+static int tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header,
const u32 *data, int cnt)
{
-mutex_lock(&port->lock);
-tcpm_queue_vdm(port, header, data, cnt);
-mutex_unlock(&port->lock);
+struct altmode_vdm_event *event;
+u32 *data_cpy;
+int ret = -ENOMEM;
+
+event = kzalloc(sizeof(*event), GFP_KERNEL);
+if (!event)
+goto err_event;
+
+data_cpy = kcalloc(cnt, sizeof(u32), GFP_KERNEL);
+if (!data_cpy)
+goto err_data;
+
+kthread_init_work(&event->work, tcpm_queue_vdm_work);
+event->port = port;
+event->header = header;
+memcpy(data_cpy, data, sizeof(u32) * cnt);
+event->data = data_cpy;
+event->cnt = cnt;
+
+ret = kthread_queue_work(port->wq, &event->work);
+if (!ret) {
+ret = -EBUSY;
+goto err_queue;
+}
+
+return 0;
+
+err_queue:
+kfree(data_cpy);
+err_data:
+kfree(event);
+err_event:
+tcpm_log_force(port, "failed to queue altmode vdm, err:%d", ret);
+return ret;
}
static void svdm_consume_identity(struct tcpm_port *port, const u32 *p, int cnt)
@@ -2297,8 +2357,7 @@ static int tcpm_altmode_enter(struct typec_altmode *altmode, u32 *vdo)
header = VDO(altmode->svid, vdo ? 2 : 1, svdm_version, CMD_ENTER_MODE);
header |= VDO_OPOS(altmode->mode);
-tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0);
-return 0;
+return tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0);
}
static int tcpm_altmode_exit(struct typec_altmode *altmode)
@@ -2314,8 +2373,7 @@ static int tcpm_altmode_exit(struct typec_altmode *altmode)
header = VDO(altmode->svid, 1, svdm_version, CMD_EXIT_MODE);
header |= VDO_OPOS(altmode->mode);
-tcpm_queue_vdm_unlocked(port, header, NULL, 0);
-return 0;
+return tcpm_queue_vdm_unlocked(port, header, NULL, 0);
}
static int tcpm_altmode_vdm(struct typec_altmode *altmode,
@@ -2323,9 +2381,7 @@ static int tcpm_altmode_vdm(struct typec_altmode *altmode,
{
struct tcpm_port *port = typec_altmode_get_drvdata(altmode);
-tcpm_queue_vdm_unlocked(port, header, data, count - 1);
-return 0;
+return tcpm_queue_vdm_unlocked(port, header, data, count - 1);
}
static const struct typec_altmode_ops tcpm_altmode_ops = {

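The tcpm change above queues alternate-mode VDMs onto the port's kthread worker instead of taking port->lock inline, so the altmode ops can now fail and callers should check the return value. A hedged sketch of how an altmode driver might react to the new failure case (the driver name and policy are made up for illustration):

    /* Illustrative caller; the retry/warn policy here is an assumption. */
    static int example_altmode_enter(struct typec_altmode *alt, u32 *vdo)
    {
    	int ret = typec_altmode_enter(alt, vdo);

    	if (ret)	/* VDM could not be queued, e.g. port not ready */
    		pr_warn("enter mode not queued: %d\n", ret);
    	return ret;
    }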

@@ -336,6 +336,7 @@ static struct workqueue_struct *z_erofs_workqueue __read_mostly;
#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
static struct kthread_worker __rcu **z_erofs_pcpu_workers;
+static atomic_t erofs_percpu_workers_initialized = ATOMIC_INIT(0);
static void erofs_destroy_percpu_workers(void)
{
@@ -381,12 +382,8 @@ static int erofs_init_percpu_workers(void)
}
return 0;
}
-#else
-static inline void erofs_destroy_percpu_workers(void) {}
-static inline int erofs_init_percpu_workers(void) { return 0; }
-#endif
-#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_EROFS_FS_PCPU_KTHREAD)
+#ifdef CONFIG_HOTPLUG_CPU
static DEFINE_SPINLOCK(z_erofs_pcpu_worker_lock);
static enum cpuhp_state erofs_cpuhp_state;
@@ -443,15 +440,53 @@ static void erofs_cpu_hotplug_destroy(void)
if (erofs_cpuhp_state)
cpuhp_remove_state_nocalls(erofs_cpuhp_state);
}
-#else /* !CONFIG_HOTPLUG_CPU || !CONFIG_EROFS_FS_PCPU_KTHREAD */
+#else /* !CONFIG_HOTPLUG_CPU */
static inline int erofs_cpu_hotplug_init(void) { return 0; }
static inline void erofs_cpu_hotplug_destroy(void) {}
-#endif
+#endif/* CONFIG_HOTPLUG_CPU */
+
+static int z_erofs_init_pcpu_workers(struct super_block *sb)
+{
+int err;
+
+if (atomic_xchg(&erofs_percpu_workers_initialized, 1))
+return 0;
+
+err = erofs_init_percpu_workers();
+if (err) {
+erofs_err(sb, "per-cpu workers: failed to allocate.");
+goto err_init_percpu_workers;
+}
+
+err = erofs_cpu_hotplug_init();
+if (err < 0) {
+erofs_err(sb, "per-cpu workers: failed CPU hotplug init.");
+goto err_cpuhp_init;
+}
+erofs_info(sb, "initialized per-cpu workers successfully.");
+return err;
+
+err_cpuhp_init:
+erofs_destroy_percpu_workers();
+err_init_percpu_workers:
+atomic_set(&erofs_percpu_workers_initialized, 0);
+return err;
+}
+
+static void z_erofs_destroy_pcpu_workers(void)
+{
+if (!atomic_xchg(&erofs_percpu_workers_initialized, 0))
+return;
+erofs_cpu_hotplug_destroy();
+erofs_destroy_percpu_workers();
+}
+#else /* !CONFIG_EROFS_FS_PCPU_KTHREAD */
+static inline int z_erofs_init_pcpu_workers(struct super_block *sb) { return 0; }
+static inline void z_erofs_destroy_pcpu_workers(void) {}
+#endif/* CONFIG_EROFS_FS_PCPU_KTHREAD */
void z_erofs_exit_zip_subsystem(void)
{
-erofs_cpu_hotplug_destroy();
-erofs_destroy_percpu_workers();
+z_erofs_destroy_pcpu_workers();
destroy_workqueue(z_erofs_workqueue);
z_erofs_destroy_pcluster_pool();
}
@@ -467,23 +502,12 @@ int __init z_erofs_init_zip_subsystem(void)
WQ_UNBOUND | WQ_HIGHPRI, num_possible_cpus());
if (!z_erofs_workqueue) {
err = -ENOMEM;
-goto out_error_workqueue_init;
+goto out_err_workqueue_init;
}
-err = erofs_init_percpu_workers();
-if (err)
-goto out_error_pcpu_worker;
-err = erofs_cpu_hotplug_init();
-if (err < 0)
-goto out_error_cpuhp_init;
return err;
-out_error_cpuhp_init:
-erofs_destroy_percpu_workers();
-out_error_pcpu_worker:
-destroy_workqueue(z_erofs_workqueue);
-out_error_workqueue_init:
+out_err_workqueue_init:
z_erofs_destroy_pcluster_pool();
out_error_pcluster_pool:
return err;
@@ -711,8 +735,14 @@ static const struct address_space_operations z_erofs_cache_aops = {
int erofs_init_managed_cache(struct super_block *sb)
{
-struct inode *const inode = new_inode(sb);
+struct inode *inode;
+int err;
+
+err = z_erofs_init_pcpu_workers(sb);
+if (err)
+return err;
+
+inode = new_inode(sb);
if (!inode)
return -ENOMEM;


@@ -799,6 +799,10 @@ int fuse_file_read_iter_initialize(
.size = to->count,
};
+fri->frio = (struct fuse_read_iter_out) {
+.ret = fri->fri.size,
+};
+
/* TODO we can't assume 'to' is a kvec */
/* TODO we also can't assume the vector has only one component */
*fa = (struct fuse_bpf_args) {
@@ -833,6 +837,11 @@ int fuse_file_read_iter_backing(struct fuse_bpf_args *fa,
if (!iov_iter_count(to))
return 0;
+if ((iocb->ki_flags & IOCB_DIRECT) &&
+(!ff->backing_file->f_mapping->a_ops ||
+!ff->backing_file->f_mapping->a_ops->direct_IO))
+return -EINVAL;
+
/* TODO This just plain ignores any change to fuse_read_in */
if (is_sync_kiocb(iocb)) {
ret = vfs_iter_read(ff->backing_file, to, &iocb->ki_pos,
@@ -855,13 +864,14 @@ int fuse_file_read_iter_backing(struct fuse_bpf_args *fa,
fuse_bpf_aio_cleanup_handler(aio_req);
}
+frio->ret = ret;
+
/* TODO Need to point value at the buffer for post-modification */
out:
fuse_file_accessed(file, ff->backing_file);
-frio->ret = ret;
-return ret < 0 ? ret : 0;
+return ret;
}
void *fuse_file_read_iter_finalize(struct fuse_bpf_args *fa,


@@ -41,6 +41,24 @@ struct poll_table_struct;
/* define the enumeration of all cgroup subsystems */
#define SUBSYS(_x) _x ## _cgrp_id,
+
+#define CSS_COUNTERS_SIZE (CGROUP_SUBSYS_COUNT * sizeof(atomic_t))
+
+/*
+ * This should just use max(), but max() doesn't work in struct definitions.
+ *
+ * Originally, the space was reserved for per cgroup subsystem counters, where each counter was
+ * the size of an atomic_t variable. However, it was later reused to fit a struct rcu_head
+ * which is why the calculation considers the size of struct rcu_head.
+ *
+ * This macro is provided to ANDROID_BACKPORT_USE_ARRAY() which needs to reserve at least
+ * enough memory to accommodate struct rcu_head. However, if we only reserve CSS_COUNTERS_SIZE,
+ * that may not be enough space on kernels with a small amount of cgroup subsystems enabled. So,
+ * we take the max between the two values to use in ANDROID_BACKPORT_USE_ARRAY().
+ */
+#define CGROUP_ROOT_BACKPORT_PADDING_SIZE \
+(CSS_COUNTERS_SIZE > sizeof(struct rcu_head) ? CSS_COUNTERS_SIZE : sizeof(struct rcu_head))
+
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
CGROUP_SUBSYS_COUNT,
@@ -585,8 +603,12 @@ struct cgroup_root {
/* The name for this hierarchy - may be empty */
char name[MAX_CGROUP_ROOT_NAMELEN];
-ANDROID_BACKPORT_USE_ARRAY(1, CGROUP_SUBSYS_COUNT * sizeof(atomic_t),
-struct rcu_head rcu);
+/* Use the original calculation to preserve the CRC value for the ABI. */
+#ifndef __GENKSYMS__
+ANDROID_BACKPORT_USE_ARRAY(1, CGROUP_ROOT_BACKPORT_PADDING_SIZE, struct rcu_head rcu);
+#else
+ANDROID_BACKPORT_USE_ARRAY(1, CGROUP_SUBSYS_COUNT * sizeof(atomic_t), struct rcu_head rcu);
+#endif
};
/*

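The padding comment above notes that max() cannot appear in a struct definition, so the backport uses a ternary that stays an integer constant expression. A quick way one could sanity-check the reserved size at build time; this is illustrative only and not part of the patch:

    /* Illustrative compile-time checks; not part of the backport. */
    static_assert(CGROUP_ROOT_BACKPORT_PADDING_SIZE >= sizeof(struct rcu_head));
    static_assert(CGROUP_ROOT_BACKPORT_PADDING_SIZE >= CSS_COUNTERS_SIZE);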

@@ -277,15 +277,25 @@ struct mthp_stat {
#ifdef CONFIG_SYSFS
DECLARE_PER_CPU(struct mthp_stat, mthp_stats);
-static inline void count_mthp_stat(int order, enum mthp_stat_item item)
+static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
{
if (order <= 0 || order > PMD_ORDER)
return;
-this_cpu_inc(mthp_stats.stats[order][item]);
+this_cpu_add(mthp_stats.stats[order][item], delta);
}
+static inline void count_mthp_stat(int order, enum mthp_stat_item item)
+{
+mod_mthp_stat(order, item, 1);
+}
+
unsigned long sum_mthp_stat(int order, enum mthp_stat_item item);
#else
+static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
+{
+}
+
static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
}
@@ -326,7 +336,7 @@ static inline int split_huge_page(struct page *page)
{
return split_huge_page_to_list(page, NULL);
}
-void deferred_split_folio(struct folio *folio);
+void deferred_split_folio(struct folio *folio, bool partially_mapped);
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, bool freeze, struct folio *folio);
@@ -486,7 +496,7 @@ static inline int split_huge_page(struct page *page)
{
return 0;
}
-static inline void deferred_split_folio(struct folio *folio) {}
+static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {}
#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)


@@ -4,6 +4,7 @@
#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
+extern unsigned int khugepaged_max_ptes_none __read_mostly;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern struct attribute_group khugepaged_attr_group;


@@ -731,8 +731,15 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
__mem_cgroup_uncharge_list(page_list);
}
-void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
+void __mem_cgroup_uncharge_folios(struct folio_batch *folios);
+static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
+{
+if (mem_cgroup_disabled())
+return;
+__mem_cgroup_uncharge_folios(folios);
+}
+void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
void mem_cgroup_migrate(struct folio *old, struct folio *new);
/**
@@ -1171,6 +1178,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
+extern int mem_cgroup_init(void);
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
@@ -1297,6 +1305,10 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
+static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
+{
+}
+
static inline void mem_cgroup_replace_folio(struct folio *old,
struct folio *new)
{
@@ -1619,6 +1631,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
{
return 0;
}
+
+static inline int mem_cgroup_init(void) { return 0; }
#endif /* CONFIG_MEMCG */
static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
@@ -1682,18 +1696,18 @@ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
return folio_lruvec_lock_irq(folio);
}
-/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
-struct lruvec *locked_lruvec, unsigned long *flags)
+/* Don't lock again iff folio's lruvec locked */
+static inline void folio_lruvec_relock_irqsave(struct folio *folio,
+struct lruvec **lruvecp, unsigned long *flags)
{
-if (locked_lruvec) {
-if (folio_matches_lruvec(folio, locked_lruvec))
-return locked_lruvec;
+if (*lruvecp) {
+if (folio_matches_lruvec(folio, *lruvecp))
+return;
-unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
+unlock_page_lruvec_irqrestore(*lruvecp, *flags);
}
-return folio_lruvec_lock_irqsave(folio, flags);
+*lruvecp = folio_lruvec_lock_irqsave(folio, flags);
}
#ifdef CONFIG_CGROUP_WRITEBACK

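The relock helper above now updates the locked lruvec through a pointer instead of returning it, which suits batched release loops such as the reworked folios_put_refs() in this series. A sketch of the intended calling pattern; the loop body is illustrative, not the actual mm/swap.c code:

    /* Illustrative caller loop over a folio_batch. */
    struct lruvec *lruvec = NULL;
    unsigned long flags;
    unsigned int i;

    for (i = 0; i < folios->nr; i++) {
    	struct folio *folio = folios->folios[i];

    	folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
    	lruvec_del_folio(lruvec, folio);	/* example per-folio work */
    }
    if (lruvec)
    	unlock_page_lruvec_irqrestore(lruvec, flags);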

@@ -39,6 +39,7 @@ struct anon_vma;
struct anon_vma_chain;
struct user_struct;
struct pt_regs;
+struct folio_batch;
extern int sysctl_page_lock_unfairness;
@@ -1539,6 +1540,8 @@ static inline void folio_put_refs(struct folio *folio, int refs)
__folio_put(folio);
}
+void folios_put_refs(struct folio_batch *folios, unsigned int *refs);
+
/*
* union release_pages_arg - an array of pages or folios
*
@@ -1561,18 +1564,19 @@ void release_pages(release_pages_arg, int nr);
/**
* folios_put - Decrement the reference count on an array of folios.
* @folios: The folios.
-* @nr: How many folios there are.
*
-* Like folio_put(), but for an array of folios. This is more efficient
-* than writing the loop yourself as it will optimise the locks which
-* need to be taken if the folios are freed.
+* Like folio_put(), but for a batch of folios. This is more efficient
+* than writing the loop yourself as it will optimise the locks which need
+* to be taken if the folios are freed. The folios batch is returned
+* empty and ready to be reused for another batch; there is no need to
+* reinitialise it.
*
* Context: May be called in process or interrupt context, but not in NMI
* context. May be called while holding a spinlock.
*/
-static inline void folios_put(struct folio **folios, unsigned int nr)
+static inline void folios_put(struct folio_batch *folios)
{
-release_pages(folios, nr);
+folios_put_refs(folios, NULL);
}
static inline void put_page(struct page *page)

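Since folios_put() now takes a folio_batch rather than an array plus count, callers collect folios into a batch and get the batch back empty. A minimal usage sketch under that assumption; the helper name here is made up:

    #include <linux/pagevec.h>

    /* Illustrative only: release a set of folios in batched calls. */
    static void example_release(struct folio **src, unsigned int nr)
    {
    	struct folio_batch fbatch;
    	unsigned int i;

    	folio_batch_init(&fbatch);
    	for (i = 0; i < nr; i++) {
    		if (!folio_batch_add(&fbatch, src[i]))
    			folios_put(&fbatch);	/* batch full: drop refs now */
    	}
    	folios_put(&fbatch);			/* batch is reusable afterwards */
    }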

@@ -37,6 +37,22 @@
#define NR_PAGE_ORDERS (MAX_ORDER + 1)
+/* Defines the order for the number of pages that have a migrate type. */
+#ifndef CONFIG_PAGE_BLOCK_ORDER
+#define PAGE_BLOCK_ORDER MAX_ORDER
+#else
+#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
+#endif /* CONFIG_PAGE_BLOCK_ORDER */
+
+/*
+ * The MAX_ORDER, which defines the max order of pages to be allocated
+ * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER,
+ * which defines the order for the number of pages that can have a migrate type
+ */
+#if (PAGE_BLOCK_ORDER > MAX_ORDER)
+#error MAX_ORDER must be >= PAGE_BLOCK_ORDER
+#endif
+
/*
* PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
* costly to service. That is between allocation orders which should


@@ -197,6 +197,7 @@ enum pageflags {
/* At least one page in this folio has the hwpoison flag set */
PG_has_hwpoisoned = PG_error,
PG_large_rmappable = PG_workingset, /* anon or file-backed */
+PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */
};
#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1)
@@ -372,54 +373,77 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
#define FOLIO_PF_NO_COMPOUND 0
#define FOLIO_PF_SECOND 1
+#define FOLIO_HEAD_PAGE 0
+#define FOLIO_SECOND_PAGE 1
+
/*
* Macros to create function definitions for page flags
*/
+#define FOLIO_TEST_FLAG(name, page) \
+static __always_inline bool folio_test_##name(struct folio *folio) \
+{ return test_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_SET_FLAG(name, page) \
+static __always_inline void folio_set_##name(struct folio *folio) \
+{ set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void folio_clear_##name(struct folio *folio) \
+{ clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_SET_FLAG(name, page) \
+static __always_inline void __folio_set_##name(struct folio *folio) \
+{ __set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void __folio_clear_##name(struct folio *folio) \
+{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_SET_FLAG(name, page) \
+static __always_inline bool folio_test_set_##name(struct folio *folio) \
+{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_CLEAR_FLAG(name, page) \
+static __always_inline bool folio_test_clear_##name(struct folio *folio) \
+{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_FLAG(name, page) \
+FOLIO_TEST_FLAG(name, page) \
+FOLIO_SET_FLAG(name, page) \
+FOLIO_CLEAR_FLAG(name, page)
+
#define TESTPAGEFLAG(uname, lname, policy) \
-static __always_inline bool folio_test_##lname(struct folio *folio) \
-{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_FLAG(lname, FOLIO_##policy) \
static __always_inline int Page##uname(struct page *page) \
{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
#define SETPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void folio_set_##lname(struct folio *folio) \
-{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline void SetPage##uname(struct page *page) \
{ set_bit(PG_##lname, &policy(page, 1)->flags); }
#define CLEARPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void folio_clear_##lname(struct folio *folio) \
-{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline void ClearPage##uname(struct page *page) \
{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define __SETPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void __folio_set_##lname(struct folio *folio) \
-{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+__FOLIO_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline void __SetPage##uname(struct page *page) \
{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
#define __CLEARPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void __folio_clear_##lname(struct folio *folio) \
-{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline void __ClearPage##uname(struct page *page) \
{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTSETFLAG(uname, lname, policy) \
-static __always_inline \
-bool folio_test_set_##lname(struct folio *folio) \
-{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline int TestSetPage##uname(struct page *page) \
{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTCLEARFLAG(uname, lname, policy) \
-static __always_inline \
-bool folio_test_clear_##lname(struct folio *folio) \
-{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline int TestClearPage##uname(struct page *page) \
{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
@@ -842,8 +866,18 @@ static inline void ClearPageCompound(struct page *page)
ClearPageHead(page);
}
PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND)
+FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
+/*
+ * PG_partially_mapped is protected by deferred_split split_queue_lock,
+ * so its safe to use non-atomic set/clear.
+ */
+__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
+__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
#else
TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable)
+FOLIO_TEST_FLAG_FALSE(partially_mapped)
+__FOLIO_SET_FLAG_NOOP(partially_mapped)
+__FOLIO_CLEAR_FLAG_NOOP(partially_mapped)
#endif
#define PG_head_mask ((1UL << PG_head))
@@ -1111,7 +1145,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
*/
#define PAGE_FLAGS_SECOND \
(0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \
-1UL << PG_large_rmappable)
+1UL << PG_large_rmappable | 1UL << PG_partially_mapped)
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)


@@ -3,10 +3,6 @@
#define __LINUX_PAGEISOLATION_H
#ifdef CONFIG_MEMORY_ISOLATION
-static inline bool has_isolate_pageblock(struct zone *zone)
-{
-return zone->nr_isolate_pageblock;
-}
static inline bool is_migrate_isolate_page(struct page *page)
{
return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@@ -16,10 +12,6 @@ static inline bool is_migrate_isolate(int migratetype)
return migratetype == MIGRATE_ISOLATE;
}
#else
-static inline bool has_isolate_pageblock(struct zone *zone)
-{
-return false;
-}
static inline bool is_migrate_isolate_page(struct page *page)
{
return false;


@@ -28,7 +28,7 @@ enum pageblock_bits {
NR_PAGEBLOCK_BITS
};
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE)
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -41,14 +41,18 @@ extern unsigned int pageblock_order;
* Huge pages are a constant size, but don't exceed the maximum allocation
* granularity.
*/
-#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER)
+#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER)
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
-#else /* CONFIG_HUGETLB_PAGE */
+#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
+
+#define pageblock_order min_t(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER)
+
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order MAX_ORDER
+#define pageblock_order PAGE_BLOCK_ORDER
#endif /* CONFIG_HUGETLB_PAGE */


@@ -742,7 +742,12 @@ int folio_mkclean(struct folio *);
int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
struct vm_area_struct *vma);
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
+enum rmp_flags {
+RMP_LOCKED = 1 << 0,
+RMP_USE_SHARED_ZEROPAGE = 1 << 1,
+};
+
+void remove_migration_ptes(struct folio *src, struct folio *dst, int flags);
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);


@@ -52,6 +52,8 @@ int trace_array_printk(struct trace_array *tr, unsigned long ip,
int trace_array_init_printk(struct trace_array *tr);
void trace_array_put(struct trace_array *tr);
struct trace_array *trace_array_get_by_name(const char *name);
+struct trace_array *trace_array_get_by_name_ext(const char *name,
+const char *systems);
int trace_array_destroy(struct trace_array *tr);
/* For osnoise tracer */
@@ -88,6 +90,11 @@ static inline struct trace_array *trace_array_get_by_name(const char *name)
{
return NULL;
}
+
+static inline struct trace_array *trace_array_get_by_name_ext(
+const char *name, const char *systems)
+{
+return NULL;
+}
static inline int trace_array_destroy(struct trace_array *tr)
{
return 0;


@@ -8,21 +8,46 @@
#include <linux/refcount.h>
#include <net/sock.h>
-void unix_inflight(struct user_struct *user, struct file *fp);
-void unix_notinflight(struct user_struct *user, struct file *fp);
-void unix_destruct_scm(struct sk_buff *skb);
-void io_uring_destruct_scm(struct sk_buff *skb);
-void unix_gc(void);
-void wait_for_unix_gc(void);
+#if IS_ENABLED(CONFIG_UNIX)
struct unix_sock *unix_get_socket(struct file *filp);
+#else
+static inline struct unix_sock *unix_get_socket(struct file *filp)
+{
+return NULL;
+}
+#endif
+
+extern unsigned int unix_tot_inflight;
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
+void unix_del_edges(struct scm_fp_list *fpl);
+void unix_update_edges(struct unix_sock *receiver);
+int unix_prepare_fpl(struct scm_fp_list *fpl);
+void unix_destroy_fpl(struct scm_fp_list *fpl);
+void unix_gc(void);
+void wait_for_unix_gc(struct scm_fp_list *fpl);
+
+struct unix_vertex {
+struct list_head edges;
+struct list_head entry;
+struct list_head scc_entry;
+unsigned long out_degree;
+unsigned long index;
+unsigned long scc_index;
+};
+
+struct unix_edge {
+struct unix_sock *predecessor;
+struct unix_sock *successor;
+struct list_head vertex_entry;
+struct list_head stack_entry;
+};
+
struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_MOD (256 - 1)
#define UNIX_HASH_SIZE (256 * 2)
#define UNIX_HASH_BITS 8
-extern unsigned int unix_tot_inflight;
struct unix_address {
refcount_t refcnt;
int len;
@@ -42,6 +67,7 @@ struct unix_skb_parms {
struct scm_stat {
atomic_t nr_fds;
+unsigned long nr_unix_fds;
};
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
@@ -54,12 +80,9 @@ struct unix_sock {
struct path path;
struct mutex iolock, bindlock;
struct sock *peer;
-struct list_head link;
-unsigned long inflight;
+struct unix_vertex *vertex;
+struct sock *listener;
spinlock_t lock;
-unsigned long gc_flags;
-#define UNIX_GC_CANDIDATE 0
-#define UNIX_GC_MAYBE_CYCLE 1
struct socket_wq peer_wq;
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;

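The unix_vertex and unix_edge structures above back the replacement SCC-based garbage collector from the series listed in this merge: each inflight AF_UNIX fd contributes one edge from the vertex of the socket being passed to the receiving socket, and Tarjan's algorithm then runs over those vertices. A rough, purely illustrative sketch of that relationship; the real bookkeeping lives in net/unix/garbage.c and differs in detail:

    /* Illustration of the edge layout only. */
    static void example_link_edge(struct unix_edge *edge,
    			      struct unix_sock *sender,
    			      struct unix_sock *receiver)
    {
    	edge->predecessor = sender;	/* socket carried in the SCM_RIGHTS fds */
    	edge->successor = receiver;	/* socket the skb is queued on */
    	list_add_tail(&edge->vertex_entry, &sender->vertex->edges);
    }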

@@ -22,11 +22,24 @@ struct scm_creds {
kgid_t gid;
};
+#ifdef CONFIG_UNIX
+struct unix_edge;
+#endif
+
struct scm_fp_list {
short count;
short max;
struct user_struct *user;
struct file *fp[SCM_MAX_FD];
+#ifndef __GENKSYMS__
+#ifdef CONFIG_UNIX
+bool inflight;
+bool dead;
+struct list_head vertices;
+struct unix_edge *edges;
+#endif
+short count_unix;
+#endif
};
struct scm_cookie {


@@ -431,6 +431,9 @@ DECLARE_HOOK(android_vh_add_lazyfree_bypass,
DECLARE_HOOK(android_vh_do_async_mmap_readahead,
TP_PROTO(struct vm_fault *vmf, struct folio *folio, bool *skip),
TP_ARGS(vmf, folio, skip));
+DECLARE_HOOK(android_vh_mm_free_page,
+TP_PROTO(struct page *page),
+TP_ARGS(page));
DECLARE_HOOK(android_vh_cma_debug_show_areas,
TP_PROTO(bool *show),
@@ -596,6 +599,9 @@ DECLARE_HOOK(android_vh_folio_remove_rmap_ptes,
DECLARE_HOOK(android_vh_pageset_update,
TP_PROTO(unsigned long *high, unsigned long *batch),
TP_ARGS(high, batch));
+DECLARE_HOOK(android_vh_mempool_alloc_skip_wait,
+TP_PROTO(gfp_t *gfp_flags, bool *skip_wait),
+TP_ARGS(gfp_flags, skip_wait));
#endif /* _TRACE_HOOK_MM_H */
/* This part must be outside protection */

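The two vendor hooks declared above follow the usual Android vendor-hook pattern: a vendor module registers a probe through the generated register_trace_android_vh_*() helper and flips the output parameter. A hedged sketch; the handler policy below is invented purely for illustration:

    /* Illustrative vendor module snippet; the skip policy is made up. */
    static void example_skip_wait(void *data, gfp_t *gfp_flags, bool *skip_wait)
    {
    	if (!(*gfp_flags & __GFP_DIRECT_RECLAIM))
    		*skip_wait = true;	/* don't sleep in mempool_alloc() */
    }

    static int __init example_init(void)
    {
    	return register_trace_android_vh_mempool_alloc_skip_wait(example_skip_wait,
    								 NULL);
    }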

@@ -31,6 +31,10 @@ DECLARE_HOOK(android_vh_usb_new_device_added,
TP_PROTO(struct usb_device *udev, int *err),
TP_ARGS(udev, err));
+DECLARE_HOOK(android_vh_xhci_full_reset_on_remove,
+TP_PROTO(bool *full_reset),
+TP_ARGS(full_reset));
+
#endif /* _TRACE_HOOK_USB_H */
/* This part must be outside protection */
#include <trace/define_trace.h>


@@ -50,6 +50,7 @@
#include <linux/writeback.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
+#include <linux/memcontrol.h>
#include <linux/cgroup.h>
#include <linux/efi.h>
#include <linux/tick.h>
@@ -1062,6 +1063,7 @@ void start_kernel(void)
proc_root_init();
nsfs_init();
cpuset_init();
+mem_cgroup_init();
cgroup_init();
taskstats_init_early();
delayacct_init();


@@ -452,7 +452,7 @@ struct kmem_cache *files_cachep;
struct kmem_cache *fs_cachep; struct kmem_cache *fs_cachep;
/* SLAB cache for vm_area_struct structures */ /* SLAB cache for vm_area_struct structures */
-static struct kmem_cache *vm_area_cachep;
+struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */ /* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep; static struct kmem_cache *mm_cachep;


@@ -227,6 +227,14 @@ __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
void irq_startup_managed(struct irq_desc *desc) void irq_startup_managed(struct irq_desc *desc)
{ {
struct irq_data *d = irq_desc_get_irq_data(desc);
/*
* Clear managed-shutdown flag, so we don't repeat managed-startup for
* multiple hotplugs, and cause imbalanced disable depth.
*/
irqd_clr_managed_shutdown(d);
/* /*
* Only start it up when the disable depth is 1, so that a disable, * Only start it up when the disable depth is 1, so that a disable,
* hotunplug, hotplug sequence does not end up enabling it during * hotunplug, hotplug sequence does not end up enabling it during


@@ -211,13 +211,6 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
!irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
return; return;
/*
* Don't restore suspended interrupts here when a system comes back
* from S3. They are reenabled via resume_device_irqs().
*/
if (desc->istate & IRQS_SUSPENDED)
return;
if (irqd_is_managed_and_shutdown(data)) if (irqd_is_managed_and_shutdown(data))
irq_startup_managed(desc); irq_startup_managed(desc);


@@ -9538,16 +9538,19 @@ static int trace_array_create_dir(struct trace_array *tr)
return ret; return ret;
} }
-static struct trace_array *trace_array_create(const char *name)
+static struct trace_array *
+trace_array_create_systems(const char *name, const char *systems)
 {
+struct trace_array_ext *tr_ext;
 struct trace_array *tr;
 int ret;
 ret = -ENOMEM;
-tr = kzalloc(sizeof(*tr), GFP_KERNEL);
-if (!tr)
+tr_ext = kzalloc(sizeof(*tr_ext), GFP_KERNEL);
+if (!tr_ext)
 return ERR_PTR(ret);
+tr = &tr_ext->trace_array;
tr->name = kstrdup(name, GFP_KERNEL); tr->name = kstrdup(name, GFP_KERNEL);
if (!tr->name) if (!tr->name)
goto out_free_tr; goto out_free_tr;
@@ -9558,6 +9561,12 @@ static struct trace_array *trace_array_create(const char *name)
if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
goto out_free_tr; goto out_free_tr;
if (systems) {
tr_ext->system_names = kstrdup_const(systems, GFP_KERNEL);
if (!tr_ext->system_names)
goto out_free_tr;
}
tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
cpumask_copy(tr->tracing_cpumask, cpu_all_mask); cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9601,12 +9610,18 @@ static struct trace_array *trace_array_create(const char *name)
free_trace_buffers(tr); free_trace_buffers(tr);
free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask); free_cpumask_var(tr->tracing_cpumask);
kfree_const(tr_ext->system_names);
 kfree(tr->name);
-kfree(tr);
+kfree(tr_ext);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
static struct trace_array *trace_array_create(const char *name)
{
return trace_array_create_systems(name, NULL);
}
static int instance_mkdir(const char *name) static int instance_mkdir(const char *name)
{ {
struct trace_array *tr; struct trace_array *tr;
@@ -9629,9 +9644,27 @@ out_unlock:
return ret; return ret;
} }
const char *trace_array_get_system_names(struct trace_array *tr)
{
struct trace_array_ext *tr_ext;
if (tr == &global_trace)
return NULL;
tr_ext = container_of(tr, struct trace_array_ext, trace_array);
return tr_ext->system_names;
}
struct trace_array *trace_array_get_by_name(const char *name)
{
return trace_array_get_by_name_ext(name, NULL);
}
EXPORT_SYMBOL_GPL(trace_array_get_by_name);
/** /**
-* trace_array_get_by_name - Create/Lookup a trace array, given its name.
+* trace_array_get_by_name_ext - Create/Lookup a trace array, given its name.
* @name: The name of the trace array to be looked up/created. * @name: The name of the trace array to be looked up/created.
* @systems: A list of systems to create event directories for (NULL for all)
* *
* Returns pointer to trace array with given name. * Returns pointer to trace array with given name.
* NULL, if it cannot be created. * NULL, if it cannot be created.
@@ -9645,7 +9678,8 @@ out_unlock:
* trace_array_put() is called, user space can not delete it. * trace_array_put() is called, user space can not delete it.
* *
*/ */
-struct trace_array *trace_array_get_by_name(const char *name)
+struct trace_array *trace_array_get_by_name_ext(const char *name,
+const char *systems)
{ {
struct trace_array *tr; struct trace_array *tr;
@@ -9657,7 +9691,7 @@ struct trace_array *trace_array_get_by_name(const char *name)
goto out_unlock; goto out_unlock;
} }
-tr = trace_array_create(name);
+tr = trace_array_create_systems(name, systems);
if (IS_ERR(tr)) if (IS_ERR(tr))
tr = NULL; tr = NULL;
@@ -9669,11 +9703,14 @@ out_unlock:
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
return tr; return tr;
} }
-EXPORT_SYMBOL_GPL(trace_array_get_by_name);
+EXPORT_SYMBOL_GPL(trace_array_get_by_name_ext);
static int __remove_instance(struct trace_array *tr) static int __remove_instance(struct trace_array *tr)
{ {
int i; int i;
struct trace_array_ext *tr_ext = container_of(tr,
struct trace_array_ext,
trace_array);
/* Reference counter for a newly created trace array = 1. */ /* Reference counter for a newly created trace array = 1. */
if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
@@ -9704,8 +9741,9 @@ static int __remove_instance(struct trace_array *tr)
free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask); free_cpumask_var(tr->tracing_cpumask);
kfree_const(tr_ext->system_names);
 kfree(tr->name);
-kfree(tr);
+kfree(tr_ext);
return 0; return 0;
} }


@@ -412,6 +412,11 @@ struct trace_array {
struct trace_func_repeats __percpu *last_func_repeats; struct trace_func_repeats __percpu *last_func_repeats;
}; };
struct trace_array_ext {
const char *system_names;
struct trace_array trace_array;
};
enum { enum {
TRACE_ARRAY_FL_GLOBAL = (1 << 0) TRACE_ARRAY_FL_GLOBAL = (1 << 0)
}; };
@@ -420,6 +425,7 @@ extern struct list_head ftrace_trace_arrays;
extern struct mutex trace_types_lock; extern struct mutex trace_types_lock;
extern const char *trace_array_get_system_names(struct trace_array *tr);
extern int trace_array_get(struct trace_array *tr); extern int trace_array_get(struct trace_array *tr);
extern int tracing_check_open_get_tr(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr);
extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find(const char *instance);


@@ -3041,6 +3041,27 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
up_write(&trace_event_sem); up_write(&trace_event_sem);
} }
static bool event_in_systems(struct trace_event_call *call,
const char *systems)
{
const char *system;
const char *p;
if (!systems)
return true;
system = call->class->system;
p = strstr(systems, system);
if (!p)
return false;
if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',')
return false;
p += strlen(system);
return !*p || isspace(*p) || *p == ',';
}
static struct trace_event_file * static struct trace_event_file *
trace_create_new_event(struct trace_event_call *call, trace_create_new_event(struct trace_event_call *call,
struct trace_array *tr) struct trace_array *tr)
@@ -3050,9 +3071,12 @@ trace_create_new_event(struct trace_event_call *call,
struct trace_event_file *file; struct trace_event_file *file;
unsigned int first; unsigned int first;
if (!event_in_systems(call, trace_array_get_system_names(tr)))
return NULL;
file = kmem_cache_alloc(file_cachep, GFP_TRACE); file = kmem_cache_alloc(file_cachep, GFP_TRACE);
if (!file) if (!file)
-return NULL;
+return ERR_PTR(-ENOMEM);
pid_list = rcu_dereference_protected(tr->filtered_pids, pid_list = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex)); lockdep_is_held(&event_mutex));
@@ -3117,8 +3141,17 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
struct trace_event_file *file; struct trace_event_file *file;
file = trace_create_new_event(call, tr); file = trace_create_new_event(call, tr);
/*
* trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed
* allocation, or NULL if the event is not part of the tr->system_names.
* When the event is not part of the tr->system_names, return zero, not
* an error.
*/
if (!file) if (!file)
-return -ENOMEM;
+return 0;
if (IS_ERR(file))
return PTR_ERR(file);
if (eventdir_initialized) if (eventdir_initialized)
return event_create_dir(tr->event_dir, file); return event_create_dir(tr->event_dir, file);
@@ -3157,8 +3190,17 @@ __trace_early_add_new_event(struct trace_event_call *call,
int ret; int ret;
file = trace_create_new_event(call, tr); file = trace_create_new_event(call, tr);
/*
* trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed
* allocation, or NULL if the event is not part of the tr->system_names.
* When the event is not part of the tr->system_names, return zero, not
* an error.
*/
if (!file) if (!file)
-return -ENOMEM;
+return 0;
if (IS_ERR(file))
return PTR_ERR(file);
ret = event_define_fields(call); ret = event_define_fields(call);
if (ret) if (ret)
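The event_in_systems() helper added in this file treats the instance's system list as a comma- or space-separated set of tokens and only accepts whole-token matches. A stand-alone mock of that check, with a small test harness, makes the boundary conditions easier to see; system_listed() and the sample strings are hypothetical and only mirror the logic shown above.

/* Stand-alone mock of the token matching done by event_in_systems() above. */
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool system_listed(const char *systems, const char *system)
{
	const char *p;

	if (!systems)
		return true;                    /* NULL means "all systems" */

	p = strstr(systems, system);
	if (!p)
		return false;
	/* must start the string or follow a separator */
	if (p != systems && !isspace((unsigned char)*(p - 1)) && *(p - 1) != ',')
		return false;
	p += strlen(system);
	/* must end the string or be followed by a separator */
	return !*p || isspace((unsigned char)*p) || *p == ',';
}

int main(void)
{
	printf("%d\n", system_listed("sched,irq,timer", "sched")); /* 1: whole token */
	printf("%d\n", system_listed("sched_ext,irq", "sched"));   /* 0: prefix only */
	printf("%d\n", system_listed(NULL, "sched"));              /* 1: no filter */
	return 0;
}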


@@ -994,6 +994,40 @@ config CMA_AREAS
If unsure, leave the default value "7" in UMA and "19" in NUMA. If unsure, leave the default value "7" in UMA and "19" in NUMA.
#
# Select this config option from the architecture Kconfig, if available, to set
# the max page order for physically contiguous allocations.
#
config ARCH_FORCE_MAX_ORDER
int
#
# When ARCH_FORCE_MAX_ORDER is not defined,
# the default page block order is MAX_PAGE_ORDER (10) as per
# include/linux/mmzone.h.
#
config PAGE_BLOCK_ORDER
int "Page Block Order"
range 1 10 if ARCH_FORCE_MAX_ORDER = 0 || ARCH_FORCE_MAX_ORDER = ""
default 10 if ARCH_FORCE_MAX_ORDER = 0 || ARCH_FORCE_MAX_ORDER = ""
range 1 ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
default ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
help
The page block order refers to the power of two number of pages that
are physically contiguous and can have a migrate type associated to
them. The maximum size of the page block order is limited by
ARCH_FORCE_MAX_ORDER.
This config allows overriding the default page block order when the
page block order is required to be smaller than ARCH_FORCE_MAX_ORDER
or MAX_ORDER.
Reducing pageblock order can negatively impact THP generation
success rate. If your workloads uses THP heavily, please use this
option with caution.
Don't change if unsure.
config MEM_SOFT_DIRTY config MEM_SOFT_DIRTY
bool "Track memory changes" bool "Track memory changes"
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
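For a sense of scale in the help text above: a pageblock spans PAGE_SIZE << pageblock_order bytes, so lowering the order shrinks the granularity at which migrate types (and therefore contiguity for THP) are managed. A tiny sketch of the arithmetic, using illustrative page sizes and orders rather than the values of any particular configuration:

/* Pageblock size arithmetic for the help text above; sample values only. */
#include <stdio.h>

int main(void)
{
	unsigned long page_sizes[] = { 4096, 16384 };
	unsigned int orders[] = { 10, 7 };

	for (int i = 0; i < 2; i++)
		printf("%lu KiB pages, order %u -> %lu MiB pageblocks\n",
		       page_sizes[i] / 1024, orders[i],
		       (page_sizes[i] << orders[i]) >> 20);
	return 0;
}

With 4 KiB pages an order of 10 gives 4 MiB pageblocks, while 16 KiB pages at order 7 give 2 MiB pageblocks.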


@@ -70,6 +70,7 @@ unsigned long transparent_hugepage_flags __read_mostly =
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
static struct shrinker deferred_split_shrinker; static struct shrinker deferred_split_shrinker;
static bool split_underused_thp = true;
static atomic_t huge_zero_refcount; static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly; struct page *huge_zero_page __read_mostly;
@@ -423,6 +424,27 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj,
static struct kobj_attribute hpage_pmd_size_attr = static struct kobj_attribute hpage_pmd_size_attr =
__ATTR_RO(hpage_pmd_size); __ATTR_RO(hpage_pmd_size);
static ssize_t split_underused_thp_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sysfs_emit(buf, "%d\n", split_underused_thp);
}
static ssize_t split_underused_thp_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int err = kstrtobool(buf, &split_underused_thp);
if (err < 0)
return err;
return count;
}
static struct kobj_attribute split_underused_thp_attr = __ATTR(
shrink_underused, 0644, split_underused_thp_show, split_underused_thp_store);
static struct attribute *hugepage_attr[] = { static struct attribute *hugepage_attr[] = {
&enabled_attr.attr, &enabled_attr.attr,
&defrag_attr.attr, &defrag_attr.attr,
@@ -431,6 +453,7 @@ static struct attribute *hugepage_attr[] = {
#ifdef CONFIG_SHMEM #ifdef CONFIG_SHMEM
&shmem_enabled_attr.attr, &shmem_enabled_attr.attr,
#endif #endif
&split_underused_thp_attr.attr,
NULL, NULL,
}; };
@@ -1046,6 +1069,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm_inc_nr_ptes(vma->vm_mm); mm_inc_nr_ptes(vma->vm_mm);
deferred_split_folio(folio, false);
spin_unlock(vmf->ptl); spin_unlock(vmf->ptl);
count_vm_event(THP_FAULT_ALLOC); count_vm_event(THP_FAULT_ALLOC);
count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC); count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
@@ -2953,7 +2977,7 @@ bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
return __discard_anon_folio_pmd_locked(vma, addr, pmdp, folio); return __discard_anon_folio_pmd_locked(vma, addr, pmdp, folio);
} }
-static void remap_page(struct folio *folio, unsigned long nr)
+static void remap_page(struct folio *folio, unsigned long nr, int flags)
{ {
int i = 0; int i = 0;
@@ -2961,7 +2985,7 @@ static void remap_page(struct folio *folio, unsigned long nr)
if (!folio_test_anon(folio)) if (!folio_test_anon(folio))
return; return;
for (;;) { for (;;) {
-remove_migration_ptes(folio, folio, true);
+remove_migration_ptes(folio, folio, RMP_LOCKED | flags);
i += folio_nr_pages(folio); i += folio_nr_pages(folio);
if (i >= nr) if (i >= nr)
break; break;
@@ -3314,7 +3338,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
if (nr_dropped) if (nr_dropped)
shmem_uncharge(head->mapping->host, nr_dropped); shmem_uncharge(head->mapping->host, nr_dropped);
-remap_page(folio, nr);
+remap_page(folio, nr, PageAnon(head) ? RMP_USE_SHARED_ZEROPAGE : 0);
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
struct page *subpage = folio_dst_page(folio, i); struct page *subpage = folio_dst_page(folio, i);
@@ -3376,8 +3400,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
struct folio *folio = page_folio(page); struct folio *folio = page_folio(page);
struct deferred_split *ds_queue = get_deferred_split_queue(folio); struct deferred_split *ds_queue = get_deferred_split_queue(folio);
XA_STATE(xas, &folio->mapping->i_pages, folio->index); XA_STATE(xas, &folio->mapping->i_pages, folio->index);
-struct anon_vma *anon_vma = NULL;
+bool is_anon = folio_test_anon(folio);
 struct address_space *mapping = NULL;
+struct anon_vma *anon_vma = NULL;
int extra_pins, ret; int extra_pins, ret;
pgoff_t end; pgoff_t end;
bool is_hzp; bool is_hzp;
@@ -3394,7 +3419,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (folio_test_writeback(folio)) if (folio_test_writeback(folio))
return -EBUSY; return -EBUSY;
-if (folio_test_anon(folio)) {
+if (is_anon) {
/* /*
* The caller does not necessarily hold an mmap_lock that would * The caller does not necessarily hold an mmap_lock that would
* prevent the anon_vma disappearing so we first we take a * prevent the anon_vma disappearing so we first we take a
@@ -3495,6 +3520,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (folio_order(folio) > 1 && if (folio_order(folio) > 1 &&
!list_empty(&folio->_deferred_list)) { !list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--; ds_queue->split_queue_len--;
if (folio_test_partially_mapped(folio))
__folio_clear_partially_mapped(folio);
/*
* Reinitialize page_deferred_list after removing the
* page from the split_queue, otherwise a subsequent
* split will see list corruption when checking the
* page_deferred_list.
*/
list_del_init(&folio->_deferred_list); list_del_init(&folio->_deferred_list);
} }
spin_unlock(&ds_queue->split_queue_lock); spin_unlock(&ds_queue->split_queue_lock);
@@ -3522,7 +3555,7 @@ unfreeze:
folio_ref_unfreeze(folio, 1 + extra_pins); folio_ref_unfreeze(folio, 1 + extra_pins);
remap: remap:
free_dst_pages(folio); free_dst_pages(folio);
-remap_page(folio, folio_nr_pages(folio));
+remap_page(folio, folio_nr_pages(folio), 0);
} }
out_unlock: out_unlock:
@@ -3572,6 +3605,8 @@ bool __folio_unqueue_deferred_split(struct folio *folio)
spin_lock_irqsave(&ds_queue->split_queue_lock, flags); spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (!list_empty(&folio->_deferred_list)) { if (!list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--; ds_queue->split_queue_len--;
if (folio_test_partially_mapped(folio))
__folio_clear_partially_mapped(folio);
list_del_init(&folio->_deferred_list); list_del_init(&folio->_deferred_list);
unqueued = true; unqueued = true;
} }
@@ -3580,7 +3615,8 @@ bool __folio_unqueue_deferred_split(struct folio *folio)
return unqueued; /* useful for debug warnings */ return unqueued; /* useful for debug warnings */
} }
-void deferred_split_folio(struct folio *folio)
+/* partially_mapped=false won't clear PG_partially_mapped folio flag */
+void deferred_split_folio(struct folio *folio, bool partially_mapped)
{ {
struct deferred_split *ds_queue = get_deferred_split_queue(folio); struct deferred_split *ds_queue = get_deferred_split_queue(folio);
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
@@ -3595,6 +3631,9 @@ void deferred_split_folio(struct folio *folio)
if (folio_order(folio) <= 1) if (folio_order(folio) <= 1)
return; return;
if (!partially_mapped && !split_underused_thp)
return;
/* /*
* Exclude swapcache: originally to avoid a corrupt deferred split * Exclude swapcache: originally to avoid a corrupt deferred split
* queue. Nowadays that is fully prevented by mem_cgroup_swapout(); * queue. Nowadays that is fully prevented by mem_cgroup_swapout();
@@ -3605,13 +3644,20 @@ void deferred_split_folio(struct folio *folio)
if (folio_test_swapcache(folio)) if (folio_test_swapcache(folio))
return; return;
if (!list_empty(&folio->_deferred_list))
return;
spin_lock_irqsave(&ds_queue->split_queue_lock, flags); spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
-if (list_empty(&folio->_deferred_list)) {
+if (partially_mapped) {
+if (!folio_test_partially_mapped(folio)) {
+__folio_set_partially_mapped(folio);
 if (folio_test_pmd_mappable(folio))
 count_vm_event(THP_DEFERRED_SPLIT_PAGE);
 count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
+}
+} else {
+/* partially mapped folios cannot become non-partially mapped */
+VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
+}
+if (list_empty(&folio->_deferred_list)) {
list_add_tail(&folio->_deferred_list, &ds_queue->split_queue); list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
ds_queue->split_queue_len++; ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
@@ -3640,6 +3686,39 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
return READ_ONCE(ds_queue->split_queue_len); return READ_ONCE(ds_queue->split_queue_len);
} }
static bool thp_underused(struct folio *folio)
{
int num_zero_pages = 0, num_filled_pages = 0;
void *kaddr;
int i;
if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1)
return false;
for (i = 0; i < folio_nr_pages(folio); i++) {
kaddr = kmap_local_folio(folio, i * PAGE_SIZE);
if (!memchr_inv(kaddr, 0, PAGE_SIZE)) {
num_zero_pages++;
if (num_zero_pages > khugepaged_max_ptes_none) {
kunmap_local(kaddr);
return true;
}
} else {
/*
* Another path for early exit once the number
* of non-zero filled pages exceeds threshold.
*/
num_filled_pages++;
if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) {
kunmap_local(kaddr);
return false;
}
}
kunmap_local(kaddr);
}
return false;
}
static unsigned long deferred_split_scan(struct shrinker *shrink, static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc) struct shrink_control *sc)
{ {
@@ -3647,8 +3726,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
struct deferred_split *ds_queue = &pgdata->deferred_split_queue; struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
unsigned long flags; unsigned long flags;
LIST_HEAD(list); LIST_HEAD(list);
-struct folio *folio, *next;
-int split = 0;
+struct folio *folio, *next, *prev = NULL;
+int split = 0, removed = 0;
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
if (sc->memcg) if (sc->memcg)
@@ -3663,6 +3742,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
list_move(&folio->_deferred_list, &list); list_move(&folio->_deferred_list, &list);
} else { } else {
/* We lost race with folio_put() */ /* We lost race with folio_put() */
if (folio_test_partially_mapped(folio))
__folio_clear_partially_mapped(folio);
list_del_init(&folio->_deferred_list); list_del_init(&folio->_deferred_list);
ds_queue->split_queue_len--; ds_queue->split_queue_len--;
} }
@@ -3672,20 +3753,55 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
list_for_each_entry_safe(folio, next, &list, _deferred_list) { list_for_each_entry_safe(folio, next, &list, _deferred_list) {
bool did_split = false;
bool underused = false;
if (!folio_test_partially_mapped(folio)) {
underused = thp_underused(folio);
if (!underused)
goto next;
}
if (!folio_trylock(folio)) if (!folio_trylock(folio))
goto next; goto next;
-/* split_huge_page() removes page from list on success */
-if (!split_folio(folio))
+if (!split_folio(folio)) {
+did_split = true;
 split++;
+}
folio_unlock(folio); folio_unlock(folio);
next: next:
/*
* split_folio() removes folio from list on success.
* Only add back to the queue if folio is partially mapped.
* If thp_underused returns false, or if split_folio fails
* in the case it was underused, then consider it used and
* don't add it back to split_queue.
*/
if (did_split) {
; /* folio already removed from list */
} else if (!folio_test_partially_mapped(folio)) {
list_del_init(&folio->_deferred_list);
removed++;
} else {
/*
* That unlocked list_del_init() above would be unsafe,
* unless its folio is separated from any earlier folios
* left on the list (which may be concurrently unqueued)
* by one safe folio with refcount still raised.
*/
swap(folio, prev);
}
if (folio)
folio_put(folio); folio_put(folio);
} }
spin_lock_irqsave(&ds_queue->split_queue_lock, flags); spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
list_splice_tail(&list, &ds_queue->split_queue); list_splice_tail(&list, &ds_queue->split_queue);
ds_queue->split_queue_len -= removed;
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
if (prev)
folio_put(prev);
/* /*
* Stop shrinker if we didn't split any page, but the queue is empty. * Stop shrinker if we didn't split any page, but the queue is empty.
* This can happen if pages were freed under us. * This can happen if pages were freed under us.
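thp_underused() above decides whether a huge folio is worth splitting by scanning each subpage with memchr_inv() and bailing out early once either the zero-filled or the data-filled counter crosses its threshold. The following user-space sketch shows the same early-exit counting over fixed-size chunks; the chunk count and threshold are made-up values, not the kernel's khugepaged_max_ptes_none.

/* Early-exit zero-fill counting in the style of thp_underused(); illustrative values. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CHUNK_SIZE 4096
#define NR_CHUNKS  512          /* stand-in for the number of subpages */
#define MAX_ZERO   64           /* stand-in for khugepaged_max_ptes_none */

/* True when the chunk contains only zero bytes (the kernel uses memchr_inv()). */
static bool chunk_is_zero(const unsigned char *chunk)
{
	static const unsigned char zero[CHUNK_SIZE];

	return memcmp(chunk, zero, CHUNK_SIZE) == 0;
}

static bool region_underused(const unsigned char *region)
{
	int num_zero = 0, num_filled = 0;

	for (int i = 0; i < NR_CHUNKS; i++) {
		if (chunk_is_zero(region + (size_t)i * CHUNK_SIZE)) {
			if (++num_zero > MAX_ZERO)
				return true;            /* mostly zero: worth splitting */
		} else if (++num_filled >= NR_CHUNKS - MAX_ZERO) {
			return false;                   /* enough real data: keep it */
		}
	}
	return false;
}

int main(void)
{
	unsigned char *region = calloc(NR_CHUNKS, CHUNK_SIZE);

	if (!region)
		return 1;
	region[0] = 0xff;                               /* dirty a single chunk */
	printf("underused: %d\n", region_underused(region));   /* prints 1 */
	free(region);
	return 0;
}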


@@ -470,7 +470,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
#define K(x) ((x) << (PAGE_SHIFT-10)) #define K(x) ((x) << (PAGE_SHIFT-10))
extern char * const zone_names[MAX_NR_ZONES]; extern char * const zone_names[MAX_NR_ZONES];
-extern unsigned long free_highatomics[MAX_NR_ZONES];
+extern unsigned long nr_free_highatomic[MAX_NR_ZONES];
/* perform sanity checks on struct pages being allocated or freed */ /* perform sanity checks on struct pages being allocated or freed */
DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);
@@ -721,8 +721,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags); gfp_t gfp_flags);
extern int user_min_free_kbytes; extern int user_min_free_kbytes;
-extern void free_unref_page(struct page *page, unsigned int order);
-extern void free_unref_page_list(struct list_head *list);
+void free_unref_page(struct page *page, unsigned int order);
+void free_unref_folios(struct folio_batch *fbatch);
+void free_unref_page_list(struct list_head *list);
extern void zone_pcp_reset(struct zone *zone); extern void zone_pcp_reset(struct zone *zone);
extern void zone_pcp_disable(struct zone *zone); extern void zone_pcp_disable(struct zone *zone);


@@ -84,7 +84,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
* *
* Note that these are only respected if collapse was initiated by khugepaged. * Note that these are only respected if collapse was initiated by khugepaged.
*/ */
-static unsigned int khugepaged_max_ptes_none __read_mostly;
+unsigned int khugepaged_max_ptes_none __read_mostly;
static unsigned int khugepaged_max_ptes_swap __read_mostly; static unsigned int khugepaged_max_ptes_swap __read_mostly;
static unsigned int khugepaged_max_ptes_shared __read_mostly; static unsigned int khugepaged_max_ptes_shared __read_mostly;
@@ -1218,6 +1218,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
pgtable_trans_huge_deposit(mm, pmd, pgtable); pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd); set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd); update_mmu_cache_pmd(vma, address, pmd);
deferred_split_folio(folio, false);
spin_unlock(pmd_ptl); spin_unlock(pmd_ptl);
hpage = NULL; hpage = NULL;


@@ -33,6 +33,7 @@
#include <linux/shmem_fs.h> #include <linux/shmem_fs.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/vm_event_item.h> #include <linux/vm_event_item.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/page-flags.h> #include <linux/page-flags.h>
@@ -95,6 +96,9 @@ static bool cgroup_memory_nokmem __ro_after_init;
/* BPF memory accounting disabled? */ /* BPF memory accounting disabled? */
static bool cgroup_memory_nobpf __ro_after_init; static bool cgroup_memory_nobpf __ro_after_init;
static struct kmem_cache *memcg_cachep;
static struct kmem_cache *memcg_pn_cachep;
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
#endif #endif
@@ -5384,7 +5388,8 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{ {
struct mem_cgroup_per_node *pn; struct mem_cgroup_per_node *pn;
-pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, node);
+pn = kmem_cache_alloc_node(memcg_pn_cachep, GFP_KERNEL | __GFP_ZERO,
+node);
if (!pn) if (!pn)
return 1; return 1;
@@ -5440,7 +5445,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
int __maybe_unused i; int __maybe_unused i;
long error = -ENOMEM; long error = -ENOMEM;
-memcg = kzalloc(struct_size(memcg, nodeinfo, nr_node_ids), GFP_KERNEL);
+memcg = kmem_cache_zalloc(memcg_cachep, GFP_KERNEL);
if (!memcg) if (!memcg)
return ERR_PTR(error); return ERR_PTR(error);
@@ -6017,8 +6022,6 @@ int mem_cgroup_move_account(struct folio *folio,
css_get(&to->css); css_get(&to->css);
css_put(&from->css); css_put(&from->css);
/* Warning should never happen, so don't worry about refcount non-0 */
WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
folio->memcg_data = (unsigned long)to; folio->memcg_data = (unsigned long)to;
__folio_memcg_unlock(from); __folio_memcg_unlock(from);
@@ -6389,9 +6392,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
enum mc_target_type target_type; enum mc_target_type target_type;
union mc_target target; union mc_target target;
struct folio *folio; struct folio *folio;
bool tried_split_before = false;
retry_pmd:
ptl = pmd_trans_huge_lock(pmd, vma); ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) { if (ptl) {
if (mc.precharge < HPAGE_PMD_NR) { if (mc.precharge < HPAGE_PMD_NR) {
@@ -6401,27 +6402,6 @@ retry_pmd:
target_type = get_mctgt_type_thp(vma, addr, *pmd, &target); target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
if (target_type == MC_TARGET_PAGE) { if (target_type == MC_TARGET_PAGE) {
folio = target.folio; folio = target.folio;
/*
* Deferred split queue locking depends on memcg,
* and unqueue is unsafe unless folio refcount is 0:
* split or skip if on the queue? first try to split.
*/
if (!list_empty(&folio->_deferred_list)) {
spin_unlock(ptl);
if (!tried_split_before)
split_folio(folio);
folio_unlock(folio);
folio_put(folio);
if (tried_split_before)
return 0;
tried_split_before = true;
goto retry_pmd;
}
/*
* So long as that pmd lock is held, the folio cannot
* be racily added to the _deferred_list, because
* __folio_remove_rmap() will find !partially_mapped.
*/
if (folio_isolate_lru(folio)) { if (folio_isolate_lru(folio)) {
if (!mem_cgroup_move_account(folio, true, if (!mem_cgroup_move_account(folio, true,
mc.from, mc.to)) { mc.from, mc.to)) {
@@ -7418,6 +7398,18 @@ void __mem_cgroup_uncharge_list(struct list_head *page_list)
uncharge_batch(&ug); uncharge_batch(&ug);
} }
void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
{
struct uncharge_gather ug;
unsigned int i;
uncharge_gather_clear(&ug);
for (i = 0; i < folios->nr; i++)
uncharge_folio(folios->folios[i], &ug);
if (ug.memcg)
uncharge_batch(&ug);
}
/** /**
* mem_cgroup_replace_folio - Charge a folio's replacement. * mem_cgroup_replace_folio - Charge a folio's replacement.
* @old: Currently circulating folio. * @old: Currently circulating folio.
@@ -7606,15 +7598,16 @@ static int __init cgroup_memory(char *s)
__setup("cgroup.memory=", cgroup_memory); __setup("cgroup.memory=", cgroup_memory);
/* /*
-* subsys_initcall() for memory controller.
+* Memory controller init before cgroup_init() initialize root_mem_cgroup.
* *
* Some parts like memcg_hotplug_cpu_dead() have to be initialized from this * Some parts like memcg_hotplug_cpu_dead() have to be initialized from this
* context because of lock dependencies (cgroup_lock -> cpu hotplug) but * context because of lock dependencies (cgroup_lock -> cpu hotplug) but
* basically everything that doesn't depend on a specific mem_cgroup structure * basically everything that doesn't depend on a specific mem_cgroup structure
* should be initialized from here. * should be initialized from here.
*/ */
-static int __init mem_cgroup_init(void)
+int __init mem_cgroup_init(void)
{ {
unsigned int memcg_size;
int cpu, node; int cpu, node;
/* /*
@@ -7632,6 +7625,13 @@ static int __init mem_cgroup_init(void)
INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
drain_local_stock); drain_local_stock);
memcg_size = struct_size_t(struct mem_cgroup, nodeinfo, nr_node_ids);
memcg_cachep = kmem_cache_create("mem_cgroup", memcg_size, 0,
SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
memcg_pn_cachep = KMEM_CACHE(mem_cgroup_per_node,
SLAB_PANIC | SLAB_HWCACHE_ALIGN);
for_each_node(node) { for_each_node(node) {
struct mem_cgroup_tree_per_node *rtpn; struct mem_cgroup_tree_per_node *rtpn;
@@ -7645,7 +7645,6 @@ static int __init mem_cgroup_init(void)
return 0; return 0;
} }
subsys_initcall(mem_cgroup_init);
#ifdef CONFIG_SWAP #ifdef CONFIG_SWAP
static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)


@@ -19,6 +19,8 @@
#include <linux/mempool.h> #include <linux/mempool.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include "slab.h" #include "slab.h"
#undef CREATE_TRACE_POINTS
#include <trace/hooks/mm.h>
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON) #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
static void poison_error(mempool_t *pool, void *element, size_t size, static void poison_error(mempool_t *pool, void *element, size_t size,
@@ -383,6 +385,7 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
unsigned long flags; unsigned long flags;
wait_queue_entry_t wait; wait_queue_entry_t wait;
gfp_t gfp_temp; gfp_t gfp_temp;
bool skip_wait = false;
VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
might_alloc(gfp_mask); might_alloc(gfp_mask);
@@ -428,6 +431,11 @@ repeat_alloc:
spin_unlock_irqrestore(&pool->lock, flags); spin_unlock_irqrestore(&pool->lock, flags);
return NULL; return NULL;
} }
trace_android_vh_mempool_alloc_skip_wait(&gfp_temp, &skip_wait);
if (skip_wait) {
spin_unlock_irqrestore(&pool->lock, flags);
goto repeat_alloc;
}
/* Let's wait for someone else to return an element to @pool */ /* Let's wait for someone else to return an element to @pool */
init_wait(&wait); init_wait(&wait);


@@ -182,13 +182,57 @@ void putback_movable_pages(struct list_head *l)
} }
EXPORT_SYMBOL_GPL(putback_movable_pages); EXPORT_SYMBOL_GPL(putback_movable_pages);
static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
struct folio *folio,
unsigned long idx)
{
struct page *page = folio_page(folio, idx);
bool contains_data;
pte_t newpte;
void *addr;
VM_BUG_ON_PAGE(PageCompound(page), page);
VM_BUG_ON_PAGE(!PageAnon(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
mm_forbids_zeropage(pvmw->vma->vm_mm))
return false;
/*
* The pmd entry mapping the old thp was flushed and the pte mapping
* this subpage has been non present. If the subpage is only zero-filled
* then map it to the shared zeropage.
*/
addr = kmap_local_page(page);
contains_data = memchr_inv(addr, 0, PAGE_SIZE);
kunmap_local(addr);
if (contains_data)
return false;
newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
pvmw->vma->vm_page_prot));
set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
return true;
}
struct rmap_walk_arg {
struct folio *folio;
bool map_unused_to_zeropage;
};
/* /*
* Restore a potential migration pte to a working pte entry * Restore a potential migration pte to a working pte entry
*/ */
static bool remove_migration_pte(struct folio *dst, static bool remove_migration_pte(struct folio *dst,
struct vm_area_struct *vma, unsigned long addr, void *arg) struct vm_area_struct *vma, unsigned long addr, void *arg)
{ {
-struct folio *src = arg;
+struct rmap_walk_arg *rmap_walk_arg = arg;
+struct folio *src = rmap_walk_arg->folio;
DEFINE_FOLIO_VMA_WALK(pvmw, src, vma, addr, PVMW_SYNC | PVMW_MIGRATION); DEFINE_FOLIO_VMA_WALK(pvmw, src, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
while (page_vma_mapped_walk(&pvmw)) { while (page_vma_mapped_walk(&pvmw)) {
@@ -228,6 +272,9 @@ static bool remove_migration_pte(struct folio *dst,
continue; continue;
} }
#endif #endif
if (rmap_walk_arg->map_unused_to_zeropage &&
try_to_map_unused_to_zeropage(&pvmw, folio, idx))
continue;
folio_get(folio); folio_get(folio);
pte = mk_pte(page, READ_ONCE(vma->vm_page_prot)); pte = mk_pte(page, READ_ONCE(vma->vm_page_prot));
@@ -303,14 +350,21 @@ static bool remove_migration_pte(struct folio *dst,
* Get rid of all migration entries and replace them by * Get rid of all migration entries and replace them by
* references to the indicated page. * references to the indicated page.
*/ */
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked)
+void remove_migration_ptes(struct folio *src, struct folio *dst, int flags)
 {
-struct rmap_walk_control rwc = {
-.rmap_one = remove_migration_pte,
-.arg = src,
+struct rmap_walk_arg rmap_walk_arg = {
+.folio = src,
+.map_unused_to_zeropage = flags & RMP_USE_SHARED_ZEROPAGE,
 };
-if (locked)
+struct rmap_walk_control rwc = {
+.rmap_one = remove_migration_pte,
+.arg = &rmap_walk_arg,
+};
+VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src != dst), src);
+if (flags & RMP_LOCKED)
rmap_walk_locked(dst, &rwc); rmap_walk_locked(dst, &rwc);
else else
rmap_walk(dst, &rwc); rmap_walk(dst, &rwc);
@@ -461,6 +515,7 @@ int folio_migrate_mapping(struct address_space *mapping,
} }
/* Take off deferred split queue while frozen and memcg set */ /* Take off deferred split queue while frozen and memcg set */
if (folio_test_large(folio) && folio_test_large_rmappable(folio))
folio_unqueue_deferred_split(folio); folio_unqueue_deferred_split(folio);
/* /*
@@ -933,7 +988,7 @@ static int writeout(struct address_space *mapping, struct folio *folio)
* At this point we know that the migration attempt cannot * At this point we know that the migration attempt cannot
* be successful. * be successful.
*/ */
-remove_migration_ptes(folio, folio, false);
+remove_migration_ptes(folio, folio, 0);
rc = mapping->a_ops->writepage(&folio->page, &wbc); rc = mapping->a_ops->writepage(&folio->page, &wbc);
@@ -1096,7 +1151,7 @@ static void migrate_folio_undo_src(struct folio *src,
struct list_head *ret) struct list_head *ret)
{ {
if (page_was_mapped) if (page_was_mapped)
-remove_migration_ptes(src, src, false);
+remove_migration_ptes(src, src, 0);
/* Drop an anon_vma reference if we took one */ /* Drop an anon_vma reference if we took one */
if (anon_vma) if (anon_vma)
put_anon_vma(anon_vma); put_anon_vma(anon_vma);
@@ -1335,7 +1390,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
lru_add_drain(); lru_add_drain();
if (old_page_state & PAGE_WAS_MAPPED) if (old_page_state & PAGE_WAS_MAPPED)
-remove_migration_ptes(src, dst, false);
+remove_migration_ptes(src, dst, 0);
out_unlock_both: out_unlock_both:
folio_unlock(dst); folio_unlock(dst);
@@ -1474,7 +1529,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
if (page_was_mapped) if (page_was_mapped)
remove_migration_ptes(src, remove_migration_ptes(src,
-rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+rc == MIGRATEPAGE_SUCCESS ? dst : src, 0);
unlock_put_anon: unlock_put_anon:
folio_unlock(dst); folio_unlock(dst);
@@ -1702,6 +1757,35 @@ static int migrate_pages_batch(struct list_head *from,
cond_resched(); cond_resched();
/*
* The rare folio on the deferred split list should
* be split now. It should not count as a failure:
* but increment nr_failed because, without doing so,
* migrate_pages() may report success with (split but
* unmigrated) pages still on its fromlist; whereas it
* always reports success when its fromlist is empty.
*
* Only check it without removing it from the list.
* Since the folio can be on deferred_split_scan()
* local list and removing it can cause the local list
* corruption. Folio split process below can handle it
* with the help of folio_ref_freeze().
*
* nr_pages > 2 is needed to avoid checking order-1
* page cache folios. They exist, in contrast to
* non-existent order-1 anonymous folios, and do not
* use _deferred_list.
*/
if (nr_pages > 2 &&
!list_empty(&folio->_deferred_list) &&
folio_test_partially_mapped(folio)) {
if (!try_split_folio(folio, split_folios, mode)) {
nr_failed++;
stats->nr_thp_split += is_thp;
continue;
}
}
/* /*
* Large folio migration might be unsupported or * Large folio migration might be unsupported or
* the allocation might be failed so we should retry * the allocation might be failed so we should retry


@@ -422,7 +422,7 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
continue; continue;
folio = page_folio(page); folio = page_folio(page);
-remove_migration_ptes(folio, folio, false);
+remove_migration_ptes(folio, folio, 0);
src_pfns[i] = 0; src_pfns[i] = 0;
folio_unlock(folio); folio_unlock(folio);
@@ -840,7 +840,7 @@ void migrate_device_finalize(unsigned long *src_pfns,
src = page_folio(page); src = page_folio(page);
dst = page_folio(newpage); dst = page_folio(newpage);
-remove_migration_ptes(src, dst, false);
+remove_migration_ptes(src, dst, 0);
folio_unlock(src); folio_unlock(src);
if (is_zone_device_page(page)) if (is_zone_device_page(page))


@@ -208,8 +208,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch)
if (lruvec) if (lruvec)
unlock_page_lruvec_irq(lruvec); unlock_page_lruvec_irq(lruvec);
-folios_put(fbatch->folios, folio_batch_count(fbatch));
-folio_batch_reinit(fbatch);
+folios_put(fbatch);
} }
void mlock_drain_local(void) void mlock_drain_local(void)


@@ -1558,7 +1558,7 @@ static inline void setup_usemap(struct zone *zone) {}
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
void __init set_pageblock_order(void) void __init set_pageblock_order(void)
{ {
-unsigned int order = MAX_ORDER;
+unsigned int order = PAGE_BLOCK_ORDER;
/* Check that pageblock_nr_pages has not already been setup */ /* Check that pageblock_nr_pages has not already been setup */
if (pageblock_order) if (pageblock_order)


@@ -33,6 +33,7 @@
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/cpuset.h> #include <linux/cpuset.h>
#include <linux/pagevec.h>
#include <linux/memory_hotplug.h> #include <linux/memory_hotplug.h>
#include <linux/nodemask.h> #include <linux/nodemask.h>
#include <linux/vmstat.h> #include <linux/vmstat.h>
@@ -323,7 +324,7 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
#endif #endif
}; };
-unsigned long free_highatomics[MAX_NR_ZONES] = {0};
+unsigned long nr_free_highatomic[MAX_NR_ZONES] = {0};
int min_free_kbytes = 1024; int min_free_kbytes = 1024;
int user_min_free_kbytes = -1; int user_min_free_kbytes = -1;
@@ -770,8 +771,8 @@ static inline void account_freepages(struct zone *zone, int nr_pages,
if (is_migrate_cma(migratetype)) if (is_migrate_cma(migratetype))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
else if (is_migrate_highatomic(migratetype)) else if (is_migrate_highatomic(migratetype))
-WRITE_ONCE(free_highatomics[zone_idx(zone)],
-free_highatomics[zone_idx(zone)] + nr_pages);
+WRITE_ONCE(nr_free_highatomic[zone_idx(zone)],
+nr_free_highatomic[zone_idx(zone)] + nr_pages);
} }
/* Used for pages not on another list */ /* Used for pages not on another list */
@@ -921,7 +922,6 @@ static inline void __free_one_page(struct page *page,
VM_BUG_ON_PAGE(page->flags & check_flags, page); VM_BUG_ON_PAGE(page->flags & check_flags, page);
VM_BUG_ON(migratetype == -1); VM_BUG_ON(migratetype == -1);
VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page); VM_BUG_ON_PAGE(bad_range(zone, page), page);
@@ -1237,6 +1237,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
} }
} }
(page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
trace_android_vh_mm_free_page(page + i);
} }
} }
if (PageMappingFlags(page)) if (PageMappingFlags(page))
@@ -1252,6 +1253,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
page_cpupid_reset_last(page); page_cpupid_reset_last(page);
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
trace_android_vh_mm_free_page(page);
reset_page_owner(page, order); reset_page_owner(page, order);
free_page_pinner(page, order); free_page_pinner(page, order);
page_table_check_free(page, order); page_table_check_free(page, order);
@@ -1372,7 +1374,6 @@ static void free_one_page(struct zone *zone, struct page *page,
static void __free_pages_ok(struct page *page, unsigned int order, static void __free_pages_ok(struct page *page, unsigned int order,
fpi_t fpi_flags) fpi_t fpi_flags)
{ {
unsigned long flags;
int migratetype; int migratetype;
unsigned long pfn = page_to_pfn(page); unsigned long pfn = page_to_pfn(page);
struct zone *zone = page_zone(page); struct zone *zone = page_zone(page);
@@ -1392,21 +1393,17 @@ skip_prepare:
fpi_flags, &skip_free_pages_ok); fpi_flags, &skip_free_pages_ok);
if (skip_free_pages_ok) if (skip_free_pages_ok)
return; return;
-spin_lock_irqsave(&zone->lock, flags);
+/*
+* Calling get_pfnblock_migratetype() without spin_lock_irqsave() here
+* is used to avoid calling get_pfnblock_migratetype() under the lock.
+* This will reduce the lock holding time.
+*/
 migratetype = get_pfnblock_migratetype(page, pfn);
 trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip_free_unref_page);
-if (skip_free_unref_page) {
-spin_unlock_irqrestore(&zone->lock, flags);
+if (skip_free_unref_page)
 return;
-}
-if (unlikely(has_isolate_pageblock(zone) ||
-is_migrate_isolate(migratetype))) {
-migratetype = get_pfnblock_migratetype(page, pfn);
-}
-__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
-spin_unlock_irqrestore(&zone->lock, flags);
+free_one_page(zone, page, pfn, order, fpi_flags);
 __count_vm_events(PGFREE, 1 << order);
} }
@@ -2249,8 +2246,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
struct zone *zone; struct zone *zone;
struct page *page; struct page *page;
int order; int order;
-int ret;
 bool skip_unreserve_highatomic = false;
+int ret;
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
ac->nodemask) { ac->nodemask) {
@@ -2787,58 +2784,59 @@ void free_unref_page(struct page *page, unsigned int order)
} }
 /*
-* Free a list of 0-order pages
+* Free a batch of folios
 */
-void free_unref_page_list(struct list_head *list)
+void free_unref_folios(struct folio_batch *folios)
 {
 unsigned long __maybe_unused UP_flags;
-struct page *page, *next;
 struct per_cpu_pages *pcp = NULL;
 struct zone *locked_zone = NULL;
-int batch_count = 0;
+int i, j;
/* Prepare folios for freeing */
for (i = 0, j = 0; i < folios->nr; i++) {
struct folio *folio = folios->folios[i];
unsigned long pfn = folio_pfn(folio);
unsigned int order = folio_order(folio);
if (order > 0 && folio_test_large_rmappable(folio))
folio_unqueue_deferred_split(folio);
if (!free_pages_prepare(&folio->page, order, FPI_NONE))
continue;
/*
* Free orders not handled on the PCP directly to the
* allocator.
*/
if (!pcp_allowed_order(order)) {
free_one_page(folio_zone(folio), &folio->page,
pfn, order, FPI_NONE);
continue;
}
folio->private = (void *)(unsigned long)order;
if (j != i)
folios->folios[j] = folio;
j++;
}
folios->nr = j;
for (i = 0; i < folios->nr; i++) {
struct folio *folio = folios->folios[i];
struct zone *zone = folio_zone(folio);
unsigned long pfn = folio_pfn(folio);
unsigned int order = (unsigned long)folio->private;
 int migratetype;
-bool skip_free = false;
-/* Prepare pages for freeing */
-list_for_each_entry_safe(page, next, list, lru) {
-unsigned long pfn = page_to_pfn(page);
-if (!free_pages_prepare(page, 0, FPI_NONE)) {
-list_del(&page->lru);
-continue;
-}
-/*
-* Free isolated pages directly to the allocator, see
-* comment in free_unref_page.
-*/
-migratetype = get_pfnblock_migratetype(page, pfn);
-if (unlikely(is_migrate_isolate(migratetype))) {
-list_del(&page->lru);
-free_one_page(page_zone(page), page, pfn, 0, FPI_NONE);
-continue;
-}
-}
-trace_android_vh_free_unref_page_list_bypass(list, &skip_free);
-if (skip_free)
-return;
-list_for_each_entry_safe(page, next, list, lru) {
-struct zone *zone = page_zone(page);
-unsigned long pfn = page_to_pfn(page);
-list_del(&page->lru);
-migratetype = get_pfnblock_migratetype(page, pfn);
-/*
-* Either different zone requiring a different pcp lock or
-* excessive lock hold times when freeing a large list of
-* pages.
-*/
-if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {
+folio->private = NULL;
+migratetype = get_pfnblock_migratetype(&folio->page, pfn);
+/* Different zone requires a different pcp lock */
+if (zone != locked_zone ||
+is_migrate_isolate(migratetype)) {
 if (pcp) {
 pcp_spin_unlock(pcp);
 pcp_trylock_finish(UP_flags);
+locked_zone = NULL;
+pcp = NULL;
 }
 /*
@@ -2846,24 +2844,21 @@ void free_unref_page_list(struct list_head *list)
 * allocator, see comment in free_unref_page.
 */
 if (is_migrate_isolate(migratetype)) {
-free_one_page(zone, page, page_to_pfn(page),
-0, FPI_NONE);
+free_one_page(zone, &folio->page, pfn,
+order, FPI_NONE);
 continue;
 }
-batch_count = 0;
 /*
-* trylock is necessary as pages may be getting freed
+* trylock is necessary as folios may be getting freed
 * from IRQ or SoftIRQ context after an IO completion.
 */
 pcp_trylock_prepare(UP_flags);
 pcp = pcp_spin_trylock(zone->per_cpu_pageset);
 if (unlikely(!pcp)) {
 pcp_trylock_finish(UP_flags);
-free_one_page(zone, page, pfn,
-0, FPI_NONE);
-locked_zone = NULL;
+free_one_page(zone, &folio->page, pfn,
+order, FPI_NONE);
 continue;
 }
 locked_zone = zone;
@@ -2880,15 +2875,39 @@ void free_unref_page_list(struct list_head *list)
 migratetype = MIGRATE_MOVABLE;
 }
-trace_mm_page_free_batched(page);
-free_unref_page_commit(zone, pcp, page, migratetype, 0);
-batch_count++;
+trace_mm_page_free_batched(&folio->page);
+free_unref_page_commit(zone, pcp, &folio->page, migratetype,
+order);
 }
if (pcp) { if (pcp) {
pcp_spin_unlock(pcp); pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags); pcp_trylock_finish(UP_flags);
} }
folio_batch_reinit(folios);
}
void free_unref_page_list(struct list_head *list)
{
struct folio_batch fbatch;
bool skip_free = false;
trace_android_vh_free_unref_page_list_bypass(list, &skip_free);
if (skip_free)
return;
folio_batch_init(&fbatch);
while (!list_empty(list)) {
struct folio *folio = list_first_entry(list, struct folio, lru);
list_del(&folio->lru);
if (folio_batch_add(&fbatch, folio) > 0)
continue;
free_unref_folios(&fbatch);
}
if (fbatch.nr)
free_unref_folios(&fbatch);
} }
/* /*
@@ -3216,7 +3235,7 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
* watermark then subtract the free pages reserved for highatomic. * watermark then subtract the free pages reserved for highatomic.
*/ */
if (likely(!(alloc_flags & ALLOC_RESERVES))) if (likely(!(alloc_flags & ALLOC_RESERVES)))
-unusable_free += READ_ONCE(free_highatomics[zone_idx(z)]);
+unusable_free += READ_ONCE(nr_free_highatomic[zone_idx(z)]);
#ifdef CONFIG_CMA #ifdef CONFIG_CMA
/* If allocation can't use CMA areas don't use free CMA pages */ /* If allocation can't use CMA areas don't use free CMA pages */
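free_unref_page_list() above now drains its list through a fixed-size folio_batch, flushing with free_unref_folios() whenever folio_batch_add() reports that no space is left, and once more for the partial tail. The same gather-and-flush pattern in a self-contained user-space sketch, with a hypothetical batch type standing in for struct folio_batch:

/* Gather-and-flush batching in the style of free_unref_page_list(); names are illustrative. */
#include <stdio.h>

#define BATCH_MAX 31                  /* a folio_batch also holds a small fixed number */

struct item { int id; };

struct batch {
	unsigned int nr;
	struct item *items[BATCH_MAX];
};

static void batch_init(struct batch *b)
{
	b->nr = 0;
}

/* Returns the space remaining after the add; 0 means the batch just became full. */
static unsigned int batch_add(struct batch *b, struct item *it)
{
	b->items[b->nr++] = it;
	return BATCH_MAX - b->nr;
}

static void batch_flush(struct batch *b)
{
	printf("flushing %u items\n", b->nr);
	/* ... a real implementation frees every item under one lock acquisition ... */
	b->nr = 0;
}

int main(void)
{
	struct item items[100];
	struct batch b;

	batch_init(&b);
	for (int i = 0; i < 100; i++) {
		items[i].id = i;
		if (batch_add(&b, &items[i]) > 0)
			continue;             /* still room: keep gathering */
		batch_flush(&b);              /* full: flush in one go */
	}
	if (b.nr)
		batch_flush(&b);              /* flush the partial tail */
	return 0;
}

The payoff of the pattern is the same as in the diff: the expensive per-flush work (zone and pcp locking in the kernel) is paid once per batch rather than once per page.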


@@ -417,9 +417,9 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
ret = __alloc_contig_migrate_range(&cc, head_pfn, ret = __alloc_contig_migrate_range(&cc, head_pfn,
head_pfn + nr_pages, page_mt); head_pfn + nr_pages, page_mt);
if (ret) if (ret)
goto failed; goto failed;
pfn = head_pfn + nr_pages; pfn = head_pfn + nr_pages;
continue; continue;
} }


@@ -270,6 +270,9 @@ static const struct vm_operations_struct pad_vma_ops = {
.name = pad_vma_name, .name = pad_vma_name,
}; };
/* Defined in kernel/fork.c */
extern struct kmem_cache *vm_area_cachep;
/* /*
* Returns a new VMA representing the padding in @vma; * Returns a new VMA representing the padding in @vma;
* returns NULL if no padding in @vma or allocation failed. * returns NULL if no padding in @vma or allocation failed.
@@ -281,7 +284,7 @@ static struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma)
if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK)) if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK))
return NULL; return NULL;
-pad = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+pad = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!pad) { if (!pad) {
pr_warn("Page size migration: Failed to allocate padding VMA"); pr_warn("Page size migration: Failed to allocate padding VMA");
return NULL; return NULL;
@@ -347,7 +350,7 @@ void show_map_pad_vma(struct vm_area_struct *vma, struct seq_file *m,
else else
((show_pad_maps_fn)func)(m, pad); ((show_pad_maps_fn)func)(m, pad);
-kfree(pad);
+kmem_cache_free(vm_area_cachep, pad);
} }
/* /*


@@ -1599,8 +1599,8 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
* Check partially_mapped first to ensure it is a large folio. * Check partially_mapped first to ensure it is a large folio.
*/ */
if (folio_test_anon(folio) && partially_mapped && if (folio_test_anon(folio) && partially_mapped &&
-list_empty(&folio->_deferred_list))
-deferred_split_folio(folio);
+!folio_test_partially_mapped(folio))
+deferred_split_folio(folio, true);
} }
/* /*


@@ -342,7 +342,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
K(low_wmark_pages(zone)), K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)), K(high_wmark_pages(zone)),
K(zone->nr_reserved_highatomic), K(zone->nr_reserved_highatomic),
-K(free_highatomics[zone_idx(zone)]),
+K(nr_free_highatomic[zone_idx(zone)]),
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),

mm/swap.c

@@ -77,26 +77,33 @@ static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = {
.lock = INIT_LOCAL_LOCK(lock), .lock = INIT_LOCAL_LOCK(lock),
}; };
+static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp,
+unsigned long *flagsp)
+{
+if (folio_test_lru(folio)) {
+folio_lruvec_relock_irqsave(folio, lruvecp, flagsp);
+lruvec_del_folio(*lruvecp, folio);
+__folio_clear_lru_flags(folio);
+}
+}
 /*
 * This path almost never happens for VM activity - pages are normally freed
 * in batches. But it gets used by networking - and for compound pages.
 */
-static void __page_cache_release(struct folio *folio)
+static void page_cache_release(struct folio *folio)
 {
-if (folio_test_lru(folio)) {
-struct lruvec *lruvec;
+struct lruvec *lruvec = NULL;
 unsigned long flags;
-lruvec = folio_lruvec_lock_irqsave(folio, &flags);
-lruvec_del_folio(lruvec, folio);
-__folio_clear_lru_flags(folio);
+__page_cache_release(folio, &lruvec, &flags);
+if (lruvec)
 unlock_page_lruvec_irqrestore(lruvec, flags);
-}
 }
 static void __folio_put_small(struct folio *folio)
 {
-__page_cache_release(folio);
+page_cache_release(folio);
 mem_cgroup_uncharge(folio);
 free_unref_page(&folio->page, 0);
 }
@@ -110,7 +117,7 @@ static void __folio_put_large(struct folio *folio)
* be called for hugetlb (it has a separate hugetlb_cgroup.) * be called for hugetlb (it has a separate hugetlb_cgroup.)
*/ */
if (!folio_test_hugetlb(folio)) if (!folio_test_hugetlb(folio))
-__page_cache_release(folio);
+page_cache_release(folio);
destroy_large_folio(folio); destroy_large_folio(folio);
} }
@@ -133,22 +140,25 @@ EXPORT_SYMBOL(__folio_put);
*/ */
void put_pages_list(struct list_head *pages) void put_pages_list(struct list_head *pages)
{ {
struct folio_batch fbatch;
struct folio *folio, *next; struct folio *folio, *next;
folio_batch_init(&fbatch);
list_for_each_entry_safe(folio, next, pages, lru) { list_for_each_entry_safe(folio, next, pages, lru) {
if (!folio_put_testzero(folio)) { if (!folio_put_testzero(folio))
list_del(&folio->lru);
continue; continue;
}
if (folio_test_large(folio)) { if (folio_test_large(folio)) {
list_del(&folio->lru);
__folio_put_large(folio); __folio_put_large(folio);
continue; continue;
} }
/* LRU flag must be clear because it's passed using the lru */ /* LRU flag must be clear because it's passed using the lru */
if (folio_batch_add(&fbatch, folio) > 0)
continue;
free_unref_folios(&fbatch);
} }
free_unref_page_list(pages); if (fbatch.nr)
free_unref_folios(&fbatch);
INIT_LIST_HEAD(pages); INIT_LIST_HEAD(pages);
} }
EXPORT_SYMBOL(put_pages_list); EXPORT_SYMBOL(put_pages_list);
@@ -170,7 +180,7 @@ static void lru_add_fn(struct lruvec *lruvec, struct folio *folio)
* while the LRU lock is held. * while the LRU lock is held.
* *
* (That is not true of __page_cache_release(), and not necessarily * (That is not true of __page_cache_release(), and not necessarily
* true of release_pages(): but those only clear the mlocked flag after * true of folios_put(): but those only clear the mlocked flag after
* folio_put_testzero() has excluded any other users of the folio.) * folio_put_testzero() has excluded any other users of the folio.)
*/ */
if (folio_evictable(folio)) { if (folio_evictable(folio)) {
@@ -208,7 +218,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
if (move_fn != lru_add_fn && !folio_test_clear_lru(folio)) if (move_fn != lru_add_fn && !folio_test_clear_lru(folio))
continue; continue;
lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
move_fn(lruvec, folio); move_fn(lruvec, folio);
folio_set_lru(folio); folio_set_lru(folio);
@@ -216,8 +226,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
if (lruvec) if (lruvec)
unlock_page_lruvec_irqrestore(lruvec, flags); unlock_page_lruvec_irqrestore(lruvec, flags);
folios_put(fbatch->folios, folio_batch_count(fbatch)); folios_put(fbatch);
folio_batch_reinit(fbatch);
} }
static void folio_batch_add_and_move(struct folio_batch *fbatch, static void folio_batch_add_and_move(struct folio_batch *fbatch,
@@ -958,47 +967,29 @@ void lru_cache_disable(void)
EXPORT_SYMBOL_GPL(lru_cache_disable); EXPORT_SYMBOL_GPL(lru_cache_disable);
/** /**
* release_pages - batched put_page() * folios_put_refs - Reduce the reference count on a batch of folios.
* @arg: array of pages to release * @folios: The folios.
* @nr: number of pages * @refs: The number of refs to subtract from each folio.
* *
* Decrement the reference count on all the pages in @arg. If it * Like folio_put(), but for a batch of folios. This is more efficient
* fell to zero, remove the page from the LRU and free it. * than writing the loop yourself as it will optimise the locks which need
* to be taken if the folios are freed. The folios batch is returned
* empty and ready to be reused for another batch; there is no need
* to reinitialise it. If @refs is NULL, we subtract one from each
* folio refcount.
* *
* Note that the argument can be an array of pages, encoded pages, * Context: May be called in process or interrupt context, but not in NMI
* or folio pointers. We ignore any encoded bits, and turn any of * context. May be called while holding a spinlock.
* them into just a folio that gets free'd.
*/ */
void release_pages(release_pages_arg arg, int nr) void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
{ {
int i; int i, j;
struct encoded_page **encoded = arg.encoded_pages;
LIST_HEAD(pages_to_free);
struct lruvec *lruvec = NULL; struct lruvec *lruvec = NULL;
unsigned long flags = 0; unsigned long flags = 0;
unsigned int lock_batch;
for (i = 0; i < nr; i++) { for (i = 0, j = 0; i < folios->nr; i++) {
unsigned int nr_refs = 1; struct folio *folio = folios->folios[i];
struct folio *folio; unsigned int nr_refs = refs ? refs[i] : 1;
/* Turn any of the argument types into a folio */
folio = page_folio(encoded_page_ptr(encoded[i]));
/* Is our next entry actually "nr_pages" -> "nr_refs" ? */
if (unlikely(encoded_page_flags(encoded[i]) &
ENCODED_PAGE_BIT_NR_PAGES_NEXT))
nr_refs = encoded_nr_pages(encoded[++i]);
/*
* Make sure the IRQ-safe lock-holding time does not get
* excessive with a continuous string of pages from the
* same lruvec. The lock is held only if lruvec != NULL.
*/
if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) {
unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL;
}
if (is_huge_zero_page(&folio->page)) if (is_huge_zero_page(&folio->page))
continue; continue;
@@ -1018,34 +1009,73 @@ void release_pages(release_pages_arg arg, int nr)
if (!folio_ref_sub_and_test(folio, nr_refs)) if (!folio_ref_sub_and_test(folio, nr_refs))
continue; continue;
if (folio_test_large(folio)) { /* hugetlb has its own memcg */
if (folio_test_hugetlb(folio)) {
if (lruvec) { if (lruvec) {
unlock_page_lruvec_irqrestore(lruvec, flags); unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL; lruvec = NULL;
} }
__folio_put_large(folio); free_huge_folio(folio);
continue; continue;
} }
if (folio_test_lru(folio)) { folio_unqueue_deferred_split(folio);
struct lruvec *prev_lruvec = lruvec; __page_cache_release(folio, &lruvec, &flags);
lruvec = folio_lruvec_relock_irqsave(folio, lruvec, if (j != i)
&flags); folios->folios[j] = folio;
if (prev_lruvec != lruvec) j++;
lock_batch = 0;
lruvec_del_folio(lruvec, folio);
__folio_clear_lru_flags(folio);
}
list_add(&folio->lru, &pages_to_free);
} }
if (lruvec) if (lruvec)
unlock_page_lruvec_irqrestore(lruvec, flags); unlock_page_lruvec_irqrestore(lruvec, flags);
if (!j) {
folio_batch_reinit(folios);
return;
}
mem_cgroup_uncharge_list(&pages_to_free); folios->nr = j;
free_unref_page_list(&pages_to_free); mem_cgroup_uncharge_folios(folios);
free_unref_folios(folios);
}
EXPORT_SYMBOL(folios_put_refs);
/**
* release_pages - batched put_page()
* @arg: array of pages to release
* @nr: number of pages
*
* Decrement the reference count on all the pages in @arg. If it
* fell to zero, remove the page from the LRU and free it.
*
* Note that the argument can be an array of pages, encoded pages,
* or folio pointers. We ignore any encoded bits, and turn any of
* them into just a folio that gets free'd.
*/
void release_pages(release_pages_arg arg, int nr)
{
struct folio_batch fbatch;
int refs[PAGEVEC_SIZE];
struct encoded_page **encoded = arg.encoded_pages;
int i;
folio_batch_init(&fbatch);
for (i = 0; i < nr; i++) {
/* Turn any of the argument types into a folio */
struct folio *folio = page_folio(encoded_page_ptr(encoded[i]));
/* Is our next entry actually "nr_pages" -> "nr_refs" ? */
refs[fbatch.nr] = 1;
if (unlikely(encoded_page_flags(encoded[i]) &
ENCODED_PAGE_BIT_NR_PAGES_NEXT))
refs[fbatch.nr] = encoded_nr_pages(encoded[++i]);
if (folio_batch_add(&fbatch, folio) > 0)
continue;
folios_put_refs(&fbatch, refs);
}
if (fbatch.nr)
folios_put_refs(&fbatch, refs);
} }
EXPORT_SYMBOL(release_pages); EXPORT_SYMBOL(release_pages);
@@ -1065,8 +1095,7 @@ void __folio_batch_release(struct folio_batch *fbatch)
lru_add_drain(); lru_add_drain();
fbatch->percpu_pvec_drained = true; fbatch->percpu_pvec_drained = true;
} }
release_pages(fbatch->folios, folio_batch_count(fbatch)); folios_put(fbatch);
folio_batch_reinit(fbatch);
} }
EXPORT_SYMBOL(__folio_batch_release); EXPORT_SYMBOL(__folio_batch_release);
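
The folios_put_refs() kernel-doc above spells out the batching contract: fill a folio_batch, hand it over, and get the batch back reinitialised. As a rough sketch of a caller (not part of this merge; the helper name and the kernel-context assumptions are made up for illustration), draining an arbitrary folio array could look like this:

/* Hypothetical kernel-side caller of the batched-release API shown above. */
#include <linux/mm.h>
#include <linux/pagevec.h>

static void example_drop_folio_refs(struct folio **folios, unsigned int nr)
{
	struct folio_batch fbatch;
	unsigned int i;

	folio_batch_init(&fbatch);
	for (i = 0; i < nr; i++) {
		/* folio_batch_add() returns the space left; 0 means the batch is full. */
		if (folio_batch_add(&fbatch, folios[i]) > 0)
			continue;
		/* refs == NULL drops exactly one reference per folio. */
		folios_put_refs(&fbatch, NULL);
	}
	if (folio_batch_count(&fbatch))
		folios_put_refs(&fbatch, NULL);
}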


@@ -1358,6 +1358,7 @@ const char * const vmstat_text[] = {
"thp_split_page",
"thp_split_page_failed",
"thp_deferred_split_page",
+"thp_underused_split_page",
"thp_split_pmd",
"thp_shatter_page",
"thp_shatter_page_failed",


@@ -17,7 +17,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX_SCM) += unix/
+obj-$(CONFIG_UNIX) += unix/
obj-y += ipv6/
obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/


@@ -36,6 +36,7 @@
#include <net/compat.h>
#include <net/scm.h>
#include <net/cls_cgroup.h>
+#include <net/af_unix.h>
/*
@@ -85,8 +86,15 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+fpl->count_unix = 0;
fpl->max = SCM_MAX_FD;
fpl->user = NULL;
+#if IS_ENABLED(CONFIG_UNIX)
+fpl->inflight = false;
+fpl->dead = false;
+fpl->edges = NULL;
+INIT_LIST_HEAD(&fpl->vertices);
+#endif
}
fpp = &fpl->fp[fpl->count];
@@ -109,6 +117,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fput(file);
return -EINVAL;
}
+if (unix_get_socket(file))
+fpl->count_unix++;
*fpp++ = file;
fpl->count++;
}
@@ -366,13 +377,18 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
if (!fpl)
return NULL;
-new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
+new_fpl = kmemdup(fpl, sizeof(*fpl),
GFP_KERNEL_ACCOUNT);
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
new_fpl->max = new_fpl->count;
new_fpl->user = get_uid(fpl->user);
+#if IS_ENABLED(CONFIG_UNIX)
+new_fpl->inflight = false;
+new_fpl->edges = NULL;
+INIT_LIST_HEAD(&new_fpl->vertices);
+#endif
}
return new_fpl;
}
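
scm_fp_copy() above is what unpacks an SCM_RIGHTS control message into a scm_fp_list, and it now also counts how many of the passed descriptors are AF_UNIX sockets (count_unix). For reference, this is the userspace side of such a message; the example is illustrative and not part of the merge:

/* Round-trip of one file descriptor over an AF_UNIX socketpair, i.e. the
 * userspace traffic that scm_fp_copy() parses on the send side.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int sv[2], received = -1;
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
		perror("socketpair");
		return 1;
	}

	/* Send: attach STDOUT_FILENO as SCM_RIGHTS ancillary data. */
	memset(&u, 0, sizeof(u));
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &(int){ STDOUT_FILENO }, sizeof(int));
	if (sendmsg(sv[0], &msg, 0) < 0) {
		perror("sendmsg");
		return 1;
	}

	/* Receive: the kernel installs a fresh descriptor for the same file. */
	memset(&u, 0, sizeof(u));
	msg.msg_controllen = sizeof(u.buf);
	if (recvmsg(sv[1], &msg, 0) < 0) {
		perror("recvmsg");
		return 1;
	}
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
			memcpy(&received, CMSG_DATA(cmsg), sizeof(int));

	dprintf(received, "got duplicated stdout as fd %d\n", received);
	close(sv[0]);
	close(sv[1]);
	return 0;
}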


@@ -16,11 +16,6 @@ config UNIX
Say Y unless you know what you are doing.
-config UNIX_SCM
-bool
-depends on UNIX
-default y
config AF_UNIX_OOB
bool
depends on UNIX


@@ -11,5 +11,3 @@ unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
-obj-$(CONFIG_UNIX_SCM) += scm.o


@@ -117,8 +117,6 @@
#include <linux/file.h>
#include <linux/btf_ids.h>
-#include "scm.h"
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
@@ -980,11 +978,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
-u->inflight = 0;
+u->listener = NULL;
+u->vertex = NULL;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
-INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
@@ -1583,6 +1581,7 @@ restart:
newsk->sk_type = sk->sk_type;
init_peercred(newsk);
newu = unix_sk(newsk);
+newu->listener = other;
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
@@ -1678,8 +1677,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk;
-struct sock *tsk;
struct sk_buff *skb;
+struct sock *tsk;
int err;
err = -EOPNOTSUPP;
@@ -1709,6 +1708,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
/* attach accepted sock to socket */
unix_state_lock(tsk);
+unix_update_edges(unix_sk(tsk));
newsock->state = SS_CONNECTED;
unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
@@ -1752,51 +1752,65 @@ out:
return err;
}
+/* The "user->unix_inflight" variable is protected by the garbage
+* collection lock, and we just read it locklessly here. If you go
+* over the limit, there might be a tiny race in actually noticing
+* it across threads. Tough.
+*/
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+struct user_struct *user = current_user();
+if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+return false;
+}
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+if (too_many_unix_fds(current))
+return -ETOOMANYREFS;
+/* Need to duplicate file references for the sake of garbage
+* collection. Otherwise a socket in the fps might become a
+* candidate for GC while the skb is not yet queued.
+*/
+UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+if (!UNIXCB(skb).fp)
+return -ENOMEM;
+if (unix_prepare_fpl(UNIXCB(skb).fp))
+return -ENOMEM;
+return 0;
+}
+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+scm->fp = UNIXCB(skb).fp;
+UNIXCB(skb).fp = NULL;
+unix_destroy_fpl(scm->fp);
+}
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+}
-/*
-* Garbage collection of unix sockets starts by selecting a set of
-* candidate sockets which have reference only from being in flight
-* (total_refs == inflight_refs). This condition is checked once during
-* the candidate collection phase, and candidates are marked as such, so
-* that non-candidates can later be ignored. While inflight_refs is
-* protected by unix_gc_lock, total_refs (file count) is not, hence this
-* is an instantaneous decision.
-*
-* Once a candidate, however, the socket must not be reinstalled into a
-* file descriptor while the garbage collection is in progress.
-*
-* If the above conditions are met, then the directed graph of
-* candidates (*) does not change while unix_gc_lock is held.
-*
-* Any operations that changes the file count through file descriptors
-* (dup, close, sendmsg) does not change the graph since candidates are
-* not installed in fds.
-*
-* Dequeing a candidate via recvmsg would install it into an fd, but
-* that takes unix_gc_lock to decrement the inflight count, so it's
-* serialized with garbage collection.
-*
-* MSG_PEEK is special in that it does not change the inflight count,
-* yet does install the socket into an fd. The following lock/unlock
-* pair is to ensure serialization with garbage collection. It must be
-* done between incrementing the file count and installing the file into
-* an fd.
-*
-* If garbage collection starts after the barrier provided by the
-* lock/unlock, then it will see the elevated refcount and not mark this
-* as a candidate. If a garbage collection is already in progress
-* before the file count was incremented, then the lock/unlock pair will
-* ensure that garbage collection is finished before progressing to
-* installing the fd.
-*
-* (*) A -> B where B is on the queue of A or B is on the queue of C
-* which is on the queue of listening socket A.
-*/
-spin_lock(&unix_gc_lock);
-spin_unlock(&unix_gc_lock);
-}
+static void unix_destruct_scm(struct sk_buff *skb)
+{
+struct scm_cookie scm;
+memset(&scm, 0, sizeof(scm));
+scm.pid = UNIXCB(skb).pid;
+if (UNIXCB(skb).fp)
+unix_detach_fds(&scm, skb);
+/* Alas, it calls VFS */
+/* So fscking what? fput() had been SMP-safe since the last Summer */
+scm_destroy(&scm);
+sock_wfree(skb);
+}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1855,8 +1869,10 @@ static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
-if (unlikely(fp && fp->count))
+if (unlikely(fp && fp->count)) {
atomic_add(fp->count, &u->scm_stat.nr_fds);
+unix_add_edges(fp, u);
+}
}
static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
@@ -1864,8 +1880,10 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
-if (unlikely(fp && fp->count))
+if (unlikely(fp && fp->count)) {
atomic_sub(fp->count, &u->scm_stat.nr_fds);
+unix_del_edges(fp);
+}
}
/*
@@ -1885,11 +1903,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
long timeo;
int err;
-wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+wait_for_unix_gc(scm.fp);
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out;
@@ -2157,11 +2176,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
bool fds_sent = false;
int data_len;
-wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+wait_for_unix_gc(scm.fp);
err = -EOPNOTSUPP;
if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)


@@ -81,249 +81,519 @@
#include <net/scm.h>
#include <net/tcp_states.h>
-#include "scm.h"
-/* Internal data structures and random procedures: */
-static LIST_HEAD(gc_candidates);
-static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
-struct sk_buff_head *hitlist)
-{
-struct sk_buff *skb;
-struct sk_buff *next;
-spin_lock(&x->sk_receive_queue.lock);
-skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
-/* Do we have file descriptors ? */
-if (UNIXCB(skb).fp) {
-bool hit = false;
-/* Process the descriptors of this socket */
-int nfd = UNIXCB(skb).fp->count;
-struct file **fp = UNIXCB(skb).fp->fp;
-while (nfd--) {
-/* Get the socket the fd matches if it indeed does so */
-struct unix_sock *u = unix_get_socket(*fp++);
-/* Ignore non-candidates, they could have been added
-* to the queues after starting the garbage collection
-*/
-if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
-hit = true;
-func(u);
-}
-}
-if (hit && hitlist != NULL) {
-__skb_unlink(skb, &x->sk_receive_queue);
-__skb_queue_tail(hitlist, skb);
-}
-}
-}
-spin_unlock(&x->sk_receive_queue.lock);
-}
-static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
-struct sk_buff_head *hitlist)
-{
-if (x->sk_state != TCP_LISTEN) {
-scan_inflight(x, func, hitlist);
struct unix_sock *unix_get_socket(struct file *filp)
{
struct inode *inode = file_inode(filp);
/* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
const struct proto_ops *ops;
struct sock *sk = sock->sk;
ops = READ_ONCE(sock->ops);
/* PF_UNIX ? */
if (sk && ops && ops->family == PF_UNIX)
return unix_sk(sk);
}
return NULL;
}
static struct unix_vertex *unix_edge_successor(struct unix_edge *edge)
{
/* If an embryo socket has a fd,
* the listener indirectly holds the fd's refcnt.
*/
if (edge->successor->listener)
return unix_sk(edge->successor->listener)->vertex;
return edge->successor->vertex;
}
static bool unix_graph_maybe_cyclic;
static bool unix_graph_grouped;
static void unix_update_graph(struct unix_vertex *vertex)
{
/* If the receiver socket is not inflight, no cyclic
* reference could be formed.
*/
if (!vertex)
return;
unix_graph_maybe_cyclic = true;
unix_graph_grouped = false;
}
static LIST_HEAD(unix_unvisited_vertices);
enum unix_vertex_index {
UNIX_VERTEX_INDEX_MARK1,
UNIX_VERTEX_INDEX_MARK2,
UNIX_VERTEX_INDEX_START,
};
static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1;
static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
struct unix_vertex *vertex = edge->predecessor->vertex;
if (!vertex) {
vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
vertex->index = unix_vertex_unvisited_index;
vertex->out_degree = 0;
INIT_LIST_HEAD(&vertex->edges);
INIT_LIST_HEAD(&vertex->scc_entry);
list_move_tail(&vertex->entry, &unix_unvisited_vertices);
edge->predecessor->vertex = vertex;
}
vertex->out_degree++;
list_add_tail(&edge->vertex_entry, &vertex->edges);
unix_update_graph(unix_edge_successor(edge));
}
static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
struct unix_vertex *vertex = edge->predecessor->vertex;
if (!fpl->dead)
unix_update_graph(unix_edge_successor(edge));
list_del(&edge->vertex_entry);
vertex->out_degree--;
if (!vertex->out_degree) {
edge->predecessor->vertex = NULL;
list_move_tail(&vertex->entry, &fpl->vertices);
}
}
static void unix_free_vertices(struct scm_fp_list *fpl)
{
struct unix_vertex *vertex, *next_vertex;
list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
list_del(&vertex->entry);
kfree(vertex);
}
}
static DEFINE_SPINLOCK(unix_gc_lock);
unsigned int unix_tot_inflight;
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
int i = 0, j = 0;
spin_lock(&unix_gc_lock);
if (!fpl->count_unix)
goto out;
do {
struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
struct unix_edge *edge;
if (!inflight)
continue;
edge = fpl->edges + i++;
edge->predecessor = inflight;
edge->successor = receiver;
unix_add_edge(fpl, edge);
} while (i < fpl->count_unix);
receiver->scm_stat.nr_unix_fds += fpl->count_unix;
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
out:
WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);
spin_unlock(&unix_gc_lock);
fpl->inflight = true;
unix_free_vertices(fpl);
}
void unix_del_edges(struct scm_fp_list *fpl)
{
struct unix_sock *receiver;
int i = 0;
spin_lock(&unix_gc_lock);
if (!fpl->count_unix)
goto out;
do {
struct unix_edge *edge = fpl->edges + i++;
unix_del_edge(fpl, edge);
} while (i < fpl->count_unix);
if (!fpl->dead) {
receiver = fpl->edges[0].successor;
receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
}
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
out:
WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);
spin_unlock(&unix_gc_lock);
fpl->inflight = false;
}
void unix_update_edges(struct unix_sock *receiver)
{
/* nr_unix_fds is only updated under unix_state_lock().
* If it's 0 here, the embryo socket is not part of the
* inflight graph, and GC will not see it, so no lock needed.
*/
if (!receiver->scm_stat.nr_unix_fds) {
receiver->listener = NULL;
-} else {
-struct sk_buff *skb;
-struct sk_buff *next;
} else {
spin_lock(&unix_gc_lock);
unix_update_graph(unix_sk(receiver->listener)->vertex);
receiver->listener = NULL;
spin_unlock(&unix_gc_lock);
}
}
int unix_prepare_fpl(struct scm_fp_list *fpl)
{
struct unix_vertex *vertex;
int i;
if (!fpl->count_unix)
return 0;
for (i = 0; i < fpl->count_unix; i++) {
vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
if (!vertex)
goto err;
list_add(&vertex->entry, &fpl->vertices);
}
fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
GFP_KERNEL_ACCOUNT);
if (!fpl->edges)
goto err;
return 0;
err:
unix_free_vertices(fpl);
return -ENOMEM;
}
void unix_destroy_fpl(struct scm_fp_list *fpl)
{
if (fpl->inflight)
unix_del_edges(fpl);
kvfree(fpl->edges);
unix_free_vertices(fpl);
}
static bool unix_vertex_dead(struct unix_vertex *vertex)
{
struct unix_edge *edge;
-struct unix_sock *u;
-LIST_HEAD(embryos);
-/* For a listening socket collect the queued embryos
-* and perform a scan on them as well.
-*/
-spin_lock(&x->sk_receive_queue.lock);
-skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
-u = unix_sk(skb->sk);
-/* An embryo cannot be in-flight, so it's safe
-* to use the list link.
-*/
-BUG_ON(!list_empty(&u->link));
-list_add_tail(&u->link, &embryos);
-}
-spin_unlock(&x->sk_receive_queue.lock);
-while (!list_empty(&embryos)) {
-u = list_entry(embryos.next, struct unix_sock, link);
-scan_inflight(&u->sk, func, hitlist);
-list_del_init(&u->link);
-}
-}
-}
struct unix_sock *u;
long total_ref;
list_for_each_entry(edge, &vertex->edges, vertex_entry) {
struct unix_vertex *next_vertex = unix_edge_successor(edge);
/* The vertex's fd can be received by a non-inflight socket. */
if (!next_vertex)
return false;
/* The vertex's fd can be received by an inflight socket in
* another SCC.
*/
if (next_vertex->scc_index != vertex->scc_index)
return false;
}
/* No receiver exists out of the same SCC. */
edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
u = edge->predecessor;
total_ref = file_count(u->sk.sk_socket->file);
/* If not close()d, total_ref > out_degree. */
if (total_ref != vertex->out_degree)
return false;
return true;
}
enum unix_recv_queue_lock_class {
U_RECVQ_LOCK_NORMAL,
U_RECVQ_LOCK_EMBRYO,
};
static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
{
skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (u->oob_skb) {
WARN_ON_ONCE(skb_unref(u->oob_skb));
u->oob_skb = NULL;
}
#endif
}
static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
{
struct unix_vertex *vertex;
list_for_each_entry_reverse(vertex, scc, scc_entry) {
struct sk_buff_head *queue;
struct unix_edge *edge;
struct unix_sock *u;
edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
u = edge->predecessor;
queue = &u->sk.sk_receive_queue;
spin_lock(&queue->lock);
if (u->sk.sk_state == TCP_LISTEN) {
struct sk_buff *skb;
skb_queue_walk(queue, skb) {
struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
/* listener -> embryo order, the inversion never happens. */
spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO);
unix_collect_queue(unix_sk(skb->sk), hitlist);
spin_unlock(&embryo_queue->lock);
}
} else {
unix_collect_queue(u, hitlist);
}
spin_unlock(&queue->lock);
}
} }
-static void dec_inflight(struct unix_sock *usk)
-{
-usk->inflight--;
-}
-static void inc_inflight(struct unix_sock *usk)
-{
-usk->inflight++;
-}
-static void inc_inflight_move_tail(struct unix_sock *u)
-{
-u->inflight++;
-/* If this still might be part of a cycle, move it to the end
-* of the list, so that it's checked even if it was already
-* passed over
-*/
-if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
-list_move_tail(&u->link, &gc_candidates);
-}
static bool unix_scc_cyclic(struct list_head *scc)
{
struct unix_vertex *vertex;
struct unix_edge *edge;
/* SCC containing multiple vertices ? */
if (!list_is_singular(scc))
return true;
vertex = list_first_entry(scc, typeof(*vertex), scc_entry);
/* Self-reference or a embryo-listener circle ? */
list_for_each_entry(edge, &vertex->edges, vertex_entry) {
if (unix_edge_successor(edge) == vertex)
return true;
}
return false;
}
static LIST_HEAD(unix_visited_vertices);
static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
struct sk_buff_head *hitlist)
{
LIST_HEAD(vertex_stack);
struct unix_edge *edge;
LIST_HEAD(edge_stack);
next_vertex:
/* Push vertex to vertex_stack and mark it as on-stack
* (index >= UNIX_VERTEX_INDEX_START).
* The vertex will be popped when finalising SCC later.
*/
list_add(&vertex->scc_entry, &vertex_stack);
vertex->index = *last_index;
vertex->scc_index = *last_index;
(*last_index)++;
/* Explore neighbour vertices (receivers of the current vertex's fd). */
list_for_each_entry(edge, &vertex->edges, vertex_entry) {
struct unix_vertex *next_vertex = unix_edge_successor(edge);
if (!next_vertex)
continue;
if (next_vertex->index == unix_vertex_unvisited_index) {
/* Iterative deepening depth first search
*
* 1. Push a forward edge to edge_stack and set
* the successor to vertex for the next iteration.
*/
list_add(&edge->stack_entry, &edge_stack);
vertex = next_vertex;
goto next_vertex;
/* 2. Pop the edge directed to the current vertex
* and restore the ancestor for backtracking.
*/
prev_vertex:
edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
list_del_init(&edge->stack_entry);
next_vertex = vertex;
vertex = edge->predecessor->vertex;
/* If the successor has a smaller scc_index, two vertices
* are in the same SCC, so propagate the smaller scc_index
* to skip SCC finalisation.
*/
vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
} else if (next_vertex->index != unix_vertex_grouped_index) {
/* Loop detected by a back/cross edge.
*
* The successor is on vertex_stack, so two vertices are in
* the same SCC. If the successor has a smaller *scc_index*,
* propagate it to skip SCC finalisation.
*/
vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
} else {
/* The successor was already grouped as another SCC */
}
}
if (vertex->index == vertex->scc_index) {
struct unix_vertex *v;
struct list_head scc;
bool scc_dead = true;
/* SCC finalised.
*
* If the scc_index was not updated, all the vertices above on
* vertex_stack are in the same SCC. Group them using scc_entry.
*/
__list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);
list_for_each_entry_reverse(v, &scc, scc_entry) {
/* Don't restart DFS from this vertex in unix_walk_scc(). */
list_move_tail(&v->entry, &unix_visited_vertices);
/* Mark vertex as off-stack. */
v->index = unix_vertex_grouped_index;
if (scc_dead)
scc_dead = unix_vertex_dead(v);
}
if (scc_dead)
unix_collect_skb(&scc, hitlist);
else if (!unix_graph_maybe_cyclic)
unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
list_del(&scc);
}
/* Need backtracking ? */
if (!list_empty(&edge_stack))
goto prev_vertex;
}
static void unix_walk_scc(struct sk_buff_head *hitlist)
{
unsigned long last_index = UNIX_VERTEX_INDEX_START;
unix_graph_maybe_cyclic = false;
/* Visit every vertex exactly once.
* __unix_walk_scc() moves visited vertices to unix_visited_vertices.
*/
while (!list_empty(&unix_unvisited_vertices)) {
struct unix_vertex *vertex;
vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
__unix_walk_scc(vertex, &last_index, hitlist);
}
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
unix_graph_grouped = true;
}
static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
{
unix_graph_maybe_cyclic = false;
while (!list_empty(&unix_unvisited_vertices)) {
struct unix_vertex *vertex;
struct list_head scc;
bool scc_dead = true;
vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
list_add(&scc, &vertex->scc_entry);
list_for_each_entry_reverse(vertex, &scc, scc_entry) {
list_move_tail(&vertex->entry, &unix_visited_vertices);
if (scc_dead)
scc_dead = unix_vertex_dead(vertex);
}
if (scc_dead)
unix_collect_skb(&scc, hitlist);
else if (!unix_graph_maybe_cyclic)
unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
list_del(&scc);
}
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
}
static bool gc_in_progress;
static void __unix_gc(struct work_struct *work)
{
-struct sk_buff *next_skb, *skb;
-struct unix_sock *u;
-struct unix_sock *next;
struct sk_buff_head hitlist;
-struct list_head cursor;
-LIST_HEAD(not_cycle_list);
struct sk_buff *skb;
spin_lock(&unix_gc_lock);
-/* First, select candidates for garbage collection. Only
-* in-flight sockets are considered, and from those only ones
-* which don't have any external reference.
-*
-* Holding unix_gc_lock will protect these candidates from
-* being detached, and hence from gaining an external
-* reference. Since there are no possible receivers, all
-* buffers currently on the candidates' queues stay there
-* during the garbage collection.
-*
-* We also know that no new candidate can be added onto the
-* receive queues. Other, non candidate sockets _can_ be
-* added to queue, so we must make sure only to touch
-* candidates.
-*
-* Embryos, though never candidates themselves, affect which
-* candidates are reachable by the garbage collector. Before
-* being added to a listener's queue, an embryo may already
-* receive data carrying SCM_RIGHTS, potentially making the
-* passed socket a candidate that is not yet reachable by the
-* collector. It becomes reachable once the embryo is
-* enqueued. Therefore, we must ensure that no SCM-laden
-* embryo appears in a (candidate) listener's queue between
-* consecutive scan_children() calls.
-*/
-list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
-struct sock *sk = &u->sk;
-long total_refs;
-total_refs = file_count(sk->sk_socket->file);
-BUG_ON(!u->inflight);
-BUG_ON(total_refs < u->inflight);
-if (total_refs == u->inflight) {
-list_move_tail(&u->link, &gc_candidates);
-__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
-__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
-if (sk->sk_state == TCP_LISTEN) {
-unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
-unix_state_unlock(sk);
-}
-}
-}
-/* Now remove all internal in-flight reference to children of
-* the candidates.
-*/
-list_for_each_entry(u, &gc_candidates, link)
-scan_children(&u->sk, dec_inflight, NULL);
-/* Restore the references for children of all candidates,
-* which have remaining references. Do this recursively, so
-* only those remain, which form cyclic references.
-*
-* Use a "cursor" link, to make the list traversal safe, even
-* though elements might be moved about.
-*/
-list_add(&cursor, &gc_candidates);
-while (cursor.next != &gc_candidates) {
-u = list_entry(cursor.next, struct unix_sock, link);
-/* Move cursor to after the current position. */
-list_move(&cursor, &u->link);
-if (u->inflight) {
-list_move_tail(&u->link, &not_cycle_list);
-__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
-scan_children(&u->sk, inc_inflight_move_tail, NULL);
-}
-}
-list_del(&cursor);
-/* Now gc_candidates contains only garbage. Restore original
-* inflight counters for these as well, and remove the skbuffs
-* which are creating the cycle(s).
-*/
-skb_queue_head_init(&hitlist);
-list_for_each_entry(u, &gc_candidates, link) {
-scan_children(&u->sk, inc_inflight, &hitlist);
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
-if (u->oob_skb) {
-kfree_skb(u->oob_skb);
-u->oob_skb = NULL;
-}
-#endif
-}
-/* not_cycle_list contains those sockets which do not make up a
-* cycle. Restore these to the inflight list.
-*/
-while (!list_empty(&not_cycle_list)) {
-u = list_entry(not_cycle_list.next, struct unix_sock, link);
-__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
-list_move_tail(&u->link, &gc_inflight_list);
-}
if (!unix_graph_maybe_cyclic) {
spin_unlock(&unix_gc_lock);
goto skip_gc;
}
__skb_queue_head_init(&hitlist);
if (unix_graph_grouped)
unix_walk_scc_fast(&hitlist);
else
unix_walk_scc(&hitlist);
spin_unlock(&unix_gc_lock);
-/* We need io_uring to clean its registered files, ignore all io_uring
-* originated skbs. It's fine as io_uring doesn't keep references to
-* other io_uring instances and so killing all other files in the cycle
-* will put all io_uring references forcing it to go through normal
-* release.path eventually putting registered files.
-*/
-skb_queue_walk_safe(&hitlist, skb, next_skb) {
-if (skb->destructor == io_uring_destruct_scm) {
-__skb_unlink(skb, &hitlist);
-skb_queue_tail(&skb->sk->sk_receive_queue, skb);
-}
-}
skb_queue_walk(&hitlist, skb) {
if (UNIXCB(skb).fp)
UNIXCB(skb).fp->dead = true;
}
-/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
-spin_lock(&unix_gc_lock);
-/* There could be io_uring registered files, just push them back to
-* the inflight list
-*/
-list_for_each_entry_safe(u, next, &gc_candidates, link)
-list_move_tail(&u->link, &gc_inflight_list);
-/* All candidates should have been detached by now. */
-BUG_ON(!list_empty(&gc_candidates));
-/* Paired with READ_ONCE() in wait_for_unix_gc(). */
skip_gc:
WRITE_ONCE(gc_in_progress, false);
-spin_unlock(&unix_gc_lock);
}
static DECLARE_WORK(unix_gc_work, __unix_gc);
@@ -335,8 +605,9 @@ void unix_gc(void)
}
#define UNIX_INFLIGHT_TRIGGER_GC 16000
#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
-void wait_for_unix_gc(void)
void wait_for_unix_gc(struct scm_fp_list *fpl)
{
/* If number of inflight sockets is insane,
* force a garbage collect right now.
@@ -348,6 +619,13 @@ void wait_for_unix_gc(void)
!READ_ONCE(gc_in_progress))
unix_gc();
/* Penalise users who want to send AF_UNIX sockets
* but whose sockets have not been received yet.
*/
if (!fpl || !fpl->count_unix ||
READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
return;
if (READ_ONCE(gc_in_progress))
flush_work(&unix_gc_work);
}
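
The new GC only has work to do when AF_UNIX sockets keep each other alive purely through in-flight SCM_RIGHTS references, i.e. when the SCC walk above finds a dead strongly connected component. As an illustration (not part of the merge), the following userspace program manufactures the simplest such cycle: it sends one end of a socketpair into that end's own receive queue and then closes every descriptor, leaving only the in-flight reference for unix_gc() to reclaim.

/* Builds the smallest reference cycle the AF_UNIX GC has to clean up. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

static int send_fd(int via, int fd)
{
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;

	memset(&u, 0, sizeof(u));
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return sendmsg(via, &msg, 0) < 0 ? -1 : 0;
}

int main(void)
{
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
		perror("socketpair");
		return 1;
	}

	/* The skb queued on sv[1] now carries a reference to sv[1]'s own file. */
	if (send_fd(sv[0], sv[1])) {
		perror("sendmsg");
		return 1;
	}

	/* Drop the userspace references; the in-flight one keeps sv[1] alive,
	 * which keeps its receive queue (and thus the skb) alive: a dead SCC.
	 */
	close(sv[0]);
	close(sv[1]);
	return 0;
}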


@@ -1,156 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <net/af_unix.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/io_uring.h>
#include "scm.h"
unsigned int unix_tot_inflight;
EXPORT_SYMBOL(unix_tot_inflight);
LIST_HEAD(gc_inflight_list);
EXPORT_SYMBOL(gc_inflight_list);
DEFINE_SPINLOCK(unix_gc_lock);
EXPORT_SYMBOL(unix_gc_lock);
struct unix_sock *unix_get_socket(struct file *filp)
{
struct inode *inode = file_inode(filp);
/* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
const struct proto_ops *ops = READ_ONCE(sock->ops);
struct sock *s = sock->sk;
/* PF_UNIX ? */
if (s && ops && ops->family == PF_UNIX)
return unix_sk(s);
}
return NULL;
}
EXPORT_SYMBOL(unix_get_socket);
/* Keep the number of times in flight count for the file
* descriptor if it is for an AF_UNIX socket.
*/
void unix_inflight(struct user_struct *user, struct file *fp)
{
struct unix_sock *u = unix_get_socket(fp);
spin_lock(&unix_gc_lock);
if (u) {
if (!u->inflight) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
} else {
BUG_ON(list_empty(&u->link));
}
u->inflight++;
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
spin_unlock(&unix_gc_lock);
}
void unix_notinflight(struct user_struct *user, struct file *fp)
{
struct unix_sock *u = unix_get_socket(fp);
spin_lock(&unix_gc_lock);
if (u) {
BUG_ON(!u->inflight);
BUG_ON(list_empty(&u->link));
u->inflight--;
if (!u->inflight)
list_del_init(&u->link);
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
spin_unlock(&unix_gc_lock);
}
/*
* The "user->unix_inflight" variable is protected by the garbage
* collection lock, and we just read it locklessly here. If you go
* over the limit, there might be a tiny race in actually noticing
* it across threads. Tough.
*/
static inline bool too_many_unix_fds(struct task_struct *p)
{
struct user_struct *user = current_user();
if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
return false;
}
int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
if (too_many_unix_fds(current))
return -ETOOMANYREFS;
/*
* Need to duplicate file references for the sake of garbage
* collection. Otherwise a socket in the fps might become a
* candidate for GC while the skb is not yet queued.
*/
UNIXCB(skb).fp = scm_fp_dup(scm->fp);
if (!UNIXCB(skb).fp)
return -ENOMEM;
for (i = scm->fp->count - 1; i >= 0; i--)
unix_inflight(scm->fp->user, scm->fp->fp[i]);
return 0;
}
EXPORT_SYMBOL(unix_attach_fds);
void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
scm->fp = UNIXCB(skb).fp;
UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--)
unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}
EXPORT_SYMBOL(unix_detach_fds);
void unix_destruct_scm(struct sk_buff *skb)
{
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
scm.pid = UNIXCB(skb).pid;
if (UNIXCB(skb).fp)
unix_detach_fds(&scm, skb);
/* Alas, it calls VFS */
/* So fscking what? fput() had been SMP-safe since the last Summer */
scm_destroy(&scm);
sock_wfree(skb);
}
EXPORT_SYMBOL(unix_destruct_scm);
void io_uring_destruct_scm(struct sk_buff *skb)
{
unix_destruct_scm(skb);
}
EXPORT_SYMBOL(io_uring_destruct_scm);


@@ -1,10 +0,0 @@
#ifndef NET_UNIX_SCM_H
#define NET_UNIX_SCM_H
extern struct list_head gc_inflight_list;
extern spinlock_t unix_gc_lock;
int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
#endif


@@ -105,7 +105,8 @@ static int __init sample_trace_array_init(void)
* NOTE: This function increments the reference counter
* associated with the trace array - "tr".
*/
-tr = trace_array_get_by_name("sample-instance");
+tr = trace_array_get_by_name_ext("sample-instance",
+"sched,timer,kprobes");
if (!tr)
return -1;


@@ -88,6 +88,76 @@ static void write_debugfs(const char *fmt, ...)
}
}
static char *allocate_zero_filled_hugepage(size_t len)
{
char *result;
size_t i;
result = memalign(pmd_pagesize, len);
if (!result) {
printf("Fail to allocate memory\n");
exit(EXIT_FAILURE);
}
madvise(result, len, MADV_HUGEPAGE);
for (i = 0; i < len; i++)
result[i] = (char)0;
return result;
}
static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len)
{
unsigned long rss_anon_before, rss_anon_after;
size_t i;
if (!check_huge_anon(one_page, 4, pmd_pagesize)) {
printf("No THP is allocated\n");
exit(EXIT_FAILURE);
}
rss_anon_before = rss_anon();
if (!rss_anon_before) {
printf("No RssAnon is allocated before split\n");
exit(EXIT_FAILURE);
}
/* split all THPs */
write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
(uint64_t)one_page + len, 0);
for (i = 0; i < len; i++)
if (one_page[i] != (char)0) {
printf("%ld byte corrupted\n", i);
exit(EXIT_FAILURE);
}
if (!check_huge_anon(one_page, 0, pmd_pagesize)) {
printf("Still AnonHugePages not split\n");
exit(EXIT_FAILURE);
}
rss_anon_after = rss_anon();
if (rss_anon_after >= rss_anon_before) {
printf("Incorrect RssAnon value. Before: %ld After: %ld\n",
rss_anon_before, rss_anon_after);
exit(EXIT_FAILURE);
}
}
void split_pmd_zero_pages(void)
{
char *one_page;
int nr_hpages = 4;
size_t len = nr_hpages * pmd_pagesize;
one_page = allocate_zero_filled_hugepage(len);
verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len);
printf("Split zero filled huge pages successful\n");
free(one_page);
}
void split_pmd_thp(void)
{
char *one_page;
@@ -305,6 +375,7 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
split_pmd_zero_pages();
split_pmd_thp();
split_pte_mapped_thp();
split_file_backed_thp();


@@ -11,6 +11,7 @@
#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
#define SMAP_FILE_PATH "/proc/self/smaps"
#define STATUS_FILE_PATH "/proc/self/status"
#define MAX_LINE_LENGTH 500
unsigned int __page_size;
@@ -97,6 +98,27 @@ uint64_t read_pmd_pagesize(void)
return strtoul(buf, NULL, 10);
}
unsigned long rss_anon(void)
{
unsigned long rss_anon = 0;
FILE *fp;
char buffer[MAX_LINE_LENGTH];
fp = fopen(STATUS_FILE_PATH, "r");
if (!fp)
ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH);
if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer)))
goto err_out;
if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1)
ksft_exit_fail_msg("Reading status error\n");
err_out:
fclose(fp);
return rss_anon;
}
bool __check_huge(void *addr, char *pattern, int nr_hpages,
uint64_t hpage_size)
{


@@ -39,6 +39,7 @@ unsigned long pagemap_get_pfn(int fd, char *start);
void clear_softdirty(void);
bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
uint64_t read_pmd_pagesize(void);
unsigned long rss_anon(void);
bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size);
bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);