diff --git a/BUILD.bazel b/BUILD.bazel index 4be135108507..d17808239217 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1025,6 +1025,7 @@ ddk_headers( name = "all_headers_allowlist_aarch64", hdrs = [ "drivers/dma-buf/heaps/deferred-free-helper.h", + "drivers/dma/dmaengine.h", "drivers/extcon/extcon.h", "drivers/pci/controller/dwc/pcie-designware.h", "drivers/thermal/thermal_core.h", @@ -1046,6 +1047,7 @@ ddk_headers( "arch/arm64/include", "arch/arm64/include/uapi", "drivers/dma-buf", + "drivers/dma", "drivers/extcon", "drivers/pci/controller/dwc", "drivers/thermal", diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 7e7ffbe8167b..eec506c44d97 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -858,3 +858,12 @@ Description: This is a read-only entry to show the value of sb.s_encoding_flags, SB_ENC_STRICT_MODE_FL 0x00000001 SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002 ============================ ========== + +What: /sys/fs/f2fs/<disk>/reserved_pin_section +Date: June 2025 +Contact: "Chao Yu" +Description: This threshold controls when garbage collection is triggered while + fallocating on a pinned file, so that there are enough free + reserved sections before preallocating on the pinned file. + By default, the value is ovp_sections; for zoned UFS, the + value is 1. diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index d8ed1e76d857..78f10f7d9bec 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -8228,6 +8228,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x15e4d187 } +pointer_reference { + id: 0x0fe9f911 + kind: POINTER + pointee_type_id: 0x15e702d9 +} pointer_reference { id: 0x0fe9ffda kind: POINTER @@ -17573,6 +17578,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9e7aaf3f } +pointer_reference { + id: 0x2d0e9efd + kind: POINTER + pointee_type_id: 0x9e7a9d6b +} pointer_reference { id: 0x2d0fdd7c kind: POINTER @@ -27928,6 +27938,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xca7029d8 } +pointer_reference { + id: 0x380eb497 + kind: POINTER + pointee_type_id: 0xca7a34c0 +} pointer_reference { id: 0x381020ff kind: POINTER @@ -35043,6 +35058,11 @@ qualified { qualifier: CONST qualified_type_id: 0x592e728c } +qualified { + id: 0xca7a34c0 + qualifier: CONST + qualified_type_id: 0x59af6589 +} qualified { id: 0xca8285c3 qualifier: CONST @@ -99904,6 +99924,11 @@ member { type_id: 0x37e7a473 offset: 768 } +member { + id: 0x36181e96 + name: "funcs" + type_id: 0x380eb497 +} member { id: 0x36184afd name: "funcs" @@ -152610,6 +152635,12 @@ member { type_id: 0x9bd401b6 offset: 16 } +member { + id: 0xd3327091 + name: "panel" + type_id: 0x10617cac + offset: 192 +} member { id: 0xd3a8d2cb name: "panel" @@ -152633,6 +152664,17 @@ member { type_id: 0x2a670b41 offset: 9024 } +member { + id: 0xf2e51365 + name: "panel_prepared" + type_id: 0x2d0e9efd +} +member { + id: 0x289370ad + name: "panel_unpreparing" + type_id: 0x2d0e9efd + offset: 64 +} member { id: 0x616a797d name: "panic" @@ -239344,6 +239386,27 @@ struct_union { member_id: 0x3a2d3750 } } +struct_union { + id: 0x15e702d9 + kind: STRUCT + name: "drm_panel_follower" + definition { + bytesize: 32 + member_id: 0x36181e96 + member_id: 0x7c00ebb3 + member_id: 0xd3327091 + } +} +struct_union { + id: 0x59af6589 + kind: STRUCT + name: "drm_panel_follower_funcs" + definition { + bytesize: 16 + member_id: 0xf2e51365 + member_id: 0x289370ad + } +} struct_union { id: 0x5c75f1b8
kind: STRUCT @@ -308489,6 +308552,11 @@ function { parameter_id: 0x0258f96e parameter_id: 0xd41e888f } +function { + id: 0x13622fd7 + return_type_id: 0x48b5725f + parameter_id: 0x0fe9f911 +} function { id: 0x1362a71c return_type_id: 0x48b5725f @@ -321953,6 +322021,13 @@ function { parameter_id: 0x27a7c613 parameter_id: 0x4585663f } +function { + id: 0x5e21336c + return_type_id: 0x2170d06d + parameter_id: 0x0a134144 + parameter_id: 0x33756485 + parameter_id: 0xae60496e +} function { id: 0x5e29431a return_type_id: 0x295c7202 @@ -328930,6 +329005,12 @@ function { parameter_id: 0x391f15ea parameter_id: 0xf435685e } +function { + id: 0x9294d8c1 + return_type_id: 0x6720d32f + parameter_id: 0x3c01aef6 + parameter_id: 0x051414e1 +} function { id: 0x92956fd0 return_type_id: 0x6720d32f @@ -345546,6 +345627,12 @@ function { parameter_id: 0x0258f96e parameter_id: 0x0fa01494 } +function { + id: 0x9d297a90 + return_type_id: 0x6720d32f + parameter_id: 0x0258f96e + parameter_id: 0x0fe9f911 +} function { id: 0x9d2c14da return_type_id: 0x6720d32f @@ -348123,6 +348210,11 @@ function { parameter_id: 0x0c2e195c parameter_id: 0x3ca4f8de } +function { + id: 0x9e7a9d6b + return_type_id: 0x6720d32f + parameter_id: 0x0fe9f911 +} function { id: 0x9e7aaf3f return_type_id: 0x6720d32f @@ -360789,6 +360881,15 @@ elf_symbol { type_id: 0x9baf3eaf full_name: "__traceiter_android_rvh_show_max_freq" } +elf_symbol { + id: 0xb80ecc98 + name: "__traceiter_android_rvh_swap_readpage_bdev_sync" + is_defined: true + symbol_type: FUNCTION + crc: 0xecf99d88 + type_id: 0x9bab3090 + full_name: "__traceiter_android_rvh_swap_readpage_bdev_sync" +} elf_symbol { id: 0x3b650ee3 name: "__traceiter_android_rvh_tcp_rcv_spurious_retrans" @@ -367899,6 +368000,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_rvh_show_max_freq" } +elf_symbol { + id: 0x64ce7cd6 + name: "__tracepoint_android_rvh_swap_readpage_bdev_sync" + is_defined: true + symbol_type: OBJECT + crc: 0x72fbf2a6 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_rvh_swap_readpage_bdev_sync" +} elf_symbol { id: 0x5380a8d5 name: "__tracepoint_android_rvh_tcp_rcv_spurious_retrans" @@ -389888,6 +389998,15 @@ elf_symbol { type_id: 0xfa1de4ef full_name: "drm_is_current_master" } +elf_symbol { + id: 0xa3983618 + name: "drm_is_panel_follower" + is_defined: true + symbol_type: FUNCTION + crc: 0xcfdfa487 + type_id: 0xfe32655f + full_name: "drm_is_panel_follower" +} elf_symbol { id: 0xc8af6225 name: "drm_kms_helper_connector_hotplug_event" @@ -390536,6 +390655,15 @@ elf_symbol { type_id: 0x14800eb8 full_name: "drm_panel_add" } +elf_symbol { + id: 0x2b742694 + name: "drm_panel_add_follower" + is_defined: true + symbol_type: FUNCTION + crc: 0x2db618bd + type_id: 0x9d297a90 + full_name: "drm_panel_add_follower" +} elf_symbol { id: 0xd67ad69f name: "drm_panel_bridge_add_typed" @@ -390626,6 +390754,15 @@ elf_symbol { type_id: 0x14800eb8 full_name: "drm_panel_remove" } +elf_symbol { + id: 0x6016204a + name: "drm_panel_remove_follower" + is_defined: true + symbol_type: FUNCTION + crc: 0x397cfaf5 + type_id: 0x13622fd7 + full_name: "drm_panel_remove_follower" +} elf_symbol { id: 0x046720ab name: "drm_panel_unprepare" @@ -393051,6 +393188,15 @@ elf_symbol { type_id: 0xf6f86f1f full_name: "folio_clear_dirty_for_io" } +elf_symbol { + id: 0x1ac8aa52 + name: "folio_deactivate" + is_defined: true + symbol_type: FUNCTION + crc: 0x7abc9b3a + type_id: 0x18c46588 + full_name: "folio_deactivate" +} elf_symbol { id: 0xf83588d6 name: "folio_end_private_2" @@ -393078,6 +393224,15 @@ 
elf_symbol { type_id: 0x637004ab full_name: "folio_mapping" } +elf_symbol { + id: 0xd2e101fd + name: "folio_mark_accessed" + is_defined: true + symbol_type: FUNCTION + crc: 0x74311ee4 + type_id: 0x18c46588 + full_name: "folio_mark_accessed" +} elf_symbol { id: 0xcef0ca54 name: "folio_mark_dirty" @@ -396727,6 +396882,24 @@ elf_symbol { type_id: 0x13e1603f full_name: "hid_destroy_device" } +elf_symbol { + id: 0x1706be22 + name: "hid_driver_reset_resume" + is_defined: true + symbol_type: FUNCTION + crc: 0x371549c9 + type_id: 0x9ef9d283 + full_name: "hid_driver_reset_resume" +} +elf_symbol { + id: 0x4c3911f0 + name: "hid_driver_suspend" + is_defined: true + symbol_type: FUNCTION + crc: 0xe6a4222b + type_id: 0x9d398c85 + full_name: "hid_driver_suspend" +} elf_symbol { id: 0x8717f26f name: "hid_hw_close" @@ -422588,6 +422761,15 @@ elf_symbol { type_id: 0x909c23c2 full_name: "snd_soc_get_dai_id" } +elf_symbol { + id: 0x4086fab0 + name: "snd_soc_get_dai_name" + is_defined: true + symbol_type: FUNCTION + crc: 0x347721f4 + type_id: 0x9294d8c1 + full_name: "snd_soc_get_dai_name" +} elf_symbol { id: 0xa64c7fe5 name: "snd_soc_get_dai_via_args" @@ -434533,6 +434715,15 @@ elf_symbol { type_id: 0xfc37fa4b full_name: "vm_node_stat" } +elf_symbol { + id: 0x4e194253 + name: "vm_normal_folio_pmd" + is_defined: true + symbol_type: FUNCTION + crc: 0xa737dbaa + type_id: 0x5e21336c + full_name: "vm_normal_folio_pmd" +} elf_symbol { id: 0x2570ceae name: "vm_normal_page" @@ -436958,6 +437149,7 @@ interface { symbol_id: 0x1228e7e9 symbol_id: 0x73c83ef4 symbol_id: 0x46515de8 + symbol_id: 0xb80ecc98 symbol_id: 0x3b650ee3 symbol_id: 0xcf016f05 symbol_id: 0x79480d0a @@ -437748,6 +437940,7 @@ interface { symbol_id: 0x8a4070f7 symbol_id: 0x00b7ed82 symbol_id: 0xe8cacf26 + symbol_id: 0x64ce7cd6 symbol_id: 0x5380a8d5 symbol_id: 0x1f12a317 symbol_id: 0x454d16cc @@ -440188,6 +440381,7 @@ interface { symbol_id: 0x3a6e27e9 symbol_id: 0xc9aa2ffd symbol_id: 0xec79cf1c + symbol_id: 0xa3983618 symbol_id: 0xc8af6225 symbol_id: 0x8a043efe symbol_id: 0x3c6b600d @@ -440260,6 +440454,7 @@ interface { symbol_id: 0xc73568f4 symbol_id: 0x124ae77d symbol_id: 0xdc6725cf + symbol_id: 0x2b742694 symbol_id: 0xd67ad69f symbol_id: 0x48cde8a9 symbol_id: 0x633d0644 @@ -440270,6 +440465,7 @@ interface { symbol_id: 0xad1d778f symbol_id: 0xcf81b673 symbol_id: 0x864914fa + symbol_id: 0x6016204a symbol_id: 0x046720ab symbol_id: 0x3c07bbff symbol_id: 0xbdb562b1 @@ -440539,9 +440735,11 @@ interface { symbol_id: 0x3c7c2553 symbol_id: 0x06c58be7 symbol_id: 0xab55569c + symbol_id: 0x1ac8aa52 symbol_id: 0xf83588d6 symbol_id: 0xa1c5bd8d symbol_id: 0x159a69a3 + symbol_id: 0xd2e101fd symbol_id: 0xcef0ca54 symbol_id: 0x39840ab2 symbol_id: 0xc05a6c7d @@ -440946,6 +441144,8 @@ interface { symbol_id: 0xccc593d6 symbol_id: 0x97a02af0 symbol_id: 0x2ffc7c7e + symbol_id: 0x1706be22 + symbol_id: 0x4c3911f0 symbol_id: 0x8717f26f symbol_id: 0x361004c8 symbol_id: 0xcf5ea9a2 @@ -443815,6 +444015,7 @@ interface { symbol_id: 0x7918ef41 symbol_id: 0x97843792 symbol_id: 0x54622a57 + symbol_id: 0x4086fab0 symbol_id: 0xa64c7fe5 symbol_id: 0x5eb2e502 symbol_id: 0x33a917a0 @@ -445141,6 +445342,7 @@ interface { symbol_id: 0xdc09fb10 symbol_id: 0x5849ff8e symbol_id: 0xaf85c216 + symbol_id: 0x4e194253 symbol_id: 0x2570ceae symbol_id: 0xacc76406 symbol_id: 0xef2c49d1 diff --git a/android/abi_gki_aarch64_amlogic b/android/abi_gki_aarch64_amlogic index ae6e6cb73da1..2a3c0510146a 100644 --- a/android/abi_gki_aarch64_amlogic +++ b/android/abi_gki_aarch64_amlogic @@ -1,3 +1,5 @@ + + 
[abi_symbol_list] add_cpu add_device_randomness @@ -209,10 +211,12 @@ consume_skb contig_page_data __contpte_try_unfold + _copy_from_iter copy_from_kernel_nofault __copy_overflow copy_page_from_iter_atomic copy_splice_read + _copy_to_iter cpu_all_bits cpu_bit_bitmap cpufreq_boost_enabled @@ -245,10 +249,13 @@ crypto_aead_setauthsize crypto_aead_setkey crypto_ahash_digest + crypto_ahash_final + crypto_ahash_finup crypto_ahash_setkey crypto_alloc_aead crypto_alloc_ahash crypto_alloc_base + crypto_alloc_rng crypto_alloc_shash crypto_alloc_skcipher crypto_cipher_encrypt_one @@ -258,13 +265,17 @@ crypto_dequeue_request crypto_destroy_tfm crypto_enqueue_request + crypto_get_default_null_skcipher crypto_has_alg crypto_init_queue __crypto_memneq + crypto_put_default_null_skcipher crypto_register_ahash crypto_register_alg crypto_register_shash crypto_register_skcipher + crypto_req_done + crypto_rng_reset crypto_sha1_finup crypto_sha1_update crypto_shash_digest @@ -623,6 +634,7 @@ drm_atomic_set_mode_prop_for_crtc drm_atomic_state_alloc drm_atomic_state_clear + drm_atomic_state_default_release __drm_atomic_state_free drm_compat_ioctl drm_connector_attach_content_type_property @@ -793,6 +805,7 @@ extcon_set_state extcon_set_state_sync extcon_unregister_notifier + extract_iter_to_sg fasync_helper fault_in_iov_iter_readable __fdget @@ -1102,8 +1115,10 @@ ioremap_prot io_schedule iounmap + iov_iter_advance iov_iter_alignment iov_iter_init + iov_iter_npages iov_iter_revert iov_iter_zero iput @@ -1269,12 +1284,14 @@ __local_bh_enable_ip __lock_buffer lockref_get + lock_sock_nested logfc log_post_read_mmio log_post_write_mmio log_read_mmio log_write_mmio lookup_bdev + lookup_user_key loops_per_jiffy LZ4_decompress_safe LZ4_decompress_safe_partial @@ -1726,6 +1743,8 @@ proc_mkdir proc_mkdir_data proc_remove + proto_register + proto_unregister __pskb_copy_fclone pskb_expand_head __pskb_pull_tail @@ -1845,6 +1864,8 @@ release_firmware __release_region release_resource + release_sock + release_sock remap_pfn_range remap_vmalloc_range remove_cpu @@ -1940,6 +1961,8 @@ sdio_writel sdio_writesb sdio_writew + security_sk_clone + security_sock_graft send_sig seq_list_next seq_list_start @@ -2000,6 +2023,7 @@ single_open_size single_release si_swapinfo + sk_alloc skb_add_rx_frag skb_checksum_help skb_clone @@ -2026,6 +2050,7 @@ skb_scrub_packet skb_trim skb_tstamp_tx + sk_free skip_spaces smpboot_register_percpu_thread smp_call_function @@ -2046,6 +2071,7 @@ snd_pcm_lib_preallocate_pages snd_pcm_period_elapsed snd_pcm_rate_to_rate_bit + snd_pcm_set_managed_buffer_all snd_pcm_stop snd_pcm_stop_xrun _snd_pcm_stream_lock_irqsave @@ -2068,6 +2094,7 @@ snd_soc_dai_set_tdm_slot snd_soc_dapm_get_enum_double snd_soc_dapm_put_enum_double + snd_soc_get_dai_name snd_soc_get_volsw snd_soc_get_volsw_range snd_soc_info_enum_double @@ -2082,6 +2109,7 @@ snd_soc_of_parse_audio_simple_widgets snd_soc_of_parse_card_name snd_soc_of_parse_tdm_slot + snd_soc_of_put_dai_link_codecs snd_soc_pm_ops snd_soc_put_volsw snd_soc_put_volsw_range @@ -2090,7 +2118,25 @@ snd_soc_unregister_component snprintf __sock_create + sock_init_data + sock_kfree_s + sock_kmalloc + sock_kzfree_s + sock_no_accept + sock_no_bind + sock_no_connect + sock_no_getname + sock_no_ioctl + sock_no_listen + sock_no_mmap + sock_no_recvmsg + sock_no_sendmsg + sock_no_shutdown + sock_no_socketpair + sock_register sock_release + sock_unregister + sock_wake_async sock_wfree sort spi_add_device @@ -2172,6 +2218,7 @@ sysfs_create_file_ns sysfs_create_files sysfs_create_group + 
sysfs_create_groups sysfs_create_link sysfs_emit __sysfs_match_string @@ -2574,10 +2621,12 @@ wakeup_source_register wakeup_source_unregister __wake_up_sync + __wake_up_sync_key __warn_flushing_systemwide_wq __warn_printk wireless_nlevent_flush wireless_send_event + woken_wake_function work_busy write_cache_pages write_inode_now diff --git a/android/abi_gki_aarch64_honor b/android/abi_gki_aarch64_honor index 48c49720e0ea..33decf01d449 100644 --- a/android/abi_gki_aarch64_honor +++ b/android/abi_gki_aarch64_honor @@ -94,6 +94,7 @@ bio_crypt_set_ctx zero_fill_bio_iter percpu_ref_is_zero + vm_normal_folio_pmd __trace_bputs __traceiter_android_vh_proactive_compact_wmark_high __tracepoint_android_vh_proactive_compact_wmark_high diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel index d64fef8faf50..d17b443f7c9e 100644 --- a/android/abi_gki_aarch64_pixel +++ b/android/abi_gki_aarch64_pixel @@ -2669,6 +2669,7 @@ __traceiter_android_rvh_setscheduler_prio __traceiter_android_rvh_set_task_cpu __traceiter_android_rvh_set_user_nice_locked + __traceiter_android_rvh_swap_readpage_bdev_sync __traceiter_android_rvh_tick_entry __traceiter_android_rvh_try_to_wake_up_success __traceiter_android_rvh_uclamp_eff_get @@ -2808,6 +2809,7 @@ __tracepoint_android_rvh_setscheduler_prio __tracepoint_android_rvh_set_task_cpu __tracepoint_android_rvh_set_user_nice_locked + __tracepoint_android_rvh_swap_readpage_bdev_sync __tracepoint_android_rvh_tick_entry __tracepoint_android_rvh_try_to_wake_up_success __tracepoint_android_rvh_uclamp_eff_get diff --git a/android/abi_gki_aarch64_pixel_watch b/android/abi_gki_aarch64_pixel_watch index db61c65ca0ff..a5621a839612 100644 --- a/android/abi_gki_aarch64_pixel_watch +++ b/android/abi_gki_aarch64_pixel_watch @@ -288,6 +288,7 @@ delayed_work_timer_fn destroy_workqueue dev_addr_mod + _dev_alert dev_alloc_name __dev_change_net_namespace dev_close @@ -869,6 +870,7 @@ gpiod_get_raw_value gpiod_get_raw_value_cansleep gpiod_get_value + gpiod_is_active_low gpiod_set_raw_value gpiod_set_value gpiod_set_value_cansleep @@ -2091,6 +2093,7 @@ tick_nohz_get_sleep_length timer_delete timer_delete_sync + timer_shutdown_sync topology_clear_scale_freq_source topology_update_done topology_update_thermal_pressure @@ -2171,6 +2174,10 @@ __traceiter_mmap_lock_acquire_returned __traceiter_mmap_lock_released __traceiter_mmap_lock_start_locking + __traceiter_rwmmio_post_read + __traceiter_rwmmio_post_write + __traceiter_rwmmio_read + __traceiter_rwmmio_write __traceiter_sched_overutilized_tp __traceiter_sched_switch __traceiter_sk_data_ready @@ -2246,6 +2253,10 @@ tracepoint_probe_register tracepoint_probe_register_prio tracepoint_probe_unregister + __tracepoint_rwmmio_post_read + __tracepoint_rwmmio_post_write + __tracepoint_rwmmio_read + __tracepoint_rwmmio_write __tracepoint_sched_overutilized_tp __tracepoint_sched_switch __tracepoint_sk_data_ready diff --git a/android/abi_gki_aarch64_sunxi b/android/abi_gki_aarch64_sunxi index 4b51d7f71b55..27b308dd7254 100644 --- a/android/abi_gki_aarch64_sunxi +++ b/android/abi_gki_aarch64_sunxi @@ -91,3 +91,8 @@ __tracepoint_dwc3_readl __tracepoint_dwc3_writel pinctrl_gpio_set_config + drm_is_panel_follower + drm_panel_add_follower + drm_panel_remove_follower + hid_driver_reset_resume + hid_driver_suspend diff --git a/android/abi_gki_aarch64_xiaomi b/android/abi_gki_aarch64_xiaomi index a8531903d2a7..1cc056048b75 100644 --- a/android/abi_gki_aarch64_xiaomi +++ b/android/abi_gki_aarch64_xiaomi @@ -197,6 +197,10 @@ 
__tracepoint_android_rvh_dequeue_task_fair __tracepoint_android_rvh_entity_tick +# required by mi_damon.ko + folio_deactivate + folio_mark_accessed + #required by cpq.ko elv_rb_former_request elv_rb_latter_request diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index aee331a1430b..7b89c07f23b5 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -581,6 +581,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_LIB_KUNIT_TEST=m CONFIG_RTC_DRV_PL030=y CONFIG_RTC_DRV_PL031=y +CONFIG_UDMABUF=y CONFIG_DMABUF_HEAPS=y CONFIG_DMABUF_SYSFS_STATS=y CONFIG_DMABUF_HEAPS_DEFERRED_FREE=y diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 4a16808c3ba8..80a1526684cb 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -593,7 +593,7 @@ static inline unsigned long host_s2_pgtable_pages(void) * Maximum number of consitutents allowed in a descriptor. This number is * arbitrary, see comment below on SG_MAX_SEGMENTS in hyp_ffa_proxy_pages(). */ -#define KVM_FFA_MAX_NR_CONSTITUENTS 4096 +#define KVM_FFA_MAX_NR_CONSTITUENTS 12288 static inline unsigned long hyp_ffa_proxy_pages(void) { diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bc1f8cb3faf3..afdd36e4ae8a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -491,17 +491,9 @@ int __pkvm_prot_finalize(void) int host_stage2_unmap_reg_locked(phys_addr_t start, u64 size) { - int ret; - hyp_assert_lock_held(&host_mmu.lock); - ret = kvm_pgtable_stage2_reclaim_leaves(&host_mmu.pgt, start, size); - if (ret) - return ret; - - kvm_iommu_host_stage2_idmap(start, start + size, 0); - - return 0; + return kvm_pgtable_stage2_reclaim_leaves(&host_mmu.pgt, start, size); } static int host_stage2_unmap_unmoveable_regs(void) diff --git a/arch/arm64/kvm/hyp_events.c b/arch/arm64/kvm/hyp_events.c index 424cd5189355..086931bec32c 100644 --- a/arch/arm64/kvm/hyp_events.c +++ b/arch/arm64/kvm/hyp_events.c @@ -250,7 +250,10 @@ bool hyp_trace_init_event_early(void) } static struct dentry *event_tracefs; -static unsigned int last_event_id; +// Event IDs should be positive integers, hence starting from 1 here. +// NOTE: this introduces ID clash between hypervisor events and kernel events. +// For now this doesn't seem to cause problems, but we should fix it... 
+static unsigned int last_event_id = 1; struct hyp_event_table { struct hyp_event *start; diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c index b4a5d117568a..4eb85aad055f 100644 --- a/arch/arm64/kvm/hyp_trace.c +++ b/arch/arm64/kvm/hyp_trace.c @@ -861,7 +861,9 @@ int hyp_trace_init_tracefs(void) tracefs_create_file("trace_pipe", TRACEFS_MODE_READ, per_cpu_dir, (void *)cpu, &hyp_trace_pipe_fops); tracefs_create_file("trace_pipe_raw", TRACEFS_MODE_READ, per_cpu_dir, - (void *)cpu, &hyp_trace_pipe_fops); + (void *)cpu, &hyp_trace_raw_fops); + tracefs_create_file("trace", TRACEFS_MODE_WRITE, per_cpu_dir, + (void *)cpu, &hyp_trace_fops); } hyp_trace_init_event_tracefs(root); diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index c7bd6055c20b..6fe64231eb62 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -535,6 +535,7 @@ CONFIG_LEDS_TRIGGER_TRANSIENT=y CONFIG_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_LIB_KUNIT_TEST=m +CONFIG_UDMABUF=y CONFIG_DMABUF_HEAPS=y CONFIG_DMABUF_SYSFS_STATS=y CONFIG_DMABUF_HEAPS_DEFERRED_FREE=y diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 64551b0aa51e..91b788149381 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1566,6 +1566,14 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) if (blkcg_policy_enabled(q, pol)) return 0; + /* + * Policy is allowed to be registered without pd_alloc_fn/pd_free_fn, + * for example, ioprio. Such policy will work on blkcg level, not disk + * level, and don't need to be activated. + */ + if (WARN_ON_ONCE(!pol->pd_alloc_fn || !pol->pd_free_fn)) + return -EINVAL; + if (queue_is_mq(q)) blk_mq_freeze_queue(q); retry: @@ -1745,9 +1753,12 @@ int blkcg_policy_register(struct blkcg_policy *pol) goto err_unlock; } - /* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs */ + /* + * Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs, and policy + * without pd_alloc_fn/pd_free_fn can't be activated. 
+ */ if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) || - (!pol->pd_alloc_fn ^ !pol->pd_free_fn)) + (!pol->pd_alloc_fn ^ !pol->pd_free_fn)) goto err_unlock; /* register @pol */ diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index d3f7ff4fde56..00ffd7ed2ffc 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -625,6 +625,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_migration_target_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shrink_node_memcgs); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_swap_writepage); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_swap_readpage_bdev_sync); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_swap_readpage_bdev_sync); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_dpm_wait_start); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_dpm_wait_finish); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sync_irq_wait_start); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d0aba74067c9..3a7bd62ef6b7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -543,6 +543,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int idx; unsigned int old_target_freq = target_freq; + target_freq = clamp_val(target_freq, policy->min, policy->max); trace_android_vh_cpufreq_resolve_freq(policy, &target_freq, old_target_freq); if (!policy->freq_table) @@ -568,22 +569,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { - unsigned int min = READ_ONCE(policy->min); - unsigned int max = READ_ONCE(policy->max); - - /* - * If this function runs in parallel with cpufreq_set_policy(), it may - * read policy->min before the update and policy->max after the update - * or the other way around, so there is no ordering guarantee. - * - * Resolve this by always honoring the max (in case it comes from - * thermal throttling or similar). - */ - if (unlikely(min > max)) - min = max; - - return __resolve_freq(policy, clamp_val(target_freq, min, max), - CPUFREQ_RELATION_LE); + return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2369,7 +2355,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = clamp_val(target_freq, policy->min, policy->max); target_freq = __resolve_freq(policy, target_freq, relation); trace_android_vh_cpufreq_target(policy, &target_freq, old_target_freq); @@ -2662,15 +2647,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * Resolve policy min/max to available frequencies. It ensures * no frequency resolution will neither overshoot the requested maximum * nor undershoot the requested minimum. - * - * Avoid storing intermediate values in policy->max or policy->min and - * compiler optimizations around them because they may be accessed - * concurrently by cpufreq_driver_resolve_freq() during the update. */ - WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); - new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); - WRITE_ONCE(policy->min, new_data.min > policy->max ? 
policy->max : new_data.min); - + policy->min = new_data.min; + policy->max = new_data.max; + policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L); + policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H); trace_cpu_frequency_limits(policy); policy->cached_target_freq = UINT_MAX; diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 97e8af88df85..ab853d6c586b 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -238,6 +238,15 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin, if (desc->mux_usecount) return NULL; } + + if (gpio_range) { + owner = desc->gpio_owner; + desc->gpio_owner = NULL; + } else { + owner = desc->mux_owner; + desc->mux_owner = NULL; + desc->mux_setting = NULL; + } } /* @@ -249,17 +258,6 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin, else if (ops->free) ops->free(pctldev, pin); - scoped_guard(mutex, &desc->mux_lock) { - if (gpio_range) { - owner = desc->gpio_owner; - desc->gpio_owner = NULL; - } else { - owner = desc->mux_owner; - desc->mux_owner = NULL; - desc->mux_setting = NULL; - } - } - module_put(pctldev->owner); return owner; diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index a3a7ecd53901..7be18e45ba5a 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7018,6 +7018,11 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) if (enabled_intr_status) retval |= ufshcd_sl_intr(hba, enabled_intr_status); + if (hba->android_quirks & + UFSHCD_ANDROID_QUIRK_NO_IS_READ_ON_H8 && + intr_status & UIC_HIBERNATE_ENTER) + break; + intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); } diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 729b0472bab0..7a306b11881f 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -291,8 +291,8 @@ __acquires(&port->port_lock) break; } - if (do_tty_wake && port->port.tty) - tty_wakeup(port->port.tty); + if (do_tty_wake) + tty_port_tty_wakeup(&port->port); return status; } @@ -573,7 +573,7 @@ static int gs_start_io(struct gs_port *port) gs_start_tx(port); /* Unblock any pending writes into our circular buffer, in case * we didn't in gs_start_tx() */ - tty_wakeup(port->port.tty); + tty_port_tty_wakeup(&port->port); } else { /* Free reqs only if we are still connected */ if (port->port_usb) { diff --git a/drivers/virt/gunyah/gunyah_qcom.c b/drivers/virt/gunyah/gunyah_qcom.c index f2342d51a018..622d6a07db02 100644 --- a/drivers/virt/gunyah/gunyah_qcom.c +++ b/drivers/virt/gunyah/gunyah_qcom.c @@ -187,7 +187,7 @@ static bool gunyah_has_qcom_extensions(void) uuid_t uuid; u32 *up; - arm_smccc_1_1_smc(GUNYAH_QCOM_EXT_CALL_UUID_ID, &res); + arm_smccc_1_1_invoke(GUNYAH_QCOM_EXT_CALL_UUID_ID, &res); up = (u32 *)&uuid.b[0]; up[0] = lower_32_bits(res.a0); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index efd24e29f119..272208708ffc 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -610,9 +610,8 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (pos > valid_size) pos = valid_size; - if (iocb_is_dsync(iocb) && iocb->ki_pos > pos) { - ssize_t err = vfs_fsync_range(file, pos, iocb->ki_pos - 1, - iocb->ki_flags & IOCB_SYNC); + if (iocb->ki_pos > pos) { + ssize_t err = generic_write_sync(iocb, iocb->ki_pos - pos); if (err < 0) return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 811eb3e3089c..cf9007a253da 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1703,6 
+1703,9 @@ struct f2fs_sb_info { /* for skip statistic */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ + /* free sections reserved for pinned file */ + unsigned int reserved_pin_section; + /* threshold for gc trials on pinned files */ unsigned short gc_pin_file_threshold; struct f2fs_rwsem pin_sem; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 479d49dd4ce5..f8832212ee37 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1859,9 +1859,8 @@ next_alloc: } } - if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ? - ZONED_PIN_SEC_REQUIRED_COUNT : - GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + if (has_not_enough_free_secs(sbi, 0, + sbi->reserved_pin_section)) { f2fs_down_write(&sbi->gc_lock); stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ecf1a380d433..2bc3c52814d5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4668,6 +4668,10 @@ try_onemore: /* get segno of first zoned block device */ sbi->first_zoned_segno = get_first_zoned_segno(sbi); + sbi->reserved_pin_section = f2fs_sb_has_blkzoned(sbi) ? + ZONED_PIN_SEC_REQUIRED_COUNT : + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)); + /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d4a63b0254b9..46216f0a203a 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -824,6 +824,13 @@ out: return count; } + if (!strcmp(a->attr.name, "reserved_pin_section")) { + if (t > GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi))) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1130,6 +1137,7 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif F2FS_SBI_GENERAL_RW_ATTR(carve_out); +F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section); /* STAT_INFO ATTR */ #ifdef CONFIG_F2FS_STAT_FS @@ -1323,6 +1331,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(last_age_weight), ATTR_LIST(max_read_extent_count), ATTR_LIST(carve_out), + ATTR_LIST(reserved_pin_section), NULL, }; ATTRIBUTE_GROUPS(f2fs); diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index 65eb40c00944..8087138ba33c 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -549,6 +549,10 @@ DECLARE_HOOK(android_vh_swap_readpage_bdev_sync, TP_PROTO(struct block_device *bdev, sector_t sector, struct page *page, bool *read), TP_ARGS(bdev, sector, page, read)); +DECLARE_RESTRICTED_HOOK(android_rvh_swap_readpage_bdev_sync, + TP_PROTO(struct block_device *bdev, sector_t sector, + struct page *page, bool *read), + TP_ARGS(bdev, sector, page, read), 4); DECLARE_HOOK(android_vh_alloc_flags_cma_adjust, TP_PROTO(gfp_t gfp_mask, unsigned int *alloc_flags), TP_ARGS(gfp_mask, alloc_flags)); diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 66bd5c15375e..cde9ad6489b2 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -704,6 +704,9 @@ enum ufshcd_android_quirks { /* Set IID to one. 
*/ UFSHCD_ANDROID_QUIRK_SET_IID_TO_ONE = 1 << 30, + + /* Do not read IS after H8 enter */ + UFSHCD_ANDROID_QUIRK_NO_IS_READ_ON_H8 = 1 << 31, }; enum ufshcd_caps { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e3e17a54c71f..41f11c0f834e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4073,6 +4073,11 @@ bool cpus_share_cache(int this_cpu, int that_cpu) static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) { +#ifdef CONFIG_SMP + if (p->sched_class == &stop_sched_class) + return false; +#endif + /* * Do not complicate things with the async wake_list while the CPU is * in hotplug state. diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 7b65bb0b4a66..0c3d387d3db7 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -82,18 +82,15 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done) } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, - struct cpu_stop_work *work, - struct wake_q_head *wakeq) + struct cpu_stop_work *work) { list_add_tail(&work->list, &stopper->works); - wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; @@ -101,12 +98,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) - __cpu_stop_queue_work(stopper, work, &wakeq); + __cpu_stop_queue_work(stopper, work); else if (work->done) cpu_stop_signal_done(work->done); raw_spin_unlock_irqrestore(&stopper->lock, flags); - wake_up_q(&wakeq); + if (enabled) + wake_up_process(stopper->thread); preempt_enable(); return enabled; @@ -264,7 +262,6 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); - DEFINE_WAKE_Q(wakeq); int err; retry: @@ -300,8 +297,8 @@ retry: } err = 0; - __cpu_stop_queue_work(stopper1, work1, &wakeq); - __cpu_stop_queue_work(stopper2, work2, &wakeq); + __cpu_stop_queue_work(stopper1, work1); + __cpu_stop_queue_work(stopper2, work2); unlock: raw_spin_unlock(&stopper2->lock); @@ -316,7 +313,10 @@ unlock: goto retry; } - wake_up_q(&wakeq); + if (!err) { + wake_up_process(stopper1->thread); + wake_up_process(stopper2->thread); + } preempt_enable(); return err; diff --git a/mm/compaction.c b/mm/compaction.c index 75ee7750ce2a..89570cd884c7 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2279,7 +2279,6 @@ static enum compact_result __compact_finished(struct compact_control *cc) ret = COMPACT_NO_SUITABLE_PAGE; for (order = cc->order; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &cc->zone->free_area[order]; - bool can_steal; /* Job done if page is free of the right migratetype */ if (!free_area_empty(area, migratetype)) @@ -2295,8 +2294,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) * Job done if allocation would steal freepages from * other migratetype buddy lists. */ - if (find_suitable_fallback(area, order, migratetype, - true, &can_steal) != -1) + if (find_suitable_fallback(area, order, migratetype, true) >= 0) /* * Movable pages are OK in any pageblock. 
If we are * stealing for a non-movable allocation, make sure diff --git a/mm/internal.h b/mm/internal.h index da8bd4bfbb3e..3fb4222fc3c9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -815,7 +815,7 @@ void init_cma_reserved_pageblock(struct page *page); #endif /* CONFIG_COMPACTION || CONFIG_CMA */ int find_suitable_fallback(struct free_area *area, unsigned int order, - int migratetype, bool only_stealable, bool *can_steal); + int migratetype, bool claimable); static inline bool free_area_empty(struct free_area *area, int migratetype) { diff --git a/mm/memory.c b/mm/memory.c index a04841dc9291..2646e93c9004 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -713,6 +713,7 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, return page_folio(page); return NULL; } +EXPORT_SYMBOL_GPL(vm_normal_folio_pmd); #endif static void restore_exclusive_pte(struct vm_area_struct *vma, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 152b0424fcbf..d1ecd3793e40 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1518,11 +1518,11 @@ struct page *__pageblock_pfn_to_page(unsigned long start_pfn, * * -- nyc */ -static inline void expand(struct zone *zone, struct page *page, - int low, int high, int migratetype) +static inline unsigned int expand(struct zone *zone, struct page *page, int low, + int high, int migratetype) { - unsigned long size = 1 << high; - unsigned long nr_added = 0; + unsigned int size = 1 << high; + unsigned int nr_added = 0; while (high > low) { high--; @@ -1542,7 +1542,19 @@ static inline void expand(struct zone *zone, struct page *page, set_buddy_order(&page[size], high); nr_added += size; } - account_freepages(zone, nr_added, migratetype); + + return nr_added; +} + +static __always_inline void page_del_and_expand(struct zone *zone, + struct page *page, int low, + int high, int migratetype) +{ + int nr_pages = 1 << high; + + __del_page_from_free_list(page, zone, high, migratetype); + nr_pages -= expand(zone, page, low, high, migratetype); + account_freepages(zone, -nr_pages, migratetype); } static void check_new_page_bad(struct page *page) @@ -1727,8 +1739,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, page = get_page_from_free_area(area, migratetype); if (!page) continue; - del_page_from_free_list(page, zone, current_order, migratetype); - expand(zone, page, order, current_order, migratetype); + + page_del_and_expand(zone, page, order, current_order, + migratetype); trace_mm_page_alloc_zone_locked(page, order, migratetype, pcp_allowed_order(order) && migratetype < MIGRATE_PCPTYPES); @@ -1766,18 +1779,18 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, * Change the type of a block and move all its free pages to that * type's freelist. 
*/ -static int move_freepages(struct zone *zone, unsigned long start_pfn, - unsigned long end_pfn, int old_mt, int new_mt) +static int __move_freepages_block(struct zone *zone, unsigned long start_pfn, + int old_mt, int new_mt) { struct page *page; - unsigned long pfn; + unsigned long pfn, end_pfn; unsigned int order; int pages_moved = 0; VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1)); - VM_WARN_ON(start_pfn + pageblock_nr_pages - 1 != end_pfn); + end_pfn = pageblock_end_pfn(start_pfn); - for (pfn = start_pfn; pfn <= end_pfn;) { + for (pfn = start_pfn; pfn < end_pfn;) { page = pfn_to_page(pfn); if (!PageBuddy(page)) { pfn++; @@ -1803,14 +1816,13 @@ static int move_freepages(struct zone *zone, unsigned long start_pfn, static bool prep_move_freepages_block(struct zone *zone, struct page *page, unsigned long *start_pfn, - unsigned long *end_pfn, int *num_free, int *num_movable) { unsigned long pfn, start, end; pfn = page_to_pfn(page); start = pageblock_start_pfn(pfn); - end = pageblock_end_pfn(pfn) - 1; + end = pageblock_end_pfn(pfn); /* * The caller only has the lock for @zone, don't touch ranges @@ -1821,16 +1833,15 @@ static bool prep_move_freepages_block(struct zone *zone, struct page *page, */ if (!zone_spans_pfn(zone, start)) return false; - if (!zone_spans_pfn(zone, end)) + if (!zone_spans_pfn(zone, end - 1)) return false; *start_pfn = start; - *end_pfn = end; if (num_free) { *num_free = 0; *num_movable = 0; - for (pfn = start; pfn <= end;) { + for (pfn = start; pfn < end;) { page = pfn_to_page(pfn); if (PageBuddy(page)) { int nr = 1 << buddy_order(page); @@ -1856,13 +1867,12 @@ static bool prep_move_freepages_block(struct zone *zone, struct page *page, static int move_freepages_block(struct zone *zone, struct page *page, int old_mt, int new_mt) { - unsigned long start_pfn, end_pfn; + unsigned long start_pfn; - if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, - NULL, NULL)) + if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL)) return -1; - return move_freepages(zone, start_pfn, end_pfn, old_mt, new_mt); + return __move_freepages_block(zone, start_pfn, old_mt, new_mt); } #ifdef CONFIG_MEMORY_ISOLATION @@ -1933,10 +1943,9 @@ static void split_large_buddy(struct zone *zone, struct page *page, bool move_freepages_block_isolate(struct zone *zone, struct page *page, int migratetype) { - unsigned long start_pfn, end_pfn, pfn; + unsigned long start_pfn, pfn; - if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, - NULL, NULL)) + if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL)) return false; /* No splits needed if buddies can't span multiple blocks */ @@ -1967,8 +1976,9 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page, return true; } move: - move_freepages(zone, start_pfn, end_pfn, - get_pfnblock_migratetype(page, start_pfn), migratetype); + __move_freepages_block(zone, start_pfn, + get_pfnblock_migratetype(page, start_pfn), + migratetype); return true; } #endif /* CONFIG_MEMORY_ISOLATION */ @@ -1984,39 +1994,6 @@ static void change_pageblock_range(struct page *pageblock_page, } } -/* - * When we are falling back to another migratetype during allocation, try to - * steal extra free pages from the same pageblocks to satisfy further - * allocations, instead of polluting multiple pageblocks. - * - * If we are stealing a relatively large buddy page, it is likely there will - * be more free pages in the pageblock, so try to steal them all. 
For - * reclaimable and unmovable allocations, we steal regardless of page size, - * as fragmentation caused by those allocations polluting movable pageblocks - * is worse than movable allocations stealing from unmovable and reclaimable - * pageblocks. - */ -static bool can_steal_fallback(unsigned int order, int start_mt) -{ - /* - * Leaving this order check is intended, although there is - * relaxed order check in next check. The reason is that - * we can actually steal whole pageblock if this condition met, - * but, below check doesn't guarantee it and that is just heuristic - * so could be changed anytime. - */ - if (order >= pageblock_order) - return true; - - if (order >= pageblock_order / 2 || - start_mt == MIGRATE_RECLAIMABLE || - start_mt == MIGRATE_UNMOVABLE || - page_group_by_mobility_disabled) - return true; - - return false; -} - static inline bool boost_watermark(struct zone *zone) { unsigned long max_boost; @@ -2055,36 +2032,102 @@ static inline bool boost_watermark(struct zone *zone) } /* - * This function implements actual steal behaviour. If order is large enough, we - * can claim the whole pageblock for the requested migratetype. If not, we check - * the pageblock for constituent pages; if at least half of the pages are free - * or compatible, we can still claim the whole block, so pages freed in the - * future will be put on the correct free list. Otherwise, we isolate exactly - * the order we need from the fallback block and leave its migratetype alone. + * When we are falling back to another migratetype during allocation, should we + * try to claim an entire block to satisfy further allocations, instead of + * polluting multiple pageblocks? */ -static struct page * -steal_suitable_fallback(struct zone *zone, struct page *page, - int current_order, int order, int start_type, - unsigned int alloc_flags, bool whole_block) +static bool should_try_claim_block(unsigned int order, int start_mt) { - int free_pages, movable_pages, alike_pages; - unsigned long start_pfn, end_pfn; - int block_type; - - block_type = get_pageblock_migratetype(page); + /* + * Leaving this order check is intended, although there is + * relaxed order check in next check. The reason is that + * we can actually claim the whole pageblock if this condition met, + * but, below check doesn't guarantee it and that is just heuristic + * so could be changed anytime. + */ + if (order >= pageblock_order) + return true; /* - * This can happen due to races and we want to prevent broken - * highatomic accounting. + * Above a certain threshold, always try to claim, as it's likely there + * will be more free pages in the pageblock. */ - if (is_migrate_highatomic(block_type)) - goto single_page; + if (order >= pageblock_order / 2) + return true; + + /* + * Unmovable/reclaimable allocations would cause permanent + * fragmentations if they fell back to allocating from a movable block + * (polluting it), so we try to claim the whole block regardless of the + * allocation size. Later movable allocations can always steal from this + * block, which is less problematic. + */ + if (start_mt == MIGRATE_RECLAIMABLE || start_mt == MIGRATE_UNMOVABLE) + return true; + + if (page_group_by_mobility_disabled) + return true; + + /* + * Movable pages won't cause permanent fragmentation, so when you alloc + * small pages, we just need to temporarily steal unmovable or + * reclaimable pages that are closest to the request size. 
After a + * while, memory compaction may occur to form large contiguous pages, + * and the next movable allocation may not need to steal. + */ + return false; +} + +/* + * Check whether there is a suitable fallback freepage with requested order. + * If claimable is true, this function returns fallback_mt only if + * we would do this whole-block claiming. This would help to reduce + * fragmentation due to mixed migratetype pages in one pageblock. + */ +int find_suitable_fallback(struct free_area *area, unsigned int order, + int migratetype, bool claimable) +{ + int i; + + if (claimable && !should_try_claim_block(order, migratetype)) + return -2; + + if (area->nr_free == 0) + return -1; + + for (i = 0; i < MIGRATE_FALLBACKS - 1 ; i++) { + int fallback_mt = fallbacks[migratetype][i]; + + if (!free_area_empty(area, fallback_mt)) + return fallback_mt; + } + + return -1; +} + +/* + * This function implements actual block claiming behaviour. If order is large + * enough, we can claim the whole pageblock for the requested migratetype. If + * not, we check the pageblock for constituent pages; if at least half of the + * pages are free or compatible, we can still claim the whole block, so pages + * freed in the future will be put on the correct free list. + */ +static struct page * +try_to_claim_block(struct zone *zone, struct page *page, + int current_order, int order, int start_type, + int block_type, unsigned int alloc_flags) +{ + int free_pages, movable_pages, alike_pages; + unsigned long start_pfn; /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { + unsigned int nr_added; + del_page_from_free_list(page, zone, current_order, block_type); change_pageblock_range(page, current_order, start_type); - expand(zone, page, order, current_order, start_type); + nr_added = expand(zone, page, order, current_order, start_type); + account_freepages(zone, nr_added, start_type); return page; } @@ -2096,14 +2139,10 @@ steal_suitable_fallback(struct zone *zone, struct page *page, if (boost_watermark(zone) && (alloc_flags & ALLOC_KSWAPD)) set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); - /* We are not allowed to try stealing from the whole block */ - if (!whole_block) - goto single_page; - /* moving whole block can fail due to zone boundary conditions */ - if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, - &free_pages, &movable_pages)) - goto single_page; + if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages, + &movable_pages)) + return NULL; /* * Determine how many pages are compatible with our allocation. @@ -2132,216 +2171,23 @@ steal_suitable_fallback(struct zone *zone, struct page *page, */ if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || page_group_by_mobility_disabled) { - move_freepages(zone, start_pfn, end_pfn, block_type, start_type); + __move_freepages_block(zone, start_pfn, block_type, start_type); return __rmqueue_smallest(zone, order, start_type); } -single_page: - del_page_from_free_list(page, zone, current_order, block_type); - expand(zone, page, order, current_order, block_type); - return page; + return NULL; } /* - * Check whether there is a suitable fallback freepage with requested order. - * If only_stealable is true, this function returns fallback_mt only if - * we can steal other freepages all together. This would help to reduce - * fragmentation due to mixed migratetype pages in one pageblock. 
- */ -int find_suitable_fallback(struct free_area *area, unsigned int order, - int migratetype, bool only_stealable, bool *can_steal) -{ - int i; - int fallback_mt; - - if (area->nr_free == 0) - return -1; - - *can_steal = false; - for (i = 0; i < MIGRATE_FALLBACKS - 1 ; i++) { - fallback_mt = fallbacks[migratetype][i]; - if (free_area_empty(area, fallback_mt)) - continue; - - if (can_steal_fallback(order, migratetype)) - *can_steal = true; - - if (!only_stealable) - return fallback_mt; - - if (*can_steal) - return fallback_mt; - } - - return -1; -} - -/* - * Reserve the pageblock(s) surrounding an allocation request for - * exclusive use of high-order atomic allocations if there are no - * empty page blocks that contain a page with a suitable order - */ -static void reserve_highatomic_pageblock(struct page *page, int order, - struct zone *zone) -{ - int mt; - unsigned long max_managed, flags; - bool bypass = false; - - /* - * The number reserved as: minimum is 1 pageblock, maximum is - * roughly 1% of a zone. But if 1% of a zone falls below a - * pageblock size, then don't reserve any pageblocks. - * Check is race-prone but harmless. - */ - if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages) - return; - max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages); - if (zone->nr_reserved_highatomic >= max_managed) - return; - trace_android_vh_reserve_highatomic_bypass(page, &bypass); - if (bypass) - return; - - spin_lock_irqsave(&zone->lock, flags); - - /* Recheck the nr_reserved_highatomic limit under the lock */ - if (zone->nr_reserved_highatomic >= max_managed) - goto out_unlock; - - /* Yoink! */ - mt = get_pageblock_migratetype(page); - /* Only reserve normal pageblocks (i.e., they can merge with others) */ - if (!migratetype_is_mergeable(mt)) - goto out_unlock; - - if (order < pageblock_order) { - if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) - goto out_unlock; - zone->nr_reserved_highatomic += pageblock_nr_pages; - } else { - change_pageblock_range(page, order, MIGRATE_HIGHATOMIC); - zone->nr_reserved_highatomic += 1 << order; - } - -out_unlock: - spin_unlock_irqrestore(&zone->lock, flags); -} - -/* - * Used when an allocation is about to fail under memory pressure. This - * potentially hurts the reliability of high-order allocations when under - * intense memory pressure but failed atomic allocations should be easier - * to recover from than an OOM. - * - * If @force is true, try to unreserve pageblocks even though highatomic - * pageblock is exhausted. - */ -static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, - bool force) -{ - struct zonelist *zonelist = ac->zonelist; - unsigned long flags; - struct zoneref *z; - struct zone *zone; - struct page *page; - int order; - bool skip_unreserve_highatomic = false; - int ret; - - for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, - ac->nodemask) { - /* - * Preserve at least one pageblock unless memory pressure - * is really high. 
- */ - if (!force && zone->nr_reserved_highatomic <= - pageblock_nr_pages) - continue; - - trace_android_vh_unreserve_highatomic_bypass(force, zone, - &skip_unreserve_highatomic); - if (skip_unreserve_highatomic) - continue; - - spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order < NR_PAGE_ORDERS; order++) { - struct free_area *area = &(zone->free_area[order]); - int mt; - - page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); - if (!page) - continue; - - mt = get_pageblock_migratetype(page); - /* - * In page freeing path, migratetype change is racy so - * we can counter several free pages in a pageblock - * in this loop although we changed the pageblock type - * from highatomic to ac->migratetype. So we should - * adjust the count once. - */ - if (is_migrate_highatomic(mt)) { - unsigned long size; - /* - * It should never happen but changes to - * locking could inadvertently allow a per-cpu - * drain to add pages to MIGRATE_HIGHATOMIC - * while unreserving so be safe and watch for - * underflows. - */ - size = max(pageblock_nr_pages, 1UL << order); - size = min(size, zone->nr_reserved_highatomic); - zone->nr_reserved_highatomic -= size; - } - - /* - * Convert to ac->migratetype and avoid the normal - * pageblock stealing heuristics. Minimally, the caller - * is doing the work and needs the pages. More - * importantly, if the block was always converted to - * MIGRATE_UNMOVABLE or another type then the number - * of pageblocks that cannot be completely freed - * may increase. - */ - if (order < pageblock_order) - ret = move_freepages_block(zone, page, mt, - ac->migratetype); - else { - move_to_free_list(page, zone, order, mt, - ac->migratetype); - change_pageblock_range(page, order, - ac->migratetype); - ret = 1; - } - /* - * Reserving the block(s) already succeeded, - * so this should not fail on zone boundaries. - */ - WARN_ON_ONCE(ret == -1); - if (ret > 0) { - spin_unlock_irqrestore(&zone->lock, flags); - return ret; - } - } - spin_unlock_irqrestore(&zone->lock, flags); - } - - return false; -} - -/* - * Try finding a free buddy page on the fallback list and put it on the free - * list of requested migratetype, possibly along with other pages from the same - * block, depending on fragmentation avoidance heuristics. Returns true if - * fallback was found so that __rmqueue_smallest() can grab it. + * Try to allocate from some fallback migratetype by claiming the entire block, + * i.e. converting it to the allocation's start migratetype. * * The use of signed ints for order and current_order is a deliberate * deviation from the rest of this file, to make the for loop * condition simpler. 
*/ static __always_inline struct page * -__rmqueue_fallback(struct zone *zone, int order, int start_migratetype, +__rmqueue_claim(struct zone *zone, int order, int start_migratetype, unsigned int alloc_flags) { struct free_area *area; @@ -2349,7 +2195,6 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, int min_order = order; struct page *page; int fallback_mt; - bool can_steal; /* * Do not steal pages from freelists belonging to other pageblocks @@ -2368,62 +2213,73 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, --current_order) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, - start_migratetype, false, &can_steal); + start_migratetype, true); + + /* No block in that order */ if (fallback_mt == -1) continue; - /* - * We cannot steal all free pages from the pageblock and the - * requested migratetype is movable. In that case it's better to - * steal and split the smallest available page instead of the - * largest available page, because even if the next movable - * allocation falls back into a different pageblock than this - * one, it won't cause permanent fragmentation. - */ - if (!can_steal && start_migratetype == MIGRATE_MOVABLE - && current_order > order) - goto find_smallest; + /* Advanced into orders too low to claim, abort */ + if (fallback_mt == -2) + break; - goto do_steal; + page = get_page_from_free_area(area, fallback_mt); + page = try_to_claim_block(zone, page, current_order, order, + start_migratetype, fallback_mt, + alloc_flags); + if (page) { + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; + } } return NULL; +} + +/* + * Try to steal a single page from some fallback migratetype. Leave the rest of + * the block as its current migratetype, potentially causing fragmentation. + */ +static __always_inline struct page * +__rmqueue_steal(struct zone *zone, int order, int start_migratetype) +{ + struct free_area *area; + int current_order; + struct page *page; + int fallback_mt; -find_smallest: for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, - start_migratetype, false, &can_steal); - if (fallback_mt != -1) - break; + start_migratetype, false); + if (fallback_mt == -1) + continue; + + page = get_page_from_free_area(area, fallback_mt); + page_del_and_expand(zone, page, order, current_order, fallback_mt); + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; } - /* - * This should not happen - we already found a suitable fallback - * when looking for the largest page. - */ - VM_BUG_ON(current_order > MAX_ORDER); - -do_steal: - page = get_page_from_free_area(area, fallback_mt); - - /* take off list, maybe claim block, expand remainder */ - page = steal_suitable_fallback(zone, page, current_order, order, - start_migratetype, alloc_flags, can_steal); - - trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, fallback_mt); - - return page; + return NULL; } +enum rmqueue_mode { + RMQUEUE_NORMAL, + RMQUEUE_CMA, + RMQUEUE_CLAIM, + RMQUEUE_STEAL, +}; + /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. 
*/ static __always_inline struct page * __rmqueue(struct zone *zone, unsigned int order, int migratetype, - unsigned int alloc_flags) + unsigned int alloc_flags, enum rmqueue_mode *mode) { struct page *page = NULL; @@ -2446,16 +2302,48 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, } } - page = __rmqueue_smallest(zone, order, migratetype); - if (unlikely(!page)) { - if (!cma_redirect_restricted() && alloc_flags & ALLOC_CMA) + /* + * First try the freelists of the requested migratetype, then try + * fallbacks modes with increasing levels of fragmentation risk. + * + * The fallback logic is expensive and rmqueue_bulk() calls in + * a loop with the zone->lock held, meaning the freelists are + * not subject to any outside changes. Remember in *mode where + * we found pay dirt, to save us the search on the next call. + */ + switch (*mode) { + case RMQUEUE_NORMAL: + page = __rmqueue_smallest(zone, order, migratetype); + if (page) + return page; + fallthrough; + case RMQUEUE_CMA: + if (!cma_redirect_restricted() && alloc_flags & ALLOC_CMA) { page = __rmqueue_cma_fallback(zone, order); - - if (!page) - page = __rmqueue_fallback(zone, order, migratetype, - alloc_flags); + if (page) { + *mode = RMQUEUE_CMA; + return page; + } + } + fallthrough; + case RMQUEUE_CLAIM: + page = __rmqueue_claim(zone, order, migratetype, alloc_flags); + if (page) { + /* Replenished preferred freelist, back to normal mode. */ + *mode = RMQUEUE_NORMAL; + return page; + } + fallthrough; + case RMQUEUE_STEAL: + if (!(alloc_flags & ALLOC_NOFRAGMENT)) { + page = __rmqueue_steal(zone, order, migratetype); + if (page) { + *mode = RMQUEUE_STEAL; + return page; + } + } } - return page; + return NULL; } /* @@ -2467,6 +2355,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; unsigned long flags; int i; @@ -2482,7 +2371,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, if (cma_redirect_restricted() && is_migrate_cma(migratetype)) page = __rmqueue_cma_fallback(zone, order); else - page = __rmqueue(zone, order, migratetype, alloc_flags); + page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm); if (unlikely(page == NULL)) break; @@ -3038,9 +2927,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, alloc_flags & ALLOC_CMA) page = __rmqueue_cma_fallback(zone, order); - if (!page) + if (!page) { + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; + page = __rmqueue(zone, order, migratetype, - alloc_flags); + alloc_flags, &rmqm); + } /* * If the allocation fails, allow OOM handling and * order-0 (atomic) allocs access to HIGHATOMIC @@ -3225,6 +3117,151 @@ noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) } ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); +/* + * Reserve the pageblock(s) surrounding an allocation request for + * exclusive use of high-order atomic allocations if there are no + * empty page blocks that contain a page with a suitable order + */ +static void reserve_highatomic_pageblock(struct page *page, int order, + struct zone *zone) +{ + int mt; + unsigned long max_managed, flags; + bool bypass = false; + + /* + * The number reserved as: minimum is 1 pageblock, maximum is + * roughly 1% of a zone. But if 1% of a zone falls below a + * pageblock size, then don't reserve any pageblocks. + * Check is race-prone but harmless. 
+ */ + if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages) + return; + max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages); + if (zone->nr_reserved_highatomic >= max_managed) + return; + trace_android_vh_reserve_highatomic_bypass(page, &bypass); + if (bypass) + return; + + spin_lock_irqsave(&zone->lock, flags); + + /* Recheck the nr_reserved_highatomic limit under the lock */ + if (zone->nr_reserved_highatomic >= max_managed) + goto out_unlock; + + /* Yoink! */ + mt = get_pageblock_migratetype(page); + /* Only reserve normal pageblocks (i.e., they can merge with others) */ + if (!migratetype_is_mergeable(mt)) + goto out_unlock; + + if (order < pageblock_order) { + if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) + goto out_unlock; + zone->nr_reserved_highatomic += pageblock_nr_pages; + } else { + change_pageblock_range(page, order, MIGRATE_HIGHATOMIC); + zone->nr_reserved_highatomic += 1 << order; + } + +out_unlock: + spin_unlock_irqrestore(&zone->lock, flags); +} + +/* + * Used when an allocation is about to fail under memory pressure. This + * potentially hurts the reliability of high-order allocations when under + * intense memory pressure but failed atomic allocations should be easier + * to recover from than an OOM. + * + * If @force is true, try to unreserve pageblocks even though highatomic + * pageblock is exhausted. + */ +static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, + bool force) +{ + struct zonelist *zonelist = ac->zonelist; + unsigned long flags; + struct zoneref *z; + struct zone *zone; + struct page *page; + int order; + bool skip_unreserve_highatomic = false; + int ret; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, + ac->nodemask) { + /* + * Preserve at least one pageblock unless memory pressure + * is really high. + */ + if (!force && zone->nr_reserved_highatomic <= + pageblock_nr_pages) + continue; + + trace_android_vh_unreserve_highatomic_bypass(force, zone, + &skip_unreserve_highatomic); + if (skip_unreserve_highatomic) + continue; + + spin_lock_irqsave(&zone->lock, flags); + for (order = 0; order < NR_PAGE_ORDERS; order++) { + struct free_area *area = &(zone->free_area[order]); + unsigned long size; + + page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); + if (!page) + continue; + + /* + * It should never happen but changes to + * locking could inadvertently allow a per-cpu + * drain to add pages to MIGRATE_HIGHATOMIC + * while unreserving so be safe and watch for + * underflows. + */ + size = max(pageblock_nr_pages, 1UL << order); + size = min(size, zone->nr_reserved_highatomic); + zone->nr_reserved_highatomic -= size; + + /* + * Convert to ac->migratetype and avoid the normal + * pageblock stealing heuristics. Minimally, the caller + * is doing the work and needs the pages. More + * importantly, if the block was always converted to + * MIGRATE_UNMOVABLE or another type then the number + * of pageblocks that cannot be completely freed + * may increase. + */ + if (order < pageblock_order) + ret = move_freepages_block(zone, page, + MIGRATE_HIGHATOMIC, + ac->migratetype); + else { + move_to_free_list(page, zone, order, + MIGRATE_HIGHATOMIC, + ac->migratetype); + change_pageblock_range(page, order, + ac->migratetype); + ret = 1; + } + /* + * Reserving the block(s) already succeeded, + * so this should not fail on zone boundaries. 
+ */ + WARN_ON_ONCE(ret == -1); + if (ret > 0) { + spin_unlock_irqrestore(&zone->lock, flags); + return ret; + } + } + spin_unlock_irqrestore(&zone->lock, flags); + } + + return false; +} + static inline long __zone_watermark_unusable_free(struct zone *z, unsigned int order, unsigned int alloc_flags) { diff --git a/mm/page_io.c b/mm/page_io.c index 648fd53303a9..a3feadd1ba9e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -471,6 +471,19 @@ static void swap_readpage_bdev_sync(struct folio *folio, struct bio bio; bool read = false; + trace_android_rvh_swap_readpage_bdev_sync(sis->bdev, + swap_page_sector(&folio->page) + get_start_sect(sis->bdev), + &folio->page, &read); + if (read) { + count_vm_events(PSWPIN, folio_nr_pages(folio)); + return; + } + + /* + * trace_android_vh_swap_readpage_bdev_sync is deprecated, and + * should not be carried over into later kernels. + * Use trace_android_rvh_swap_readpage_bdev_sync instead. + */ trace_android_vh_swap_readpage_bdev_sync(sis->bdev, swap_page_sector(&folio->page) + get_start_sect(sis->bdev), &folio->page, &read); diff --git a/mm/swap.c b/mm/swap.c index 174259a9a5f7..30b5eebce985 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -736,6 +736,7 @@ void folio_deactivate(struct folio *folio) local_unlock(&cpu_fbatches.lock); } } +EXPORT_SYMBOL_GPL(folio_deactivate); /** * folio_mark_lazyfree - make an anon folio lazyfree
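
The rmqueue_mode enum introduced in the mm/page_alloc.c hunks above lets rmqueue_bulk() remember which rung of the NORMAL -> CMA -> CLAIM -> STEAL ladder last produced a page, so the next iteration under the same zone->lock starts there instead of re-searching tiers already known to be empty (a successful CLAIM refills the preferred freelist and drops the cache back to NORMAL). What follows is a minimal userspace sketch of that caching idea only; try_tier() and the per-tier counters are made-up stand-ins for the kernel's freelist searches, not real APIs, and the ALLOC_NOFRAGMENT gating of the steal tier is omitted.

#include <stdbool.h>
#include <stdio.h>

/* Same tiers as the kernel enum; the counters below are invented. */
enum rmqueue_mode { RMQUEUE_NORMAL, RMQUEUE_CMA, RMQUEUE_CLAIM, RMQUEUE_STEAL };

static int normal_left = 2, cma_left = 0, claim_left = 3, steal_left = 100;

/* Stand-in for "search this tier's freelists for a page". */
static bool try_tier(int *left)
{
	if (*left <= 0)
		return false;
	(*left)--;
	return true;
}

/*
 * Start at the cached tier and fall through to riskier ones. Remember
 * where we found a page, except after CLAIM, which refills the
 * preferred freelist and therefore resets the cache to NORMAL.
 */
static bool alloc_one(enum rmqueue_mode *mode)
{
	switch (*mode) {
	case RMQUEUE_NORMAL:
		if (try_tier(&normal_left))
			return true;
		/* fallthrough */
	case RMQUEUE_CMA:
		if (try_tier(&cma_left)) {
			*mode = RMQUEUE_CMA;
			return true;
		}
		/* fallthrough */
	case RMQUEUE_CLAIM:
		if (try_tier(&claim_left)) {
			*mode = RMQUEUE_NORMAL;
			return true;
		}
		/* fallthrough */
	case RMQUEUE_STEAL:
		if (try_tier(&steal_left)) {
			*mode = RMQUEUE_STEAL;
			return true;
		}
	}
	return false;
}

int main(void)
{
	enum rmqueue_mode mode = RMQUEUE_NORMAL;	/* as in rmqueue_bulk() */

	for (int i = 0; i < 8; i++) {
		bool ok = alloc_one(&mode);

		printf("page %d: %s, next search starts at tier %d\n",
		       i, ok ? "ok" : "fail", (int)mode);
	}
	return 0;
}

Once the first tiers run dry the cached mode sticks at RMQUEUE_STEAL, which is exactly the repeated search over the cheaper tiers that the patch avoids while rmqueue_bulk() holds zone->lock.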
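
The sizing rule in reserve_highatomic_pageblock() caps the reservation at roughly 1% of the zone's managed pages, rounded up to whole pageblocks, and reserves nothing when 1% of the zone is smaller than a single pageblock. A small standalone sketch of that arithmetic, with ALIGN_UP as a local stand-in for the kernel's ALIGN() and zone sizes made up purely for illustration:

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

/*
 * Mirror of the sizing rule: no reservation when 1% of the zone is
 * below one pageblock, otherwise cap at 1% rounded up to pageblocks.
 */
static unsigned long highatomic_cap(unsigned long managed_pages,
				    unsigned long pageblock_nr_pages)
{
	if (managed_pages / 100 < pageblock_nr_pages)
		return 0;
	return ALIGN_UP(managed_pages / 100, pageblock_nr_pages);
}

int main(void)
{
	/* e.g. a 4 GiB zone of 4 KiB pages with 512-page pageblocks */
	printf("%lu\n", highatomic_cap(1048576UL, 512));	/* -> 10752 */
	/* tiny zone: 1% is below one pageblock, nothing is reserved */
	printf("%lu\n", highatomic_cap(20000UL, 512));		/* -> 0 */
	return 0;
}

For the 4 GiB example the cap works out to 21 pageblocks (10752 pages), slightly above an exact 1% because of the round-up; the tiny zone reserves nothing at all.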