diff --git a/BUILD.bazel b/BUILD.bazel index 17facd833995..986c8d3cc149 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -185,7 +185,7 @@ define_common_kernels(target_configs = { "protected_modules_list": ":gki_aarch64_protected_modules", "module_implicit_outs": get_gki_modules_list("arm64") + get_kunit_modules_list("arm64"), "make_goals": _GKI_AARCH64_MAKE_GOALS, - "clang_autofdo_profile": "//toolchain/pgo-profiles/kernel:aarch64/android15-6.6/kernel.afdo", + "clang_autofdo_profile": ":android/gki/aarch64/afdo/kernel.afdo", "defconfig_fragments": ["arch/arm64/configs/autofdo_gki.fragment"], "ddk_headers_archive": ":kernel_aarch64_ddk_headers_archive", "extra_dist": [ @@ -752,6 +752,7 @@ kernel_build( build_config = "build.config.gki.aarch64.fips140", kmi_symbol_list = "android/abi_gki_aarch64_fips140", module_outs = ["crypto/fips140.ko"], + strip_modules = True, ) kernel_abi( @@ -904,6 +905,16 @@ pkg_install( visibility = ["//visibility:private"], ) +py_library( + name = "kunit_parser", + srcs = [ + "tools/testing/kunit/kunit_parser.py", + "tools/testing/kunit/kunit_printer.py", + ], + imports = ["tools/testing/kunit"], + visibility = ["//visibility:public"], +) + # DDK Headers # All headers. These are the public targets for DDK modules to use. alias( @@ -985,6 +996,7 @@ ddk_headers( hdrs = [ "drivers/dma-buf/heaps/deferred-free-helper.h", "drivers/extcon/extcon.h", + "drivers/pci/controller/dwc/pcie-designware.h", "drivers/thermal/thermal_core.h", "drivers/thermal/thermal_netlink.h", "drivers/usb/dwc3/core.h", @@ -1002,6 +1014,7 @@ ddk_headers( "arch/arm64/include/uapi", "drivers/dma-buf", "drivers/extcon", + "drivers/pci/controller/dwc", "drivers/thermal", "drivers/usb", "sound/usb", diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index a65c1602cb23..50e81b6e2799 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -142,8 +142,15 @@ root_hash_sig_key_desc already in the secondary trusted keyring. try_verify_in_tasklet - If verity hashes are in cache, verify data blocks in kernel tasklet instead - of workqueue. This option can reduce IO latency. + If verity hashes are in cache and the IO size does not exceed the limit, + verify data blocks in bottom half instead of workqueue. This option can + reduce IO latency. The size limits can be configured via + /sys/module/dm_verity/parameters/use_bh_bytes. The four parameters + correspond to limits for IOPRIO_CLASS_NONE,IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE in turn. 
+ For example: + ,,, + 4096,4096,4096,4096 Theory of operation =================== diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 7d880c9f90ba..3c34edd57c66 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -11818,6 +11818,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x5641d9bd } +pointer_reference { + id: 0x1f088ddb + kind: POINTER + pointee_type_id: 0x5662d1f1 +} pointer_reference { id: 0x1f160413 kind: POINTER @@ -11958,6 +11963,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x57af7a85 } +pointer_reference { + id: 0x1f7fd7b9 + kind: POINTER + pointee_type_id: 0x57bfb879 +} pointer_reference { id: 0x1f879b63 kind: POINTER @@ -14883,6 +14893,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9a60b05d } +pointer_reference { + id: 0x2c081838 + kind: POINTER + pointee_type_id: 0x9a60867f +} pointer_reference { id: 0x2c081e7e kind: POINTER @@ -21158,6 +21173,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x90ed88b5 } +pointer_reference { + id: 0x2eab8c01 + kind: POINTER + pointee_type_id: 0x90eed69a +} pointer_reference { id: 0x2ead1c7e kind: POINTER @@ -26093,6 +26113,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xf9789a11 } +pointer_reference { + id: 0x34d0b2f4 + kind: POINTER + pointee_type_id: 0xf9022d4d +} pointer_reference { id: 0x34d10f7f kind: POINTER @@ -42312,6 +42337,10 @@ member { id: 0x27e3d457 type_id: 0x10a1214c } +member { + id: 0x27f3e47f + type_id: 0x10e1e1ec +} member { id: 0x28026f1f type_id: 0x2f27cc6c @@ -42400,6 +42429,10 @@ member { id: 0x2bc5541a type_id: 0x203b2079 } +member { + id: 0x2bdfee76 + type_id: 0x2051c9c9 +} member { id: 0x2bfbe1fc type_id: 0x20c1f7e3 @@ -42755,6 +42788,11 @@ member { type_id: 0x4866b1cb offset: 6592 } +member { + id: 0x31dd0541 + type_id: 0x485a4775 + offset: 3648 +} member { id: 0x31e4dfba type_id: 0x48bd0268 @@ -43603,6 +43641,11 @@ member { id: 0x397f1ce9 type_id: 0x6ad203b6 } +member { + id: 0x397f4bf7 + type_id: 0x6ad35d7c + offset: 3712 +} member { id: 0x3987082b type_id: 0x6932433a @@ -46165,16 +46208,123 @@ member { name: "a" type_id: 0x299c4193 } +member { + id: 0xe4743538 + name: "a0" + type_id: 0x33756485 +} +member { + id: 0xa534b5eb + name: "a1" + type_id: 0x33756485 + offset: 64 +} +member { + id: 0xf5e4512d + name: "a10" + type_id: 0x33756485 + offset: 640 +} +member { + id: 0xbd272d13 + name: "a11" + type_id: 0x33756485 + offset: 704 +} +member { + id: 0x7c66a286 + name: "a12" + type_id: 0x33756485 + offset: 768 +} +member { + id: 0x3fa20fe4 + name: "a13" + type_id: 0x33756485 + offset: 832 +} +member { + id: 0xfee59b6f + name: "a14" + type_id: 0x33756485 + offset: 896 +} +member { + id: 0xb82251de + name: "a15" + type_id: 0x33756485 + offset: 960 +} +member { + id: 0x79653bda + name: "a16" + type_id: 0x33756485 + offset: 1024 +} +member { + id: 0x3aa48827 + name: "a17" + type_id: 0x33756485 + offset: 1088 +} +member { + id: 0x7ab92c4e + name: "a2" + type_id: 0x33756485 + offset: 128 +} +member { + id: 0x3b7ab233 + name: "a3" + type_id: 0x33756485 + offset: 192 +} +member { + id: 0xe176b648 + name: "a4" + type_id: 0x33756485 + offset: 256 +} member { id: 0xe1a7b52a name: "a4" type_id: 0xe276adef } +member { + id: 0xa03b00d0 + name: "a5" + type_id: 0x33756485 + offset: 320 +} member { id: 0x6773cd7f name: "a6" type_id: 0xb4d0b861 } +member { + id: 0x67f46931 + name: "a6" + type_id: 0x33756485 + offset: 384 +} +member { + id: 0x26bbc900 + name: "a7" + type_id: 0x33756485 + offset: 448 +} +member { + id: 0xee750a66 + 
name: "a8" + type_id: 0x33756485 + offset: 512 +} +member { + id: 0xaf366cba + name: "a9" + type_id: 0x33756485 + offset: 576 +} member { id: 0xfb59b6e5 name: "a_alt_hnp_support" @@ -51797,6 +51947,11 @@ member { type_id: 0x92233392 offset: 1728 } +member { + id: 0xe0f6344b + name: "android_kabi_reserved4" + type_id: 0x92233392 +} member { id: 0xe0f63540 name: "android_kabi_reserved4" @@ -52086,10 +52241,9 @@ member { offset: 448 } member { - id: 0x22369471 + id: 0x223694dd name: "android_kabi_reserved5" type_id: 0x92233392 - offset: 3712 } member { id: 0x22369513 @@ -103757,6 +103911,11 @@ member { name: "guest_ipa" type_id: 0x33756485 } +member { + id: 0xf081c766 + name: "guest_stage2_pa" + type_id: 0x2c081838 +} member { id: 0x3c06cc88 name: "guid" @@ -170003,6 +170162,11 @@ member { type_id: 0x0eafcf90 offset: 1600 } +member { + id: 0x7aa98348 + name: "register_guest_smc_handler" + type_id: 0x2eab8c01 +} member { id: 0xa7bbc92d name: "register_host_perm_fault_handler" @@ -220332,6 +220496,14 @@ struct_union { member_id: 0x2e5ed686 } } +struct_union { + id: 0x10e1e1ec + kind: STRUCT + definition { + bytesize: 8 + member_id: 0x223694dd + } +} struct_union { id: 0x114301c2 kind: STRUCT @@ -221761,6 +221933,14 @@ struct_union { member_id: 0xe15d928a } } +struct_union { + id: 0x2051c9c9 + kind: STRUCT + definition { + bytesize: 8 + member_id: 0xe0f6344b + } +} struct_union { id: 0x20957e7a kind: STRUCT @@ -224391,6 +224571,16 @@ struct_union { member_id: 0x10f05f62 } } +struct_union { + id: 0x485a4775 + kind: UNION + definition { + bytesize: 8 + member_id: 0x7aa98348 + member_id: 0x2bdfee76 + member_id: 0x36752b74 + } +} struct_union { id: 0x4866b1cb kind: UNION @@ -226694,6 +226884,16 @@ struct_union { member_id: 0x95dac977 } } +struct_union { + id: 0x6ad35d7c + kind: UNION + definition { + bytesize: 8 + member_id: 0xf081c766 + member_id: 0x27f3e47f + member_id: 0x36752b74 + } +} struct_union { id: 0x6b4a8270 kind: UNION @@ -229097,6 +229297,44 @@ struct_union { member_id: 0x1cafacfd } } +struct_union { + id: 0x57bfb879 + kind: STRUCT + name: "arm_smccc_1_2_regs" + definition { + bytesize: 144 + member_id: 0xe4743538 + member_id: 0xa534b5eb + member_id: 0x7ab92c4e + member_id: 0x3b7ab233 + member_id: 0xe176b648 + member_id: 0xa03b00d0 + member_id: 0x67f46931 + member_id: 0x26bbc900 + member_id: 0xee750a66 + member_id: 0xaf366cba + member_id: 0xf5e4512d + member_id: 0xbd272d13 + member_id: 0x7c66a286 + member_id: 0x3fa20fe4 + member_id: 0xfee59b6f + member_id: 0xb82251de + member_id: 0x79653bda + member_id: 0x3aa48827 + } +} +struct_union { + id: 0x5662d1f1 + kind: STRUCT + name: "arm_smccc_res" + definition { + bytesize: 32 + member_id: 0xe4743538 + member_id: 0xa534b5eb + member_id: 0x7ab92c4e + member_id: 0x3b7ab233 + } +} struct_union { id: 0x48893aca kind: STRUCT @@ -261187,8 +261425,8 @@ struct_union { member_id: 0x30a9cfe3 member_id: 0x3293400e member_id: 0x3036aec8 - member_id: 0xe0f63cd3 - member_id: 0x22369471 + member_id: 0x31dd0541 + member_id: 0x397f4bf7 member_id: 0x667752ee member_id: 0xa1b7d38a member_id: 0xe3f4b484 @@ -319916,6 +320154,14 @@ function { return_type_id: 0x10be9c51 parameter_id: 0x39cdf888 } +function { + id: 0x52b6bce6 + return_type_id: 0x2560a232 + parameter_id: 0x347303b4 + parameter_id: 0x3e10b518 + parameter_id: 0x4585663f + parameter_id: 0x064d6086 +} function { id: 0x52c37930 return_type_id: 0x92233392 @@ -324967,6 +325213,12 @@ function { parameter_id: 0x4585663f parameter_id: 0x4585663f } +function { + id: 0x90eed69a + return_type_id: 0x6720d32f + 
parameter_id: 0x34d0b2f4 + parameter_id: 0x16864222 +} function { id: 0x90f3da7c return_type_id: 0x6720d32f @@ -335742,6 +335994,16 @@ function { parameter_id: 0x6720d32f parameter_id: 0xf1a6dfed } +function { + id: 0x9a0fef86 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x6720d32f + parameter_id: 0xf1a6dfed + parameter_id: 0x2fb2b385 + parameter_id: 0x064d6086 + parameter_id: 0x11cfee5a +} function { id: 0x9a101a10 return_type_id: 0x6720d32f @@ -336459,6 +336721,13 @@ function { parameter_id: 0x2cdbb77a parameter_id: 0xf435685e } +function { + id: 0x9a60867f + return_type_id: 0x6720d32f + parameter_id: 0x16864222 + parameter_id: 0x92233392 + parameter_id: 0x25e45be7 +} function { id: 0x9a609f65 return_type_id: 0x6720d32f @@ -337949,6 +338218,13 @@ function { parameter_id: 0x6d7f5ff6 parameter_id: 0x0d164218 } +function { + id: 0x9b3126f5 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x2170d06d + parameter_id: 0x6d7f5ff6 +} function { id: 0x9b321d75 return_type_id: 0x6720d32f @@ -337990,6 +338266,14 @@ function { parameter_id: 0x18bd6530 parameter_id: 0x27162aac } +function { + id: 0x9b33f629 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x2170d06d + parameter_id: 0x6720d32f + parameter_id: 0x11cfee5a +} function { id: 0x9b35e16b return_type_id: 0x6720d32f @@ -341439,6 +341723,17 @@ function { parameter_id: 0x6720d32f parameter_id: 0x11cfee5a } +function { + id: 0x9bcc7570 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x1b36c7a2 + parameter_id: 0x33756485 + parameter_id: 0xf435685e + parameter_id: 0x6720d32f + parameter_id: 0x13580d6c + parameter_id: 0x11cfee5a +} function { id: 0x9bccd612 return_type_id: 0x6720d32f @@ -351038,6 +351333,10 @@ function { return_type_id: 0x4585663f parameter_id: 0x334eed41 } +function { + id: 0xc20e2d34 + return_type_id: 0x2b16c036 +} function { id: 0xc21e59d8 return_type_id: 0x4585663f @@ -351142,6 +351441,13 @@ function { parameter_id: 0x347303b4 parameter_id: 0x3e10b518 } +function { + id: 0xc3621fce + return_type_id: 0x4585663f + parameter_id: 0x32a623d7 + parameter_id: 0x4585663f + parameter_id: 0x1bf16028 +} function { id: 0xc36d4f04 return_type_id: 0x2ef43f67 @@ -354047,6 +354353,13 @@ function { parameter_id: 0x7be80061 parameter_id: 0x4585663f } +function { + id: 0xf9022d4d + return_type_id: 0x6d7f5ff6 + parameter_id: 0x1f7fd7b9 + parameter_id: 0x1f088ddb + parameter_id: 0x16864222 +} function { id: 0xf904db60 return_type_id: 0x6d7f5ff6 @@ -358763,6 +359076,15 @@ elf_symbol { type_id: 0x9ba7ef87 full_name: "__traceiter_android_rvh_do_el1_undef" } +elf_symbol { + id: 0x7f84bca1 + name: "__traceiter_android_rvh_do_madvise_bypass" + is_defined: true + symbol_type: FUNCTION + crc: 0xf663bedd + type_id: 0x9bcc7570 + full_name: "__traceiter_android_rvh_do_madvise_bypass" +} elf_symbol { id: 0x1151c5b7 name: "__traceiter_android_rvh_do_read_fault" @@ -359060,6 +359382,15 @@ elf_symbol { type_id: 0x9bc253cf full_name: "__traceiter_android_rvh_is_cpu_allowed" } +elf_symbol { + id: 0xa5a0ecca + name: "__traceiter_android_rvh_kswapd_shrink_node" + is_defined: true + symbol_type: FUNCTION + crc: 0x31a610f6 + type_id: 0x9bb68fe1 + full_name: "__traceiter_android_rvh_kswapd_shrink_node" +} elf_symbol { id: 0x7fcabbe6 name: "__traceiter_android_rvh_ksys_umount" @@ -359312,6 +359643,15 @@ elf_symbol { type_id: 0x9bd5a3c9 full_name: "__traceiter_android_rvh_perf_rotate_context" } +elf_symbol { + id: 0xc16891a4 + name: "__traceiter_android_rvh_perform_reclaim" + 
is_defined: true + symbol_type: FUNCTION + crc: 0x2f62eaba + type_id: 0x9a0fef86 + full_name: "__traceiter_android_rvh_perform_reclaim" +} elf_symbol { id: 0x097e467e name: "__traceiter_android_rvh_place_entity" @@ -360689,6 +361029,15 @@ elf_symbol { type_id: 0x9bb68fe1 full_name: "__traceiter_android_vh_check_new_page" } +elf_symbol { + id: 0xeebf3d23 + name: "__traceiter_android_vh_check_set_ioprio" + is_defined: true + symbol_type: FUNCTION + crc: 0xc3174ee4 + type_id: 0x9bf93663 + full_name: "__traceiter_android_vh_check_set_ioprio" +} elf_symbol { id: 0xa2224fa6 name: "__traceiter_android_vh_check_uninterruptible_tasks" @@ -360716,6 +361065,15 @@ elf_symbol { type_id: 0x9b63bb96 full_name: "__traceiter_android_vh_cleanup_old_buffers_bypass" } +elf_symbol { + id: 0xe50d4e50 + name: "__traceiter_android_vh_clear_reclaimed_folio" + is_defined: true + symbol_type: FUNCTION + crc: 0xd43f001e + type_id: 0x9b3126f5 + full_name: "__traceiter_android_vh_clear_reclaimed_folio" +} elf_symbol { id: 0xaed0a325 name: "__traceiter_android_vh_clear_rwsem_reader_owned" @@ -361292,6 +361650,24 @@ elf_symbol { type_id: 0x9a2abc7b full_name: "__traceiter_android_vh_enable_thermal_genl_check" } +elf_symbol { + id: 0x7f1591a1 + name: "__traceiter_android_vh_ep_create_wakeup_source" + is_defined: true + symbol_type: FUNCTION + crc: 0x1e8ed582 + type_id: 0x9ba47dcc + full_name: "__traceiter_android_vh_ep_create_wakeup_source" +} +elf_symbol { + id: 0x10157f79 + name: "__traceiter_android_vh_evict_folios_bypass" + is_defined: true + symbol_type: FUNCTION + crc: 0xb13deb47 + type_id: 0x9b2e0ad9 + full_name: "__traceiter_android_vh_evict_folios_bypass" +} elf_symbol { id: 0x1921d10d name: "__traceiter_android_vh_exit_check" @@ -361427,6 +361803,15 @@ elf_symbol { type_id: 0x9bc01a31 full_name: "__traceiter_android_vh_filemap_map_pages_range" } +elf_symbol { + id: 0x97914e0c + name: "__traceiter_android_vh_filemap_pages" + is_defined: true + symbol_type: FUNCTION + crc: 0x8b270699 + type_id: 0x9b2a7922 + full_name: "__traceiter_android_vh_filemap_pages" +} elf_symbol { id: 0x158bf9d3 name: "__traceiter_android_vh_filemap_read" @@ -361940,6 +362325,15 @@ elf_symbol { type_id: 0x9bcd4ff7 full_name: "__traceiter_android_vh_jiffies_update" } +elf_symbol { + id: 0xc8685c71 + name: "__traceiter_android_vh_keep_reclaimed_folio" + is_defined: true + symbol_type: FUNCTION + crc: 0x0c88e967 + type_id: 0x9b33f629 + full_name: "__traceiter_android_vh_keep_reclaimed_folio" +} elf_symbol { id: 0xe261e8cc name: "__traceiter_android_vh_killed_process" @@ -362030,6 +362424,24 @@ elf_symbol { type_id: 0x9b222516 full_name: "__traceiter_android_vh_look_around_migrate_folio" } +elf_symbol { + id: 0xa6d58835 + name: "__traceiter_android_vh_lru_gen_add_folio_skip" + is_defined: true + symbol_type: FUNCTION + crc: 0xd74c52e7 + type_id: 0x9b43c1ce + full_name: "__traceiter_android_vh_lru_gen_add_folio_skip" +} +elf_symbol { + id: 0x3a741247 + name: "__traceiter_android_vh_lru_gen_del_folio_skip" + is_defined: true + symbol_type: FUNCTION + crc: 0x73901130 + type_id: 0x9b43c1ce + full_name: "__traceiter_android_vh_lru_gen_del_folio_skip" +} elf_symbol { id: 0xf6964618 name: "__traceiter_android_vh_lruvec_add_folio" @@ -363983,6 +364395,15 @@ elf_symbol { type_id: 0x9ab83ca3 full_name: "__traceiter_android_vh_timer_calc_index" } +elf_symbol { + id: 0x641d703d + name: "__traceiter_android_vh_timerfd_create" + is_defined: true + symbol_type: FUNCTION + crc: 0x8c68d59c + type_id: 0x9ba47dcc + full_name: 
"__traceiter_android_vh_timerfd_create" +} elf_symbol { id: 0xeac5f4dd name: "__traceiter_android_vh_try_fixup_sea" @@ -365639,6 +366060,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_rvh_do_el1_undef" } +elf_symbol { + id: 0x0197e2d7 + name: "__tracepoint_android_rvh_do_madvise_bypass" + is_defined: true + symbol_type: OBJECT + crc: 0x9b5ef67b + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_rvh_do_madvise_bypass" +} elf_symbol { id: 0xf51681a1 name: "__tracepoint_android_rvh_do_read_fault" @@ -365936,6 +366366,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_rvh_is_cpu_allowed" } +elf_symbol { + id: 0x77ee65c8 + name: "__tracepoint_android_rvh_kswapd_shrink_node" + is_defined: true + symbol_type: OBJECT + crc: 0x574b1a1e + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_rvh_kswapd_shrink_node" +} elf_symbol { id: 0x26d3bcc4 name: "__tracepoint_android_rvh_ksys_umount" @@ -366188,6 +366627,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_rvh_perf_rotate_context" } +elf_symbol { + id: 0xe63868b6 + name: "__tracepoint_android_rvh_perform_reclaim" + is_defined: true + symbol_type: OBJECT + crc: 0xd98f3b67 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_rvh_perform_reclaim" +} elf_symbol { id: 0x448fc4e4 name: "__tracepoint_android_rvh_place_entity" @@ -367565,6 +368013,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_check_new_page" } +elf_symbol { + id: 0x22653fb1 + name: "__tracepoint_android_vh_check_set_ioprio" + is_defined: true + symbol_type: OBJECT + crc: 0x85607f10 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_check_set_ioprio" +} elf_symbol { id: 0x2cf5984c name: "__tracepoint_android_vh_check_uninterruptible_tasks" @@ -367592,6 +368049,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_cleanup_old_buffers_bypass" } +elf_symbol { + id: 0xe8cdcd02 + name: "__tracepoint_android_vh_clear_reclaimed_folio" + is_defined: true + symbol_type: OBJECT + crc: 0x3f82125f + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_clear_reclaimed_folio" +} elf_symbol { id: 0xbbfbc9db name: "__tracepoint_android_vh_clear_rwsem_reader_owned" @@ -368168,6 +368634,24 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_enable_thermal_genl_check" } +elf_symbol { + id: 0xdef7c547 + name: "__tracepoint_android_vh_ep_create_wakeup_source" + is_defined: true + symbol_type: OBJECT + crc: 0x4c0031cd + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_ep_create_wakeup_source" +} +elf_symbol { + id: 0x4276556f + name: "__tracepoint_android_vh_evict_folios_bypass" + is_defined: true + symbol_type: OBJECT + crc: 0xd0fd44dc + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_evict_folios_bypass" +} elf_symbol { id: 0x684e5f4f name: "__tracepoint_android_vh_exit_check" @@ -368303,6 +368787,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_filemap_map_pages_range" } +elf_symbol { + id: 0xcef3bcd6 + name: "__tracepoint_android_vh_filemap_pages" + is_defined: true + symbol_type: OBJECT + crc: 0x8ee37e89 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_filemap_pages" +} elf_symbol { id: 0xf61927fd name: "__tracepoint_android_vh_filemap_read" @@ -368816,6 +369309,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_jiffies_update" } +elf_symbol { + id: 0xee6ae51f + name: "__tracepoint_android_vh_keep_reclaimed_folio" + is_defined: true + symbol_type: OBJECT + crc: 
0x7266479e + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_keep_reclaimed_folio" +} elf_symbol { id: 0xa48390ca name: "__tracepoint_android_vh_killed_process" @@ -368906,6 +369408,24 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_look_around_migrate_folio" } +elf_symbol { + id: 0x72b9b667 + name: "__tracepoint_android_vh_lru_gen_add_folio_skip" + is_defined: true + symbol_type: OBJECT + crc: 0x9330c193 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_lru_gen_add_folio_skip" +} +elf_symbol { + id: 0xc6d82ff1 + name: "__tracepoint_android_vh_lru_gen_del_folio_skip" + is_defined: true + symbol_type: OBJECT + crc: 0xfe0abcc3 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_lru_gen_del_folio_skip" +} elf_symbol { id: 0x6e96d5d2 name: "__tracepoint_android_vh_lruvec_add_folio" @@ -370859,6 +371379,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_timer_calc_index" } +elf_symbol { + id: 0x2df766e3 + name: "__tracepoint_android_vh_timerfd_create" + is_defined: true + symbol_type: OBJECT + crc: 0xc5e05e8b + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_timerfd_create" +} elf_symbol { id: 0x0896a337 name: "__tracepoint_android_vh_try_fixup_sea" @@ -376113,6 +376642,15 @@ elf_symbol { type_id: 0x1d2130a6 full_name: "can_free_echo_skb" } +elf_symbol { + id: 0xceaadfeb + name: "can_get_echo_skb" + is_defined: true + symbol_type: FUNCTION + crc: 0x2e11e027 + type_id: 0xc3621fce + full_name: "can_get_echo_skb" +} elf_symbol { id: 0x72704652 name: "can_put_echo_skb" @@ -400761,6 +401299,15 @@ elf_symbol { type_id: 0x171a00e6 full_name: "kthread_bind_mask" } +elf_symbol { + id: 0x3360440d + name: "kthread_blkcg" + is_defined: true + symbol_type: FUNCTION + crc: 0x6ad0c0a7 + type_id: 0xc20e2d34 + full_name: "kthread_blkcg" +} elf_symbol { id: 0x5b454f0c name: "kthread_cancel_delayed_work_sync" @@ -406243,6 +406790,15 @@ elf_symbol { type_id: 0xc44c8212 full_name: "of_find_device_by_node" } +elf_symbol { + id: 0xd1459283 + name: "of_find_gpio" + is_defined: true + symbol_type: FUNCTION + crc: 0x245269e3 + type_id: 0x52b6bce6 + full_name: "of_find_gpio" +} elf_symbol { id: 0xdf129c81 name: "of_find_matching_node_and_match" @@ -435164,6 +435720,7 @@ interface { symbol_id: 0x40034e11 symbol_id: 0x6e62463e symbol_id: 0x1bf82dcd + symbol_id: 0x7f84bca1 symbol_id: 0x1151c5b7 symbol_id: 0x81449840 symbol_id: 0x611114e0 @@ -435197,6 +435754,7 @@ interface { symbol_id: 0xfc83b254 symbol_id: 0x7e99bc71 symbol_id: 0x7c212080 + symbol_id: 0xa5a0ecca symbol_id: 0x7fcabbe6 symbol_id: 0x3444c3ba symbol_id: 0xc6985675 @@ -435225,6 +435783,7 @@ interface { symbol_id: 0xc463ba81 symbol_id: 0x21c681ad symbol_id: 0xbe2c4a33 + symbol_id: 0xc16891a4 symbol_id: 0x097e467e symbol_id: 0xfad1d9f5 symbol_id: 0x8c494e33 @@ -435378,9 +435937,11 @@ interface { symbol_id: 0x42428033 symbol_id: 0x23a3e1fa symbol_id: 0x17b6819f + symbol_id: 0xeebf3d23 symbol_id: 0xa2224fa6 symbol_id: 0xd37cc550 symbol_id: 0xef7737f8 + symbol_id: 0xe50d4e50 symbol_id: 0xaed0a325 symbol_id: 0xc308efa9 symbol_id: 0x9cbc7813 @@ -435445,6 +436006,8 @@ interface { symbol_id: 0xf432d1c9 symbol_id: 0x02c8f91b symbol_id: 0xdcaa59a3 + symbol_id: 0x7f1591a1 + symbol_id: 0x10157f79 symbol_id: 0x1921d10d symbol_id: 0xb38eccdd symbol_id: 0x1f554c2a @@ -435460,6 +436023,7 @@ interface { symbol_id: 0x49c69e22 symbol_id: 0xb7d91f76 symbol_id: 0x4eda1196 + symbol_id: 0x97914e0c symbol_id: 0x158bf9d3 symbol_id: 0x0e217a5e symbol_id: 0x1b65e021 @@ -435517,6 +436081,7 @@ interface { 
symbol_id: 0x0bf41403 symbol_id: 0x4dca46cc symbol_id: 0xf83fbd26 + symbol_id: 0xc8685c71 symbol_id: 0xe261e8cc symbol_id: 0x19426a78 symbol_id: 0x04e61b81 @@ -435527,6 +436092,8 @@ interface { symbol_id: 0x57087e9e symbol_id: 0xe19d2bf8 symbol_id: 0x993f42ff + symbol_id: 0xa6d58835 + symbol_id: 0x3a741247 symbol_id: 0xf6964618 symbol_id: 0x5ed119ce symbol_id: 0x127dff5c @@ -435744,6 +436311,7 @@ interface { symbol_id: 0xa00355c9 symbol_id: 0xe334c57d symbol_id: 0xfeff2e7f + symbol_id: 0x641d703d symbol_id: 0xeac5f4dd symbol_id: 0x2bc25325 symbol_id: 0x0119fc41 @@ -435928,6 +436496,7 @@ interface { symbol_id: 0x25e320a3 symbol_id: 0xcd81b300 symbol_id: 0x7a6ec3db + symbol_id: 0x0197e2d7 symbol_id: 0xf51681a1 symbol_id: 0x411e7a56 symbol_id: 0xcfcbb846 @@ -435961,6 +436530,7 @@ interface { symbol_id: 0x4fd98142 symbol_id: 0x695c4baf symbol_id: 0x99d57c12 + symbol_id: 0x77ee65c8 symbol_id: 0x26d3bcc4 symbol_id: 0x8efb0ca4 symbol_id: 0xf35a6597 @@ -435989,6 +436559,7 @@ interface { symbol_id: 0xeea6bf23 symbol_id: 0xee3719e3 symbol_id: 0xae824e91 + symbol_id: 0xe63868b6 symbol_id: 0x448fc4e4 symbol_id: 0x121537db symbol_id: 0x50cc3541 @@ -436142,9 +436713,11 @@ interface { symbol_id: 0x9620eac1 symbol_id: 0xb436fd6c symbol_id: 0x9e7fa6cd + symbol_id: 0x22653fb1 symbol_id: 0x2cf5984c symbol_id: 0x62b6878e symbol_id: 0xca10f06e + symbol_id: 0xe8cdcd02 symbol_id: 0xbbfbc9db symbol_id: 0x36fc8313 symbol_id: 0xaf598d05 @@ -436209,6 +436782,8 @@ interface { symbol_id: 0x732a182b symbol_id: 0xe5deb919 symbol_id: 0x54b2cd01 + symbol_id: 0xdef7c547 + symbol_id: 0x4276556f symbol_id: 0x684e5f4f symbol_id: 0xac5d150b symbol_id: 0x0d418d38 @@ -436224,6 +436799,7 @@ interface { symbol_id: 0x6d970e8c symbol_id: 0xb34d9200 symbol_id: 0x223c9b64 + symbol_id: 0xcef3bcd6 symbol_id: 0xf61927fd symbol_id: 0xa8c30e54 symbol_id: 0x843cb07b @@ -436281,6 +436857,7 @@ interface { symbol_id: 0x31457bed symbol_id: 0x62c13726 symbol_id: 0xafbca760 + symbol_id: 0xee6ae51f symbol_id: 0xa48390ca symbol_id: 0xb1f4423e symbol_id: 0x918a6f4b @@ -436291,6 +436868,8 @@ interface { symbol_id: 0x94ddd62c symbol_id: 0xda2d53f2 symbol_id: 0x50a5a949 + symbol_id: 0x72b9b667 + symbol_id: 0xc6d82ff1 symbol_id: 0x6e96d5d2 symbol_id: 0xfa8a7008 symbol_id: 0x2f20ce5e @@ -436508,6 +437087,7 @@ interface { symbol_id: 0xd292bd77 symbol_id: 0xfcee39f3 symbol_id: 0x69721329 + symbol_id: 0x2df766e3 symbol_id: 0x0896a337 symbol_id: 0xd9d2bcff symbol_id: 0x09ba106b @@ -437092,6 +437672,7 @@ interface { symbol_id: 0x1a528d17 symbol_id: 0x71be4162 symbol_id: 0x4f5fc64f + symbol_id: 0xceaadfeb symbol_id: 0x72704652 symbol_id: 0x23b4653d symbol_id: 0x69e0ff72 @@ -439823,6 +440404,7 @@ interface { symbol_id: 0x80424e6f symbol_id: 0xa662c735 symbol_id: 0xa921f032 + symbol_id: 0x3360440d symbol_id: 0x5b454f0c symbol_id: 0x21fc0560 symbol_id: 0xd20ecdb9 @@ -440431,6 +441013,7 @@ interface { symbol_id: 0x985c43d8 symbol_id: 0xd5545a99 symbol_id: 0x9f89ff97 + symbol_id: 0xd1459283 symbol_id: 0xdf129c81 symbol_id: 0x94b29523 symbol_id: 0x27d7263a diff --git a/android/abi_gki_aarch64.stg.allowed_breaks b/android/abi_gki_aarch64.stg.allowed_breaks index f4048d616d41..08bf02f747be 100644 --- a/android/abi_gki_aarch64.stg.allowed_breaks +++ b/android/abi_gki_aarch64.stg.allowed_breaks @@ -113,6 +113,14 @@ type 'struct cgroup_root' changed type 'struct xhci_sideband' changed was fully defined, is now only declared +type 'struct pkvm_module_ops' changed + member 'u64 android_kabi_reserved4' was removed + member 'union { int(* 
register_guest_smc_handler)(bool(*)(struct arm_smccc_1_2_regs*, struct arm_smccc_res*, pkvm_handle_t), pkvm_handle_t); struct { u64 android_kabi_reserved4; }; union { }; }' was added + +type 'struct pkvm_module_ops' changed + member 'u64 android_kabi_reserved5' was removed + member 'union { int(* guest_stage2_pa)(pkvm_handle_t, u64, phys_addr_t*); struct { u64 android_kabi_reserved5; }; union { }; }' was added + type 'struct io_ring_ctx' changed member 'struct hlist_head io_buf_list' was removed 28 members ('struct wait_queue_head poll_wq' .. 'struct page** sqe_pages') changed diff --git a/android/abi_gki_aarch64_db845c b/android/abi_gki_aarch64_db845c index 072c51d302f3..11bf0af8b67b 100644 --- a/android/abi_gki_aarch64_db845c +++ b/android/abi_gki_aarch64_db845c @@ -53,7 +53,6 @@ clk_put clk_round_rate clk_set_rate - clk_sync_state clk_unprepare complete complete_all @@ -224,6 +223,7 @@ driver_register driver_unregister drm_add_edid_modes + drm_atomic_get_new_connector_for_encoder drm_atomic_get_private_obj_state drm_atomic_helper_bridge_destroy_state drm_atomic_helper_bridge_duplicate_state @@ -244,7 +244,6 @@ drm_connector_update_edid_property drm_crtc_add_crc_entry ___drm_dbg - __drm_debug __drm_dev_dbg drm_dev_printk drm_do_get_edid @@ -258,9 +257,6 @@ drm_mode_vrefresh drm_of_find_panel_or_bridge drm_printf - drm_rect_rotate - drm_rect_rotate_inv - drmm_kmalloc enable_irq eth_type_trans _find_first_bit @@ -275,6 +271,7 @@ fortify_panic free_io_pgtable_ops free_irq + fwnode_handle_put fwnode_property_present fwnode_property_read_u32_array gcd @@ -456,8 +453,8 @@ misc_register mod_delayed_work_on mod_timer - __module_get module_layout + module_put __msecs_to_jiffies msleep __mutex_init @@ -582,7 +579,6 @@ prepare_to_wait_event print_hex_dump _printk - __pskb_copy_fclone pskb_expand_head __pskb_pull_tail put_device @@ -697,7 +693,6 @@ simple_read_from_buffer single_open single_release - skb_clone skb_copy skb_copy_bits skb_dequeue @@ -813,6 +808,7 @@ usb_disabled __usecs_to_jiffies usleep_range_state + utf8_data_table v4l2_ctrl_handler_free v4l2_ctrl_handler_init_class v4l2_ctrl_new_std @@ -861,8 +857,6 @@ vunmap vzalloc wait_for_completion - wait_for_completion_interruptible - wait_for_completion_interruptible_timeout wait_for_completion_timeout __wake_up wake_up_process @@ -1056,6 +1050,7 @@ __drm_crtc_commit_free drm_crtc_commit_wait drm_crtc_wait_one_vblank + __drm_debug drm_display_mode_from_cea_vic drm_edid_dup drm_edid_duplicate @@ -1102,6 +1097,7 @@ __tracepoint_mmap_lock_released __tracepoint_mmap_lock_start_locking up_read + wait_for_completion_interruptible # required by gpi.ko krealloc @@ -1196,7 +1192,6 @@ of_cpu_node_to_id # required by lontium-lt9611.ko - drm_atomic_get_new_connector_for_encoder drm_hdmi_vendor_infoframe_from_display_mode # required by lontium-lt9611uxc.ko @@ -1265,6 +1260,7 @@ round_jiffies round_jiffies_relative sg_init_one + skb_clone skb_clone_sk skb_complete_wifi_ack skb_copy_expand @@ -1472,6 +1468,7 @@ drm_ioctl drm_kms_helper_poll_fini drm_kms_helper_poll_init + drmm_kmalloc drm_mm_init drm_mm_insert_node_in_range drmm_mode_config_init @@ -1521,6 +1518,8 @@ __drm_puts_coredump __drm_puts_seq_file drm_read + drm_rect_rotate + drm_rect_rotate_inv drm_release drm_rotation_simplify drm_self_refresh_helper_init @@ -1541,7 +1540,6 @@ get_unused_fd_flags gpiod_get_value hdmi_audio_infoframe_pack - icc_put idr_preload invalidate_mapping_pages iommu_map_sg @@ -1709,7 +1707,6 @@ gpiochip_unlock_as_irq handle_fasteoi_ack_irq handle_fasteoi_irq - module_put 
pinctrl_force_default pinctrl_force_sleep pm_power_off @@ -1789,7 +1786,6 @@ device_get_next_child_node devm_iio_device_alloc __devm_iio_device_register - fwnode_handle_put fwnode_property_read_string strchrnul @@ -1888,8 +1884,10 @@ get_user_ifreq kernel_bind lock_sock_nested + __module_get proto_register proto_unregister + __pskb_copy_fclone put_user_ifreq radix_tree_insert radix_tree_iter_delete @@ -1967,6 +1965,7 @@ driver_set_override platform_device_add platform_device_alloc + wait_for_completion_interruptible_timeout # required by slimbus.ko device_find_child @@ -2002,8 +2001,8 @@ snd_soc_dapm_widget_name_cmp # required by snd-soc-qcom-common.ko - snd_soc_dummy_dlc snd_soc_dai_link_set_capabilities + snd_soc_dummy_dlc snd_soc_of_get_dai_link_codecs snd_soc_of_get_dlc snd_soc_of_parse_audio_routing @@ -2093,6 +2092,7 @@ dma_sync_sg_for_device __free_pages __sg_page_iter_next + _totalram_pages # required by ufs-qcom.ko insert_resource diff --git a/android/abi_gki_aarch64_honor b/android/abi_gki_aarch64_honor index 2b38e1e8053e..48c49720e0ea 100644 --- a/android/abi_gki_aarch64_honor +++ b/android/abi_gki_aarch64_honor @@ -93,6 +93,7 @@ wait_for_completion_io bio_crypt_set_ctx zero_fill_bio_iter + percpu_ref_is_zero __trace_bputs __traceiter_android_vh_proactive_compact_wmark_high __tracepoint_android_vh_proactive_compact_wmark_high diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk index 81ee400f2606..ebecd7f8c5dd 100644 --- a/android/abi_gki_aarch64_mtk +++ b/android/abi_gki_aarch64_mtk @@ -163,6 +163,7 @@ cancel_delayed_work_sync cancel_work cancel_work_sync + can_get_echo_skb capable cdc_parse_cdc_header cdev_add diff --git a/android/abi_gki_aarch64_oplus b/android/abi_gki_aarch64_oplus index e9e9ff32a360..c819f26cd871 100644 --- a/android/abi_gki_aarch64_oplus +++ b/android/abi_gki_aarch64_oplus @@ -81,6 +81,7 @@ iterate_dir jiffies_64_to_clock_t kick_process + kthread_blkcg ktime_get_coarse_real_ts64 ktime_get_raw_ts64 ktime_get_real_ts64 @@ -193,6 +194,7 @@ tcp_hashinfo tcp_reno_undo_cwnd touch_atime + __traceiter_android_rvh_do_madvise_bypass __traceiter_android_rvh_post_init_entity_util_avg __traceiter_android_rvh_rtmutex_force_update __traceiter_android_rvh_set_cpus_allowed_comm @@ -336,6 +338,7 @@ __traceiter_block_rq_issue __traceiter_block_rq_merge __traceiter_block_rq_requeue + __traceiter_android_vh_check_set_ioprio __traceiter_mm_vmscan_kswapd_wake __traceiter_net_dev_queue __traceiter_net_dev_xmit @@ -348,6 +351,15 @@ __traceiter_sched_stat_wait __traceiter_sched_waking __traceiter_task_rename + __traceiter_android_vh_lru_gen_add_folio_skip + __traceiter_android_vh_lru_gen_del_folio_skip + __traceiter_android_vh_evict_folios_bypass + __traceiter_android_vh_keep_reclaimed_folio + __traceiter_android_vh_clear_reclaimed_folio + __traceiter_android_vh_filemap_pages + __traceiter_android_rvh_kswapd_shrink_node + __traceiter_android_rvh_perform_reclaim + __tracepoint_android_rvh_do_madvise_bypass __tracepoint_android_rvh_post_init_entity_util_avg __tracepoint_android_rvh_rtmutex_force_update __tracepoint_android_rvh_set_cpus_allowed_comm @@ -491,6 +503,7 @@ __tracepoint_block_rq_issue __tracepoint_block_rq_merge __tracepoint_block_rq_requeue + __tracepoint_android_vh_check_set_ioprio __tracepoint_mm_vmscan_kswapd_wake __tracepoint_net_dev_queue __tracepoint_net_dev_xmit @@ -503,6 +516,14 @@ __tracepoint_sched_stat_wait __tracepoint_sched_waking __tracepoint_task_rename + __tracepoint_android_vh_lru_gen_add_folio_skip + 
__tracepoint_android_vh_lru_gen_del_folio_skip + __tracepoint_android_vh_evict_folios_bypass + __tracepoint_android_vh_keep_reclaimed_folio + __tracepoint_android_vh_clear_reclaimed_folio + __tracepoint_android_vh_filemap_pages + __tracepoint_android_rvh_kswapd_shrink_node + __tracepoint_android_rvh_perform_reclaim folio_total_mapcount page_mapping __trace_puts diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel index b9f969ca322a..9408b1c75dfc 100644 --- a/android/abi_gki_aarch64_pixel +++ b/android/abi_gki_aarch64_pixel @@ -2670,6 +2670,7 @@ __traceiter_android_vh_enable_thermal_genl_check __traceiter_android_vh_filemap_get_folio __traceiter_android_vh_free_pages_prepare_init + __traceiter_android_vh_ep_create_wakeup_source __traceiter_android_vh_ipi_stop __traceiter_android_vh_mm_compaction_begin __traceiter_android_vh_mm_compaction_end @@ -2685,6 +2686,7 @@ __traceiter_android_vh_si_meminfo_adjust __traceiter_android_vh_sysrq_crash __traceiter_android_vh_tune_swappiness + __traceiter_android_vh_timerfd_create __traceiter_android_vh_typec_store_partner_src_caps __traceiter_android_vh_typec_tcpm_log __traceiter_android_vh_typec_tcpm_modify_src_caps @@ -2795,6 +2797,7 @@ __tracepoint_android_vh_enable_thermal_genl_check __tracepoint_android_vh_filemap_get_folio __tracepoint_android_vh_free_pages_prepare_init + __tracepoint_android_vh_ep_create_wakeup_source __tracepoint_android_vh_ipi_stop __tracepoint_android_vh_mm_compaction_begin __tracepoint_android_vh_mm_compaction_end @@ -2810,6 +2813,7 @@ __tracepoint_android_vh_si_meminfo_adjust __tracepoint_android_vh_sysrq_crash __tracepoint_android_vh_tune_swappiness + __tracepoint_android_vh_timerfd_create __tracepoint_android_vh_typec_store_partner_src_caps __tracepoint_android_vh_typec_tcpm_log __tracepoint_android_vh_typec_tcpm_modify_src_caps diff --git a/android/abi_gki_aarch64_virtual_device b/android/abi_gki_aarch64_virtual_device index 8961a9fb1cdd..db28f9c6be75 100644 --- a/android/abi_gki_aarch64_virtual_device +++ b/android/abi_gki_aarch64_virtual_device @@ -224,6 +224,7 @@ kmemdup kstrndup kstrtobool_from_user + kstrtoint kthread_create_on_node kthread_park kthread_should_stop @@ -297,17 +298,13 @@ page_pool_alloc_frag page_pool_destroy page_pool_put_defragged_page + param_array_ops param_ops_bool param_ops_charp param_ops_int param_ops_uint passthru_features_check pci_bus_type - pci_iomap_range - pci_release_region - pci_release_selected_regions - pci_request_region - pci_request_selected_regions __per_cpu_offset perf_trace_buf_alloc perf_trace_run_bpf_submit @@ -377,7 +374,6 @@ __serio_register_driver __serio_register_port serio_unregister_driver - set_page_private sg_alloc_table sg_free_table sg_init_one @@ -405,11 +401,22 @@ skb_queue_tail skb_to_sgvec skb_trim + snd_card_register + snd_ctl_add + snd_ctl_new1 + snd_ctl_notify + snd_pcm_format_physical_width + snd_pcm_hw_constraint_integer + snd_pcm_new + snd_pcm_period_elapsed + snd_pcm_set_managed_buffer_all + snd_pcm_set_ops snprintf sprintf sscanf __stack_chk_fail strcasecmp + strchr strcmp strcpy strlen @@ -453,7 +460,6 @@ usb_create_shared_hcd usb_deregister usb_disabled - usb_find_common_endpoints usb_free_urb usb_get_dev usb_hcd_check_unlink_urb @@ -473,6 +479,7 @@ usb_unanchor_urb __usecs_to_jiffies usleep_range_state + utf8_data_table v4l2_device_register v4l2_device_unregister v4l2_event_pending @@ -700,6 +707,8 @@ pci_enable_device pci_read_config_byte __pci_register_driver + pci_release_region + pci_request_region pci_unregister_driver # 
required by goldfish_battery.ko @@ -747,6 +756,7 @@ unregister_candev usb_control_msg_recv usb_control_msg_send + usb_find_common_endpoints # required by hci_vhci.ko _copy_from_iter @@ -1044,6 +1054,7 @@ dma_sync_sg_for_cpu __sg_page_iter_next __sg_page_iter_start + _totalram_pages vmap vunmap @@ -1056,7 +1067,6 @@ # required by v4l2loopback.ko kstrtoull mutex_lock_killable - param_array_ops v4l2_ctrl_handler_free v4l2_ctrl_handler_init_class v4l2_ctrl_handler_setup @@ -1085,12 +1095,10 @@ # required by vhci-hcd.ko kernel_sendmsg kernel_sock_shutdown - kstrtoint kstrtoll kthread_stop_put platform_bus sockfd_lookup - strchr sysfs_create_group sysfs_remove_group usb_speed_string @@ -1310,10 +1318,6 @@ xdp_rxq_info_unreg xdp_warn -# required by virtio_pci_legacy_dev.ko - pci_iomap - pci_iounmap - # required by virtio_pmem.ko nvdimm_bus_register nvdimm_bus_unregister @@ -1322,20 +1326,10 @@ # required by virtio_snd.ko snd_card_free snd_card_new - snd_card_register - snd_ctl_add - snd_ctl_new1 - snd_ctl_notify snd_jack_new snd_jack_report snd_pcm_add_chmap_ctls - snd_pcm_format_physical_width - snd_pcm_hw_constraint_integer snd_pcm_lib_ioctl - snd_pcm_new - snd_pcm_period_elapsed - snd_pcm_set_managed_buffer_all - snd_pcm_set_ops wait_for_completion_interruptible_timeout # required by virtual-cpufreq.ko @@ -1480,10 +1474,15 @@ pci_find_ext_capability pci_find_next_capability pci_free_irq_vectors + pci_iomap + pci_iomap_range + pci_iounmap pci_irq_get_affinity pci_irq_vector pci_read_config_dword pci_read_config_word + pci_release_selected_regions + pci_request_selected_regions pci_set_master pci_vfs_assigned pipe_lock @@ -1497,6 +1496,7 @@ set_capacity_and_notify set_disk_ro __SetPageMovable + set_page_private sg_alloc_table_chained sg_free_table_chained si_mem_available diff --git a/android/abi_gki_aarch64_xiaomi2 b/android/abi_gki_aarch64_xiaomi2 index e63fd8e44445..c27a275ed937 100644 --- a/android/abi_gki_aarch64_xiaomi2 +++ b/android/abi_gki_aarch64_xiaomi2 @@ -1415,6 +1415,7 @@ of_find_node_by_name of_find_node_opts_by_path of_find_property + of_find_gpio of_fwnode_ops of_get_child_by_name of_get_compatible_child diff --git a/android/gki/aarch64/afdo/README.md b/android/gki/aarch64/afdo/README.md new file mode 100644 index 000000000000..8c7781fd62ec --- /dev/null +++ b/android/gki/aarch64/afdo/README.md @@ -0,0 +1,47 @@ +# AutoFDO profiles for Android common kernels + +This directory contains AutoFDO profiles for Android common kernels. These profiles can be used to +optimize kernel builds for specific architectures and kernel versions. + +## kernel.afdo + +kernel.afdo is an AArch64 kernel profile collected on kernel version 6.6.82 ( +SHA b62ea68f41a901d5f07f48bd6f1d3a117d801411, build server ID 13287877) using Pixel 6. + +### Performance improvements + +| Benchmark | Improvement | +| --------------------- | ----------- | +| Boot time | 2.2% | +| Cold App launch time | 2.7% | +| Binder-rpc | 4.4% | +| Binder-addints | 14.1% | +| Hwbinder | 17.0% | +| Bionic (syscall_mmap) | 1.6% | + +Benchmark results were tested on Pixel 6. + +To test a kernel prebuilt with the AutoFDO profile, navigate to [Android build server]( +https://ci.android.com/builds/branches/aosp_kernel-common-android15-6.6/grid) and download +the kernel prebuilts under the `kernel_aarch64_autofdo` target. + +## Steps to reproduce the profile + +A kernel profile is generated by running app crawling and app launching for top 100 apps from Google +Play Store. 
While running, we collect ETM data for the kernel, which records executed instruction +stream. Finally, we merge and convert ETM data to one AutoFDO profile. + +1. Build a kernel image and flash it on an Android device + * The source code and test device used to generate each profile are described above. + * We use a Pixel device. But using other real devices should get a similar profile. + +2. Run app crawling and app launching for top 100 apps + * Add a gmail account on the test device. Because app crawler can use the account to automatically + login some of the apps. + * We run [App Crawler](https://developer.android.com/studio/test/other-testing-tools/app-crawler) + for one app for 3 minutes, and run it twice. + * We run app launching for one app for 3 seconds, and run it 15 times. After each running, the + app is killed and cache is cleared. So we get profile for cold app startups. + +3. Record ETM data while running app crawling and app launching. + * We use cmdline `simpleperf record -e cs-etm:k -a` to [record ETM data for the kernel](https://android.googlesource.com/platform/system/extras/+/master/simpleperf/doc/collect_etm_data_for_autofdo.md). diff --git a/android/gki/aarch64/afdo/kernel.afdo b/android/gki/aarch64/afdo/kernel.afdo new file mode 100644 index 000000000000..3ce73ba9446c Binary files /dev/null and b/android/gki/aarch64/afdo/kernel.afdo differ diff --git a/arch/arm64/configs/microdroid_defconfig b/arch/arm64/configs/microdroid_defconfig index 6fb75e77081a..5cc6dec65c71 100644 --- a/arch/arm64/configs/microdroid_defconfig +++ b/arch/arm64/configs/microdroid_defconfig @@ -5,14 +5,9 @@ CONFIG_PREEMPT=y CONFIG_IRQ_TIME_ACCOUNTING=y CONFIG_PSI=y CONFIG_RCU_EXPERT=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_NOCB_CPU=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -# CONFIG_UTS_NS is not set -# CONFIG_TIME_NS is not set -# CONFIG_PID_NS is not set -# CONFIG_NET_NS is not set +CONFIG_LOG_BUF_SHIFT=14 # CONFIG_RD_GZIP is not set # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set @@ -20,11 +15,13 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_RD_LZO is not set # CONFIG_RD_ZSTD is not set CONFIG_BOOT_CONFIG=y +CONFIG_EXPERT=y +# CONFIG_IO_URING is not set CONFIG_PROFILING=y +CONFIG_KEXEC_FILE=y CONFIG_SCHED_MC=y CONFIG_NR_CPUS=32 CONFIG_PARAVIRT_TIME_ACCOUNTING=y -CONFIG_KEXEC_FILE=y CONFIG_ARM64_SW_TTBR0_PAN=y CONFIG_RANDOMIZE_BASE=y # CONFIG_RANDOMIZE_MODULE_REGION_FULL is not set @@ -40,10 +37,11 @@ CONFIG_VIRTUALIZATION=y CONFIG_JUMP_LABEL=y CONFIG_SHADOW_CALL_STACK=y CONFIG_CFI_CLANG=y -CONFIG_BLK_DEV_ZONED=y +# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_MSDOS_PARTITION is not set -CONFIG_IOSCHED_BFQ=y +# CONFIG_MQ_IOSCHED_DEADLINE is not set +# CONFIG_MQ_IOSCHED_KYBER is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=y # CONFIG_SLAB_MERGE_DEFAULT is not set @@ -51,8 +49,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y # CONFIG_COMPAT_BRK is not set -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y @@ -69,9 +65,8 @@ CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCIEAER=y CONFIG_PCI_IOV=y +# CONFIG_VGA_ARB is not set CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCIE_DW_PLAT_EP=y -CONFIG_PCIE_KIRIN=y CONFIG_PCI_ENDPOINT=y CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_CACHE is not set @@ -102,7 +97,6 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=0 CONFIG_SERIAL_OF_PLATFORM=y CONFIG_NULL_TTY=y 
CONFIG_VIRTIO_CONSOLE=y -CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_CCTRNG=y # CONFIG_DEVMEM is not set # CONFIG_DEVPORT is not set @@ -119,7 +113,6 @@ CONFIG_RTC_DRV_PL030=y CONFIG_RTC_DRV_PL031=y CONFIG_DMABUF_HEAPS=y CONFIG_DMABUF_SYSFS_STATS=y -CONFIG_UIO=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_STAGING=y @@ -142,6 +135,7 @@ CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_HCTR2=y CONFIG_CRYPTO_LZO=y CONFIG_CRYPTO_SHA2_ARM64_CE=y @@ -152,16 +146,13 @@ CONFIG_DMA_RESTRICTED_POOL=y CONFIG_PRINTK_TIME=y CONFIG_PRINTK_CALLER=y CONFIG_DYNAMIC_DEBUG_CORE=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF5=y CONFIG_DEBUG_INFO_REDUCED=y -CONFIG_DEBUG_INFO_COMPRESSED=y CONFIG_HEADERS_INSTALL=y # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UBSAN=y CONFIG_UBSAN_TRAP=y -CONFIG_UBSAN_LOCAL_BOUNDS=y # CONFIG_UBSAN_SHIFT is not set # CONFIG_UBSAN_BOOL is not set # CONFIG_UBSAN_ENUM is not set @@ -174,8 +165,6 @@ CONFIG_PANIC_TIMEOUT=-1 CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y CONFIG_SCHEDSTATS=y -# CONFIG_DEBUG_PREEMPT is not set -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_HIST_TRIGGERS=y CONFIG_PID_IN_CONTEXTIDR=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 1012a4813ccf..4a16808c3ba8 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -658,4 +658,7 @@ int __pkvm_topup_hyp_alloc(unsigned long nr_pages); int pkvm_call_hyp_nvhe_ppage(struct kvm_pinned_page *ppage, int (*call_hyp_nvhe)(u64, u64, u8, void*), void *args, bool unmap); + +int pkvm_guest_stage2_pa(pkvm_handle_t handle, u64 ipa, phys_addr_t *phys); + #endif /* __ARM64_KVM_PKVM_H__ */ diff --git a/arch/arm64/include/asm/kvm_pkvm_module.h b/arch/arm64/include/asm/kvm_pkvm_module.h index ef5d8fe64136..2be3f122212e 100644 --- a/arch/arm64/include/asm/kvm_pkvm_module.h +++ b/arch/arm64/include/asm/kvm_pkvm_module.h @@ -97,6 +97,10 @@ enum pkvm_psci_notification { * @register_host_smc_handler: @cb is called whenever the host issues an SMC * pKVM couldn't handle. If @cb returns false, the * SMC will be forwarded to EL3. + * @register_guest_smc_handler: @cb is called whenever a guest identified by the + * pkvm_handle issues an SMC which pKVM doesn't + * handle. If @cb returns false, the control is + * given back to the host kernel to handle the exit. * @register_default_trap_handler: * @cb is called whenever EL2 traps EL1 and pKVM * has not handled it. If @cb returns false, the @@ -161,6 +165,14 @@ enum pkvm_psci_notification { * @iommu_donate_pages_atomic: Allocate memory from IOMMU identity pool. * @iommu_reclaim_pages_atomic: Reclaim memory from iommu_donate_pages_atomic() * @hyp_smp_processor_id: Current CPU id + * @guest_stage2_pa: Look up and return the PA (@phys) mapped into + * the specified VM (@handle) at the specified + * intermediate physical address (@ipa). If there + * is no mapping, or if it is a block mapping, + * then -EINVAL will be returned. Note that no + * lock or pin is held on the returned PA; the + * only guarantee is that @handle:@ipa -> @phys + * at some point during the call to this function. 
*/ struct pkvm_module_ops { int (*create_private_mapping)(phys_addr_t phys, size_t size, @@ -227,8 +239,13 @@ struct pkvm_module_ops { ANDROID_KABI_USE(1, void (*iommu_flush_unmap_cache)(struct kvm_iommu_paddr_cache *cache)); ANDROID_KABI_USE(2, int (*host_stage2_enable_lazy_pte)(u64 addr, u64 nr_pages)); ANDROID_KABI_USE(3, int (*host_stage2_disable_lazy_pte)(u64 addr, u64 nr_pages)); - ANDROID_KABI_RESERVE(4); - ANDROID_KABI_RESERVE(5); + ANDROID_KABI_USE(4, int (*register_guest_smc_handler)(bool (*cb)( + struct arm_smccc_1_2_regs *, + struct arm_smccc_res *res, + pkvm_handle_t handle), + pkvm_handle_t handle)); + ANDROID_KABI_USE(5, int (*guest_stage2_pa)(pkvm_handle_t handle, + u64 ipa, phys_addr_t *phys)); ANDROID_KABI_RESERVE(6); ANDROID_KABI_RESERVE(7); ANDROID_KABI_RESERVE(8); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0b3bdc2638a2..40d9b7341097 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -100,4 +100,7 @@ static __always_inline void __load_host_stage2(void) else write_sysreg(0, vttbr_el2); } + +int guest_stage2_pa(struct pkvm_hyp_vm *vm, u64 ipa, phys_addr_t *phys); + #endif /* __KVM_NVHE_MEM_PROTECT__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 5df589bd4641..75f1be8a15de 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -19,7 +19,7 @@ void *hyp_fixmap_map(phys_addr_t phys); void hyp_fixmap_unmap(void); void *hyp_fixblock_map(phys_addr_t phys); void hyp_fixblock_unmap(void); -void hyp_poison_page(phys_addr_t phys); +void hyp_poison_page(phys_addr_t phys, size_t page_size); int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); diff --git a/arch/arm64/kvm/hyp/include/nvhe/modules.h b/arch/arm64/kvm/hyp/include/nvhe/modules.h index 4dd6c1ceed05..fffc3c1921a3 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/modules.h +++ b/arch/arm64/kvm/hyp/include/nvhe/modules.h @@ -1,9 +1,15 @@ #include +#include +#include #define HCALL_HANDLED 0 #define HCALL_UNHANDLED -1 int __pkvm_register_host_smc_handler(bool (*cb)(struct user_pt_regs *)); +int __pkvm_register_guest_smc_handler(bool (*cb)(struct arm_smccc_1_2_regs *, + struct arm_smccc_res *res, + pkvm_handle_t handle), + pkvm_handle_t handle); int __pkvm_register_default_trap_handler(bool (*cb)(struct user_pt_regs *)); int __pkvm_register_illegal_abt_notifier(void (*cb)(struct user_pt_regs *)); int __pkvm_register_hyp_panic_notifier(void (*cb)(struct user_pt_regs *)); diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 5f456a8227af..887ac44855c5 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -74,6 +74,9 @@ struct pkvm_hyp_vm { */ bool is_dying; + bool (*smc_handler)(struct arm_smccc_1_2_regs *regs, + struct arm_smccc_res *res, pkvm_handle_t handle); + /* Array of the hyp vCPU structures for this VM. 
*/ struct pkvm_hyp_vcpu *vcpus[]; }; @@ -140,6 +143,8 @@ void pkvm_reset_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu); bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code); bool kvm_hyp_handle_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code); +bool kvm_handle_pvm_smc64(struct kvm_vcpu *vcpu, u64 *exit_code); + struct pkvm_hyp_vcpu *pkvm_mpidr_to_hyp_vcpu(struct pkvm_hyp_vm *vm, u64 mpidr); static inline bool pkvm_hyp_vm_has_pvmfw(struct pkvm_hyp_vm *vm) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index af4a7fe371d8..736357468985 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_ffa_version = ffa_req_version; } - if (hyp_ffa_post_init()) + if (hyp_ffa_post_init()) { res->a0 = FFA_RET_NOT_SUPPORTED; - else { - has_version_negotiated = true; + } else { + smp_store_release(&has_version_negotiated, true); res->a0 = hyp_ffa_version; } unlock: @@ -815,7 +815,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; - if (!has_version_negotiated && func_id != FFA_VERSION) { + if (func_id != FFA_VERSION && + !smp_load_acquire(&has_version_negotiated)) { ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); goto unhandled; } diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c index d4bbcff0a428..f06b84357eb4 100644 --- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c +++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c @@ -494,6 +494,9 @@ int kvm_iommu_map_pages(pkvm_handle_t domain_id, unsigned long iova, iova + size < iova || paddr + size < paddr) return -E2BIG; + if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID) + return -EINVAL; + domain = handle_to_domain(domain_id); if (!domain || domain_get(domain)) return -ENOENT; @@ -595,6 +598,9 @@ size_t kvm_iommu_unmap_pages(pkvm_handle_t domain_id, iova + size < iova) return 0; + if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID) + return 0; + domain = handle_to_domain(domain_id); if (!domain || domain_get(domain)) return 0; @@ -626,6 +632,9 @@ phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t domain_id, unsigned long iova) if (!kvm_iommu_ops || !kvm_iommu_ops->iova_to_phys) return 0; + if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID) + return iova; + domain = handle_to_domain( domain_id); if (!domain || domain_get(domain)) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 7371d7f0768d..c95a5e896251 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -395,7 +395,7 @@ static int relinquish_walker(const struct kvm_pgtable_visit_ctx *ctx, phys += ctx->addr - addr; if (state == PKVM_PAGE_OWNED) { - hyp_poison_page(phys); + hyp_poison_page(phys, PAGE_SIZE); psci_mem_protect_dec(1); } @@ -2797,20 +2797,29 @@ int __pkvm_host_donate_guest(struct pkvm_hyp_vcpu *vcpu, u64 pfn, u64 gfn, return ret; } -void hyp_poison_page(phys_addr_t phys) +void hyp_poison_page(phys_addr_t phys, size_t size) { - void *addr = hyp_fixmap_map(phys); + WARN_ON(!PAGE_ALIGNED(size)); - memset(addr, 0, PAGE_SIZE); - /* - * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page() - * here as the latter may elide the CMO under the assumption that FWB - * will be enabled on CPUs that support it. This is incorrect for the - * host stage-2 and would otherwise lead to a malicious host potentially - * being able to read the contents of newly reclaimed guest pages. 
- */ - kvm_flush_dcache_to_poc(addr, PAGE_SIZE); - hyp_fixmap_unmap(); + while (size) { + size_t __size = size == PMD_SIZE ? size : PAGE_SIZE; + void *addr = __fixmap_guest_page(__hyp_va(phys), &__size); + + memset(addr, 0, __size); + + /* + * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page() + * here as the latter may elide the CMO under the assumption that FWB + * will be enabled on CPUs that support it. This is incorrect for the + * host stage-2 and would otherwise lead to a malicious host potentially + * being able to read the contents of newly reclaimed guest pages. + */ + kvm_flush_dcache_to_poc(addr, __size); + __fixunmap_guest_page(__size); + + size -= __size; + phys += __size; + } } void destroy_hyp_vm_pgt(struct pkvm_hyp_vm *vm) @@ -2845,7 +2854,7 @@ int __pkvm_host_reclaim_page(struct pkvm_hyp_vm *vm, u64 pfn, u64 ipa, u8 order) switch((int)guest_get_page_state(pte, ipa)) { case PKVM_PAGE_OWNED: WARN_ON(__host_check_page_state_range(phys, page_size, PKVM_NOPAGE)); - hyp_poison_page(phys); + hyp_poison_page(phys, page_size); psci_mem_protect_dec(1 << order); break; case PKVM_PAGE_SHARED_BORROWED: @@ -3009,6 +3018,26 @@ int host_stage2_get_leaf(phys_addr_t phys, kvm_pte_t *ptep, u32 *level) return ret; } +int guest_stage2_pa(struct pkvm_hyp_vm *vm, u64 ipa, phys_addr_t *phys) +{ + kvm_pte_t pte; + u32 level; + int ret; + + guest_lock_component(vm); + ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); + guest_unlock_component(vm); + + if (ret) + return ret; + + if (!kvm_pte_valid(pte) || level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + + *phys = kvm_pte_to_phys(pte); + return 0; +} + #ifdef CONFIG_NVHE_EL2_DEBUG static void *snap_zalloc_page(void *mc) { diff --git a/arch/arm64/kvm/hyp/nvhe/modules.c b/arch/arm64/kvm/hyp/nvhe/modules.c index adeda6cfff57..2553d3e3ac00 100644 --- a/arch/arm64/kvm/hyp/nvhe/modules.c +++ b/arch/arm64/kvm/hyp/nvhe/modules.c @@ -124,6 +124,7 @@ const struct pkvm_module_ops module_ops = { .host_stage2_mod_prot = module_change_host_page_prot, .host_stage2_get_leaf = host_stage2_get_leaf, .register_host_smc_handler = __pkvm_register_host_smc_handler, + .register_guest_smc_handler = __pkvm_register_guest_smc_handler, .register_default_trap_handler = __pkvm_register_default_trap_handler, .register_illegal_abt_notifier = __pkvm_register_illegal_abt_notifier, .register_psci_notifier = __pkvm_register_psci_notifier, @@ -165,6 +166,7 @@ const struct pkvm_module_ops module_ops = { .iommu_flush_unmap_cache = kvm_iommu_flush_unmap_cache, .host_stage2_enable_lazy_pte = host_stage2_enable_lazy_pte, .host_stage2_disable_lazy_pte = host_stage2_disable_lazy_pte, + .guest_stage2_pa = pkvm_guest_stage2_pa, }; int __pkvm_init_module(void *module_init) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 1a64692e5199..517e4be5af84 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -7,6 +7,8 @@ #include #include +#include + #include #include @@ -1098,7 +1100,7 @@ void pkvm_poison_pvmfw_pages(void) phys_addr_t addr = pvmfw_base; while (npages--) { - hyp_poison_page(addr); + hyp_poison_page(addr, PAGE_SIZE); addr += PAGE_SIZE; } } @@ -1682,6 +1684,64 @@ static bool pkvm_forward_trng(struct kvm_vcpu *vcpu) return true; } +static bool is_standard_secure_service_call(u64 func_id) +{ + return (func_id >= PSCI_0_2_FN_BASE && func_id <= ARM_CCA_FUNC_END) || + (func_id >= PSCI_0_2_FN64_BASE && func_id <= ARM_CCA_64BIT_FUNC_END); +} + +bool kvm_handle_pvm_smc64(struct kvm_vcpu *vcpu, u64 
*exit_code) +{ + bool handled = false; + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + struct pkvm_hyp_vm *vm; + struct pkvm_hyp_vcpu *hyp_vcpu; + struct arm_smccc_1_2_regs regs; + struct arm_smccc_res res; + DECLARE_REG(u64, func_id, ctxt, 0); + + hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu); + vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); + + if (is_standard_secure_service_call(func_id)) + return false; + + /* Paired with cmpxchg_release in the guest registration handler */ + if (smp_load_acquire(&vm->smc_handler)) { + memcpy(&regs, &ctxt->regs, sizeof(regs)); + handled = vm->smc_handler(&regs, &res, vm->kvm.arch.pkvm.handle); + + /* Pass the return back to the calling guest */ + memcpy(&ctxt->regs.regs[0], &regs, sizeof(res)); + } + + /* SMC was trapped, move ELR past the current PC. */ + if (handled) + __kvm_skip_instr(vcpu); + + return handled; +} + +int __pkvm_register_guest_smc_handler(bool (*cb)(struct arm_smccc_1_2_regs *, + struct arm_smccc_res *res, + pkvm_handle_t handle), + pkvm_handle_t handle) +{ + int ret = -EINVAL; + struct pkvm_hyp_vm *vm; + + if (!cb) + return ret; + + hyp_read_lock(&vm_table_lock); + vm = get_vm_by_handle(handle); + if (vm) + ret = cmpxchg_release(&vm->smc_handler, NULL, cb) ? -EBUSY : 0; + hyp_read_unlock(&vm_table_lock); + + return ret; +} + /* * Handler for protected VM HVC calls. * @@ -1775,6 +1835,28 @@ bool kvm_hyp_handle_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } +int pkvm_guest_stage2_pa(pkvm_handle_t handle, u64 ipa, phys_addr_t *phys) +{ + struct pkvm_hyp_vm *hyp_vm; + int err; + + hyp_read_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + err = -ENOENT; + goto err_unlock; + } else if (hyp_vm->is_dying) { + err = -EBUSY; + goto err_unlock; + } + + err = guest_stage2_pa(hyp_vm, ipa, phys); + +err_unlock: + hyp_read_unlock(&vm_table_lock); + return err; +} + #ifdef CONFIG_NVHE_EL2_DEBUG static inline phys_addr_t get_next_memcache_page(phys_addr_t head) { diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a17683d8b38d..8d0ad38e2a51 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -303,6 +303,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { static const exit_handler_fn pvm_exit_handlers[] = { [0 ...
ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_HVC64] = kvm_handle_pvm_hvc64, + [ESR_ELx_EC_SMC64] = kvm_handle_pvm_smc64, [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64, [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_SME] = kvm_handle_pvm_restricted, diff --git a/arch/x86/configs/microdroid_defconfig b/arch/x86/configs/microdroid_defconfig index 923b3d1f3ee3..d58c3dc697d0 100644 --- a/arch/x86/configs/microdroid_defconfig +++ b/arch/x86/configs/microdroid_defconfig @@ -9,8 +9,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_RCU_EXPERT=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_NOCB_CPU=y +CONFIG_LOG_BUF_SHIFT=14 CONFIG_UCLAMP_TASK=y CONFIG_UCLAMP_BUCKETS_COUNT=20 CONFIG_CGROUPS=y @@ -21,16 +20,14 @@ CONFIG_UCLAMP_TASK_GROUP=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y -# CONFIG_UTS_NS is not set -# CONFIG_TIME_NS is not set -# CONFIG_PID_NS is not set -# CONFIG_NET_NS is not set # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set # CONFIG_RD_LZO is not set CONFIG_BOOT_CONFIG=y +CONFIG_EXPERT=y CONFIG_PROFILING=y +CONFIG_KEXEC_FILE=y CONFIG_SMP=y CONFIG_X86_X2APIC=y CONFIG_HYPERVISOR_GUEST=y @@ -39,7 +36,6 @@ CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_NR_CPUS=32 # CONFIG_X86_MCE is not set CONFIG_EFI=y -CONFIG_KEXEC_FILE=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="stack_depot_disable=on cgroup_disable=pressure ioremap_guard panic=-1 bootconfig acpi=noirq" CONFIG_PM_WAKELOCKS=y @@ -50,12 +46,12 @@ CONFIG_CPU_FREQ_TIMES=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_JUMP_LABEL=y -CONFIG_BLK_DEV_ZONED=y +# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y # CONFIG_MSDOS_PARTITION is not set -CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y +# CONFIG_MQ_IOSCHED_DEADLINE is not set +# CONFIG_MQ_IOSCHED_KYBER is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=y # CONFIG_SLAB_MERGE_DEFAULT is not set @@ -63,8 +59,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y # CONFIG_COMPAT_BRK is not set -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y @@ -85,7 +79,7 @@ CONFIG_PCIEPORTBUS=y CONFIG_PCIEAER=y CONFIG_PCI_MSI=y CONFIG_PCI_IOV=y -CONFIG_PCIE_DW_PLAT_EP=y +# CONFIG_VGA_ARB is not set CONFIG_PCI_ENDPOINT=y CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_CACHE is not set @@ -113,7 +107,6 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=0 CONFIG_SERIAL_OF_PLATFORM=y CONFIG_NULL_TTY=y CONFIG_VIRTIO_CONSOLE=y -CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_DEVMEM is not set # CONFIG_DEVPORT is not set @@ -138,7 +131,6 @@ CONFIG_EDAC=y CONFIG_RTC_CLASS=y CONFIG_DMABUF_HEAPS=y CONFIG_DMABUF_SYSFS_STATS=y -CONFIG_UIO=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_STAGING=y @@ -212,6 +204,7 @@ CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_HCTR2=y CONFIG_CRYPTO_LZO=y CONFIG_CRYPTO_AES_NI_INTEL=y @@ -220,16 +213,13 @@ CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_SHA512_SSSE3=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG_CORE=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF5=y CONFIG_DEBUG_INFO_REDUCED=y -CONFIG_DEBUG_INFO_COMPRESSED=y CONFIG_HEADERS_INSTALL=y # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UBSAN=y CONFIG_UBSAN_TRAP=y 
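(Illustration only, not part of the patch.) A minimal sketch of how an nVHE hypervisor module could hook the guest SMC path introduced above in modules.c and pkvm.c. The callback signature and the register_guest_smc_handler module op come from those hunks; the function ID MY_VENDOR_SMC_ID, the attach helper, and the way the VM handle is obtained are assumptions made up for the example.

#include <linux/arm-smccc.h>
/* pkvm_handle_t and struct pkvm_module_ops come from the pKVM module headers in this tree. */

/* Hypothetical vendor-specific SMC function ID; purely illustrative. */
#define MY_VENDOR_SMC_ID	0xc6000042

static bool my_guest_smc_handler(struct arm_smccc_1_2_regs *regs,
				 struct arm_smccc_res *res,
				 pkvm_handle_t handle)
{
	if (regs->a0 != MY_VENDOR_SMC_ID)
		return false;		/* not handled: default SMC behaviour applies */

	res->a0 = SMCCC_RET_SUCCESS;	/* passed back to the calling guest */
	return true;			/* handled: kvm_handle_pvm_smc64() skips the SMC */
}

/* Assumed to run from module init, with the ops table and a known VM handle. */
static int my_module_attach(const struct pkvm_module_ops *ops, pkvm_handle_t vm_handle)
{
	/* Returns -EBUSY if another handler was already registered (cmpxchg_release above). */
	return ops->register_guest_smc_handler(my_guest_smc_handler, vm_handle);
}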
-CONFIG_UBSAN_LOCAL_BOUNDS=y # CONFIG_UBSAN_SHIFT is not set # CONFIG_UBSAN_BOOL is not set # CONFIG_UBSAN_ENUM is not set @@ -243,6 +233,5 @@ CONFIG_PANIC_TIMEOUT=-1 CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y CONFIG_SCHEDSTATS=y -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_HIST_TRIGGERS=y CONFIG_UNWINDER_FRAME_POINTER=y diff --git a/block/blk-mq.c b/block/blk-mq.c index 086ab4465968..12eee9e1382c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -31,6 +31,7 @@ #include #include +#include #include @@ -3031,6 +3032,8 @@ void blk_mq_submit_bio(struct bio *bio) unsigned int nr_segs = 1; blk_status_t ret; + trace_android_vh_check_set_ioprio(bio); + bio = blk_queue_bounce(bio, q); if (plug) { diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 72059e96ae37..eb5644e10481 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -307,6 +307,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_report_bug); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_watchdog_timer_softlockup); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_try_to_freeze_todo); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_try_to_freeze_todo_unfrozen); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ep_create_wakeup_source); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_timerfd_create); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_die_kernel_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_sp_pc_abort); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_el1_undef); @@ -543,6 +545,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_show_smap_swap_shared); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_armv8pmu_counter_overflowed); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_perf_rotate_context); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_process_madvise_bypass); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_madvise_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_pages_prepare_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_pages_ok_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_unref_page_list_bypass); @@ -657,3 +660,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_update_unmapped_area_info); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_reuse_whole_anon_folio); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_swap_slot_cache); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_calculate_totalreserve_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_check_set_ioprio); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_filemap_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_lru_gen_add_folio_skip); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_lru_gen_del_folio_skip); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_perform_reclaim); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_kswapd_shrink_node); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_keep_reclaimed_folio); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_clear_reclaimed_folio); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_evict_folios_bypass); diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 4ca48360648d..78fcce7a1604 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -45,7 +45,7 @@ struct dma_heap_attachment { bool uncached; }; -#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) +#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO | __GFP_RETRY_MAYFAIL) #define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ | __GFP_NORETRY) & ~__GFP_RECLAIM) \ | __GFP_COMP) @@ -371,6 +371,9 @@ static struct dma_buf *system_heap_do_allocate(struct dma_heap *heap, struct page *page, *tmp_page; int i, ret = -ENOMEM; + if (len / PAGE_SIZE > 
totalram_pages()) + return ERR_PTR(-ENOMEM); + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); if (!buffer) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 902bcb83b5f6..bbb719d48a69 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -679,6 +679,7 @@ struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, return desc; } +EXPORT_SYMBOL_GPL(of_find_gpio); /** * of_parse_own_gpio() - Get a GPIO hog descriptor, names and flags for GPIO API diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5de096ca09aa..7bd7ad1e5333 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -69,6 +69,8 @@ #define LIST_DIRTY 1 #define LIST_SIZE 2 +#define SCAN_RESCHED_CYCLE 16 + /*--------------------------------------------------------------*/ /* @@ -2418,7 +2420,12 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; - cond_resched(); + + if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } } } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index e240d43b93e8..a2c7941dbe75 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -29,6 +29,7 @@ #define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 +#define DM_VERITY_USE_BH_DEFAULT_BYTES 8192 #define DM_VERITY_MAX_CORRUPTED_ERRS 100 @@ -46,6 +47,15 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644); +static unsigned int dm_verity_use_bh_bytes[4] = { + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_NONE + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_RT + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_BE + 0 // IOPRIO_CLASS_IDLE +}; + +module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644); + static DEFINE_STATIC_KEY_FALSE(use_tasklet_enabled); /* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? 
*/ @@ -696,9 +706,17 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } +static inline bool verity_use_bh(unsigned int bytes, unsigned short ioprio) +{ + return ioprio <= IOPRIO_CLASS_IDLE && + bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]); +} + static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; + unsigned short ioprio = IOPRIO_PRIO_CLASS(bio->bi_ioprio); + unsigned int bytes = io->n_blocks << io->v->data_dev_block_bits; if (bio->bi_status && (!verity_fec_is_enabled(io->v) || @@ -708,6 +726,19 @@ static void verity_end_io(struct bio *bio) return; } + if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet && + verity_use_bh(bytes, ioprio)) { + if (!(in_hardirq() || irqs_disabled())) { + int err; + + io->in_tasklet = true; + err = verity_verify_io(io); + if (err != -EAGAIN && err != -ENOMEM) { + verity_finish_io(io, errno_to_blk_status(err)); + return; + } + } + } INIT_WORK(&io->work, verity_work); queue_work(io->v->verify_wq, &io->work); } diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 1c9b617e1d4d..9c32d338471f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -115,9 +115,6 @@ void pci_save_aspm_l1ss_state(struct pci_dev *pdev) pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cap++); pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cap++); - if (parent->state_saved) - return; - /* * Save parent's L1 substate configuration so we have it for * pci_restore_aspm_l1ss_state(pdev) to restore. diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index 232425b1cfda..b5f5641eced4 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -12,6 +12,7 @@ */ struct ufs_hba_priv { struct ufs_hba hba; + struct completion dev_cmd_compl; u8 rtt_cap; int nortt; }; diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d559a6747a24..f6812fe067ff 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2759,6 +2759,8 @@ static int ufshcd_compose_devman_upiu(struct ufs_hba *hba, */ static int ufshcd_comp_scsi_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) { + struct request *rq = scsi_cmd_to_rq(lrbp->cmd); + unsigned int ioprio_class = IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); u8 upiu_flags; int ret = 0; @@ -2769,6 +2771,8 @@ static int ufshcd_comp_scsi_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) if (likely(lrbp->cmd)) { ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags, lrbp->cmd->sc_data_direction, 0); + if (ioprio_class == IOPRIO_CLASS_RT) + upiu_flags |= UPIU_CMD_FLAGS_CP; ufshcd_prepare_utp_scsi_cmd_upiu(lrbp, upiu_flags); if (hba->android_quirks & UFSHCD_ANDROID_QUIRK_SET_IID_TO_ONE) lrbp->ucd_req_ptr->header.iid = 1; @@ -3090,16 +3094,10 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, int err; retry: - time_left = wait_for_completion_timeout(hba->dev_cmd.complete, + time_left = wait_for_completion_timeout(&to_hba_priv(hba)->dev_cmd_compl, time_left); if (likely(time_left)) { - /* - * The completion handler called complete() and the caller of - * this function still owns the @lrbp tag so the code below does - * not trigger any race conditions. 
- */ - hba->dev_cmd.complete = NULL; err = ufshcd_get_tr_ocs(lrbp, NULL); if (!err) err = ufshcd_dev_cmd_completion(hba, lrbp); @@ -3113,7 +3111,6 @@ retry: /* successfully cleared the command, retry if needed */ if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0) err = -EAGAIN; - hba->dev_cmd.complete = NULL; return err; } @@ -3129,11 +3126,9 @@ retry: spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) { - hba->dev_cmd.complete = NULL; + if (pending) __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); - } spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3151,8 +3146,6 @@ retry: spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) - hba->dev_cmd.complete = NULL; spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3183,7 +3176,6 @@ retry: static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, enum dev_cmd_type cmd_type, int timeout) { - DECLARE_COMPLETION_ONSTACK(wait); const u32 tag = hba->reserved_slot; struct ufshcd_lrb *lrbp; int err; @@ -3199,10 +3191,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, if (unlikely(err)) goto out; - hba->dev_cmd.complete = &wait; - ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr); - ufshcd_send_command(hba, tag, hba->dev_cmd_queue); err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout); ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP, @@ -5512,14 +5501,12 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, scsi_done(cmd); } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE || lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) { - if (hba->dev_cmd.complete) { - trace_android_vh_ufs_compl_command(hba, lrbp); - if (cqe) { - ocs = le32_to_cpu(cqe->status) & MASK_OCS; - lrbp->utr_descriptor_ptr->header.ocs = ocs; - } - complete(hba->dev_cmd.complete); + trace_android_vh_ufs_compl_command(hba, lrbp); + if (cqe) { + ocs = le32_to_cpu(cqe->status) & MASK_OCS; + lrbp->utr_descriptor_ptr->header.ocs = ocs; } + complete(&to_hba_priv(hba)->dev_cmd_compl); } } @@ -7178,7 +7165,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, enum dev_cmd_type cmd_type, enum query_opcode desc_op) { - DECLARE_COMPLETION_ONSTACK(wait); const u32 tag = hba->reserved_slot; struct ufshcd_lrb *lrbp; int err = 0; @@ -7220,10 +7206,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, memset(lrbp->ucd_rsp_ptr, 0, sizeof(struct utp_upiu_rsp)); - hba->dev_cmd.complete = &wait; - ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr); - ufshcd_send_command(hba, tag, hba->dev_cmd_queue); /* * ignore the returning value here - ufshcd_check_query_response is @@ -7348,7 +7331,6 @@ int ufshcd_advanced_rpmb_req_handler(struct ufs_hba *hba, struct utp_upiu_req *r struct ufs_ehs *rsp_ehs, int sg_cnt, struct scatterlist *sg_list, enum dma_data_direction dir) { - DECLARE_COMPLETION_ONSTACK(wait); const u32 tag = hba->reserved_slot; struct ufshcd_lrb *lrbp; int err = 0; @@ -7397,8 +7379,6 @@ int ufshcd_advanced_rpmb_req_handler(struct ufs_hba *hba, struct utp_upiu_req *r memset(lrbp->ucd_rsp_ptr, 0, sizeof(struct utp_upiu_rsp)); - hba->dev_cmd.complete = &wait; - ufshcd_send_command(hba, tag, hba->dev_cmd_queue); err = ufshcd_wait_for_dev_cmd(hba, lrbp, ADVANCED_RPMB_REQ_TIMEOUT); @@ -10457,6 +10437,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->irq = irq; hba->vps = &ufs_hba_vps; 
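(Illustration only, not part of the patch.) Both the UFS hunk above (UPIU_CMD_FLAGS_CP for IOPRIO_CLASS_RT) and the dm-verity use_bh_bytes limits key off the I/O priority class of the request. A plain user-space sketch of where that class typically comes from, assuming UAPI headers recent enough to ship <linux/ioprio.h>; the program is an assumption, only the priority-class behaviour is taken from the hunks above.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/ioprio.h>

int main(void)
{
	/* Mark this task's I/O as RT class, level 0. */
	int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0);

	if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, prio) < 0) {
		perror("ioprio_set");
		return 1;
	}

	/*
	 * I/O submitted by this task from here on is tagged IOPRIO_CLASS_RT,
	 * which the changes above translate into UPIU_CMD_FLAGS_CP on UFS
	 * and into the IOPRIO_CLASS_RT entry of dm_verity_use_bh_bytes.
	 */
	return 0;
}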
+ init_completion(&to_hba_priv(hba)->dev_cmd_compl); + err = ufshcd_hba_init(hba); if (err) goto out_error; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 9057b7c7c07b..f2b6387a459e 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -21,6 +21,7 @@ #include #include +#include #include #include "xhci.h" @@ -447,7 +448,13 @@ static int xhci_plat_suspend(struct device *dev) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); - int ret; + struct usb_device *udev; + int ret, bypass = 0; + + udev = hcd->self.root_hub; + trace_android_rvh_usb_dev_suspend(udev, PMSG_SUSPEND, &bypass); + if (bypass) + return 0; if (pm_runtime_suspended(dev)) pm_runtime_resume(dev); @@ -475,7 +482,13 @@ static int xhci_plat_resume_common(struct device *dev, struct pm_message pmsg) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); - int ret; + struct usb_device *udev; + int ret, bypass = 0; + + udev = hcd->self.root_hub; + trace_android_vh_usb_dev_resume(udev, PMSG_RESUME, &bypass); + if (bypass) + return 0; if (!device_may_wakeup(dev) && (xhci->quirks & XHCI_SUSPEND_RESUME_CLKS)) { ret = clk_prepare_enable(xhci->clk); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 35711f78ede9..f565337d2a7a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * Balloon device works in 4K page units. So each page is pointed to by @@ -119,6 +120,8 @@ struct virtio_balloon { /* Free page reporting device */ struct virtqueue *reporting_vq; struct page_reporting_dev_info pr_dev_info; + + bool bail_on_out_of_puff; }; static const struct virtio_device_id id_table[] = { @@ -205,7 +208,8 @@ static void set_page_pfns(struct virtio_balloon *vb, page_to_balloon_pfn(page) + i); } -static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num) +static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num, + bool *out_of_puff) { unsigned int num_allocated_pages; unsigned int num_pfns; @@ -225,6 +229,7 @@ static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num) VIRTIO_BALLOON_PAGES_PER_PAGE); /* Sleep for at least 1/5 of a second before retry. */ msleep(200); + *out_of_puff = true; break; } @@ -477,6 +482,7 @@ static void update_balloon_size_func(struct work_struct *work) { struct virtio_balloon *vb; s64 diff; + bool out_of_puff = false; vb = container_of(work, struct virtio_balloon, update_balloon_size_work); @@ -486,12 +492,12 @@ static void update_balloon_size_func(struct work_struct *work) return; if (diff > 0) - diff -= fill_balloon(vb, diff); + diff -= fill_balloon(vb, diff, &out_of_puff); else diff += leak_balloon(vb, -diff); update_balloon_size(vb); - if (diff) + if (diff && !(vb->bail_on_out_of_puff && out_of_puff)) queue_work(system_freezable_wq, work); } @@ -871,6 +877,38 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) return register_shrinker(&vb->shrinker, "virtio-balloon"); } +static ssize_t bail_on_out_of_puff_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct virtio_device *vdev = + container_of(d, struct virtio_device, dev); + struct virtio_balloon *vb = vdev->priv; + + return sprintf(buf, "%c\n", vb->bail_on_out_of_puff ? 
'1' : '0'); +} + +static ssize_t bail_on_out_of_puff_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct virtio_device *vdev = + container_of(d, struct virtio_device, dev); + struct virtio_balloon *vb = vdev->priv; + + return kstrtobool(buf, &vb->bail_on_out_of_puff) ?: count; +} + +static DEVICE_ATTR_RW(bail_on_out_of_puff); + +static struct attribute *virtio_balloon_sysfs_entries[] = { + &dev_attr_bail_on_out_of_puff.attr, + NULL +}; + +static const struct attribute_group virtio_balloon_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = virtio_balloon_sysfs_entries, +}; + static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; @@ -901,6 +939,11 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err) goto out_free_vb; + err = sysfs_create_group(&vdev->dev.kobj, + &virtio_balloon_attribute_group); + if (err) + goto out_del_vqs; + #ifdef CONFIG_BALLOON_COMPACTION vb->vb_dev_info.migratepage = virtballoon_migratepage; #endif @@ -911,13 +954,13 @@ static int virtballoon_probe(struct virtio_device *vdev) */ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { err = -ENOSPC; - goto out_del_vqs; + goto out_remove_sysfs; } vb->balloon_wq = alloc_workqueue("balloon-wq", WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); if (!vb->balloon_wq) { err = -ENOMEM; - goto out_del_vqs; + goto out_remove_sysfs; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; @@ -1011,6 +1054,8 @@ out_unregister_shrinker: out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); +out_remove_sysfs: + sysfs_remove_group(&vdev->dev.kobj, &virtio_balloon_attribute_group); out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: @@ -1057,6 +1102,8 @@ static void virtballoon_remove(struct virtio_device *vdev) destroy_workqueue(vb->balloon_wq); } + sysfs_remove_group(&vdev->dev.kobj, &virtio_balloon_attribute_group); + remove_common(vb); kfree(vb); } diff --git a/fs/TEST_MAPPING b/fs/TEST_MAPPING index 58eaa367e190..f4f3a7070963 100644 --- a/fs/TEST_MAPPING +++ b/fs/TEST_MAPPING @@ -229,6 +229,26 @@ "include-filter": "kselftest_x86_test_mremap_vdso" } ] + }, + { + "name": "CtsJobSchedulerTestCases", + "options": [ + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testCellularConstraintExecutedAndStopped" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_transitionNetworks" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_withMobile" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testEJMeteredConstraintFails_withMobile_DataSaverOn" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testMeteredConstraintFails_withMobile_DataSaverOn" + } + ] } ], "presubmit-large": [ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 555b5e9c777a..56b54a635a78 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1991,7 +1991,7 @@ static void free_note_info(struct elf_note_info *info) threads = t->next; WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus); for (i = 1; i < info->thread_notes; ++i) - kfree(t->notes[i].data); + kvfree(t->notes[i].data); kfree(t); } kfree(info->psinfo.data); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cde5a15b129f..9ae458635eb7 100644 --- 
a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -39,6 +39,8 @@ #include #include +#include + /* * LOCKING: * There are three level of locking required by epoll : @@ -1444,15 +1446,20 @@ static int ep_create_wakeup_source(struct epitem *epi) { struct name_snapshot n; struct wakeup_source *ws; + char ws_name[64]; + strscpy(ws_name, "eventpoll", sizeof(ws_name)); + trace_android_vh_ep_create_wakeup_source(ws_name, sizeof(ws_name)); if (!epi->ep->ws) { - epi->ep->ws = wakeup_source_register(NULL, "eventpoll"); + epi->ep->ws = wakeup_source_register(NULL, ws_name); if (!epi->ep->ws) return -ENOMEM; } take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); - ws = wakeup_source_register(NULL, n.name.name); + strscpy(ws_name, n.name.name, sizeof(ws_name)); + trace_android_vh_ep_create_wakeup_source(ws_name, sizeof(ws_name)); + ws = wakeup_source_register(NULL, ws_name); release_dentry_name_snapshot(&n); if (!ws) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 1680a9928748..8139d658c88d 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -316,13 +316,6 @@ struct exfat_inode_info { /* for avoiding the race between alloc and free */ unsigned int cache_valid_id; - /* - * NOTE: i_size_ondisk is 64bits, so must hold ->inode_lock to access. - * physically allocated size. - */ - loff_t i_size_ondisk; - /* block-aligned i_size (used in cont_write_begin) */ - loff_t i_size_aligned; /* on-disk position of directory entry or 0 */ loff_t i_pos; loff_t valid_size; @@ -429,6 +422,11 @@ static inline bool is_valid_cluster(struct exfat_sb_info *sbi, return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; } +static inline loff_t exfat_ondisk_size(const struct inode *inode) +{ + return ((loff_t)inode->i_blocks) << 9; +} + /* super.c */ int exfat_set_volume_dirty(struct super_block *sb); int exfat_clear_volume_dirty(struct super_block *sb); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index bcbdfebd677b..efd24e29f119 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -29,7 +29,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) if (ret) return ret; - num_clusters = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi); new_num_clusters = EXFAT_B_TO_CLU_ROUND_UP(size, sbi); if (new_num_clusters == num_clusters) @@ -74,8 +74,6 @@ out: /* Expanded range not zeroed, do not update valid_size */ i_size_write(inode, size); - ei->i_size_aligned = round_up(size, sb->s_blocksize); - ei->i_size_ondisk = ei->i_size_aligned; inode->i_blocks = round_up(size, sbi->cluster_size) >> 9; mark_inode_dirty(inode); @@ -157,7 +155,7 @@ int __exfat_truncate(struct inode *inode) exfat_set_volume_dirty(sb); num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); - num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + num_clusters_phys = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi); exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); @@ -243,8 +241,6 @@ void exfat_truncate(struct inode *inode) struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); - unsigned int blocksize = i_blocksize(inode); - loff_t aligned_size; int err; mutex_lock(&sbi->s_lock); @@ -262,17 +258,6 @@ void exfat_truncate(struct inode *inode) inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; write_size: - aligned_size = i_size_read(inode); - if (aligned_size & (blocksize - 1)) { - aligned_size |= (blocksize - 1); - aligned_size++; 
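(Reasoning note, not new code.) The exfat hunks above replace the cached i_size_ondisk/i_size_aligned fields with the exfat_ondisk_size() helper; the sketch below spells out why the plain EXFAT_B_TO_CLU() conversion is now sufficient, assuming only what the hunks themselves show about how i_blocks is maintained.

/*
 * Every path touched by this patch keeps i_blocks as
 *
 *	inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
 *
 * so the new helper evaluates to
 *
 *	exfat_ondisk_size(inode) == ((loff_t)inode->i_blocks) << 9
 *	                         == round_up(i_size, cluster_size),
 *
 * i.e. the physically allocated size that i_size_ondisk used to cache.
 * Because that value is already a multiple of the cluster size,
 * EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi) yields the same cluster
 * count as the previous EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi).
 */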
- } - - if (ei->i_size_ondisk > i_size_read(inode)) - ei->i_size_ondisk = aligned_size; - - if (ei->i_size_aligned > i_size_read(inode)) - ei->i_size_aligned = aligned_size; mutex_unlock(&sbi->s_lock); } @@ -592,9 +577,19 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) valid_size = ei->valid_size; ret = generic_write_checks(iocb, iter); - if (ret < 0) + if (ret <= 0) goto unlock; + if (iocb->ki_flags & IOCB_DIRECT) { + unsigned long align = pos | iov_iter_alignment(iter); + + if (!IS_ALIGNED(align, i_blocksize(inode)) && + !IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) { + ret = -EINVAL; + goto unlock; + } + } + if (pos > valid_size) { ret = exfat_file_zeroed_range(file, valid_size, pos); if (ret < 0 && ret != -ENOSPC) { diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 8ebb56146be9..83fcd54c58c5 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -133,11 +133,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); unsigned int local_clu_offset = clu_offset; - unsigned int num_to_be_allocated = 0, num_clusters = 0; + unsigned int num_to_be_allocated = 0, num_clusters; - if (ei->i_size_ondisk > 0) - num_clusters = - EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi); if (clu_offset >= num_clusters) num_to_be_allocated = clu_offset - num_clusters + 1; @@ -263,21 +261,6 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, return 0; } -static int exfat_map_new_buffer(struct exfat_inode_info *ei, - struct buffer_head *bh, loff_t pos) -{ - if (buffer_delay(bh) && pos > ei->i_size_aligned) - return -EIO; - set_buffer_new(bh); - - /* - * Adjust i_size_aligned if i_size_ondisk is bigger than it. - */ - if (ei->i_size_ondisk > ei->i_size_aligned) - ei->i_size_aligned = ei->i_size_ondisk; - return 0; -} - static int exfat_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -291,10 +274,11 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, sector_t last_block; sector_t phys = 0; sector_t valid_blks; - loff_t pos; + loff_t i_size; mutex_lock(&sbi->s_lock); - last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size_read(inode), sb); + i_size = i_size_read(inode); + last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size, sb); if (iblock >= last_block && !create) goto done; @@ -319,93 +303,103 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, mapped_blocks = sbi->sect_per_clus - sec_offset; max_blocks = min(mapped_blocks, max_blocks); - pos = EXFAT_BLK_TO_B((iblock + 1), sb); - if ((create && iblock >= last_block) || buffer_delay(bh_result)) { - if (ei->i_size_ondisk < pos) - ei->i_size_ondisk = pos; - } - map_bh(bh_result, sb, phys); if (buffer_delay(bh_result)) clear_buffer_delay(bh_result); - if (create) { + /* + * In most cases, we just need to set bh_result to mapped, unmapped + * or new status as follows: + * 1. i_size == valid_size + * 2. write case (create == 1) + * 3. direct_read (!bh_result->b_folio) + * -> the unwritten part will be zeroed in exfat_direct_IO() + * + * Otherwise, in the case of buffered read, it is necessary to take + * care the last nested block if valid_size is not equal to i_size. 
+ */ + if (i_size == ei->valid_size || create || !bh_result->b_folio) valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb); - - if (iblock + max_blocks < valid_blks) { - /* The range has been written, map it */ - goto done; - } else if (iblock < valid_blks) { - /* - * The range has been partially written, - * map the written part. - */ - max_blocks = valid_blks - iblock; - goto done; - } - - /* The area has not been written, map and mark as new. */ - err = exfat_map_new_buffer(ei, bh_result, pos); - if (err) { - exfat_fs_error(sb, - "requested for bmap out of range(pos : (%llu) > i_size_aligned(%llu)\n", - pos, ei->i_size_aligned); - goto unlock_ret; - } - - ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb); - mark_inode_dirty(inode); - } else { + else valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb); - if (iblock + max_blocks < valid_blks) { - /* The range has been written, map it */ - goto done; - } else if (iblock < valid_blks) { - /* - * The area has been partially written, - * map the written part. - */ - max_blocks = valid_blks - iblock; - goto done; - } else if (iblock == valid_blks && - (ei->valid_size & (sb->s_blocksize - 1))) { - /* - * The block has been partially written, - * zero the unwritten part and map the block. - */ - loff_t size, off; + /* The range has been fully written, map it */ + if (iblock + max_blocks < valid_blks) + goto done; - max_blocks = 1; - - /* - * For direct read, the unwritten part will be zeroed in - * exfat_direct_IO() - */ - if (!bh_result->b_folio) - goto done; - - pos -= sb->s_blocksize; - size = ei->valid_size - pos; - off = pos & (PAGE_SIZE - 1); - - folio_set_bh(bh_result, bh_result->b_folio, off); - err = bh_read(bh_result, 0); - if (err < 0) - goto unlock_ret; - - folio_zero_segment(bh_result->b_folio, off + size, - off + sb->s_blocksize); - } else { - /* - * The range has not been written, clear the mapped flag - * to only zero the cache and do not read from disk. - */ - clear_buffer_mapped(bh_result); - } + /* The range has been partially written, map the written part */ + if (iblock < valid_blks) { + max_blocks = valid_blks - iblock; + goto done; } + + /* The area has not been written, map and mark as new for create case */ + if (create) { + set_buffer_new(bh_result); + ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb); + mark_inode_dirty(inode); + goto done; + } + + /* + * The area has just one block partially written. + * In that case, we should read and fill the unwritten part of + * a block with zero. + */ + if (bh_result->b_folio && iblock == valid_blks && + (ei->valid_size & (sb->s_blocksize - 1))) { + loff_t size, pos; + void *addr; + + max_blocks = 1; + + /* + * No buffer_head is allocated. + * (1) bmap: It's enough to set blocknr without I/O. + * (2) read: The unwritten part should be filled with zero. + * If a folio does not have any buffers, + * let's returns -EAGAIN to fallback to + * block_read_full_folio() for per-bh IO. 
+ */ + if (!folio_buffers(bh_result->b_folio)) { + err = -EAGAIN; + goto done; + } + + pos = EXFAT_BLK_TO_B(iblock, sb); + size = ei->valid_size - pos; + addr = folio_address(bh_result->b_folio) + + offset_in_folio(bh_result->b_folio, pos); + + /* Check if bh->b_data points to proper addr in folio */ + if (bh_result->b_data != addr) { + exfat_fs_error_ratelimit(sb, + "b_data(%p) != folio_addr(%p)", + bh_result->b_data, addr); + err = -EINVAL; + goto done; + } + + /* Read a block */ + err = bh_read(bh_result, 0); + if (err < 0) + goto done; + + /* Zero unwritten part of a block */ + memset(bh_result->b_data + size, 0, bh_result->b_size - size); + err = 0; + goto done; + } + + /* + * The area has not been written, clear mapped for read/bmap cases. + * If so, it will be filled with zero without reading from disk. + */ + clear_buffer_mapped(bh_result); done: bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb); + if (err < 0) + clear_buffer_mapped(bh_result); unlock_ret: mutex_unlock(&sbi->s_lock); return err; @@ -480,14 +474,6 @@ static int exfat_write_end(struct file *file, struct address_space *mapping, int err; err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); - - if (ei->i_size_aligned < i_size_read(inode)) { - exfat_fs_error(inode->i_sb, - "invalid size(size(%llu) > aligned(%llu)\n", - i_size_read(inode), ei->i_size_aligned); - return -EIO; - } - if (err < len) exfat_write_failed(mapping, pos+len); @@ -515,20 +501,6 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) int rw = iov_iter_rw(iter); ssize_t ret; - if (rw == WRITE) { - /* - * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), - * so we need to update the ->i_size_aligned to block boundary. - * - * But we must fill the remaining area or hole by nul for - * updating ->i_size_aligned - * - * Return 0, and fallback to normal buffered write. - */ - if (EXFAT_I(inode)->i_size_aligned < size) - return 0; - } - /* * Need to use the DIO_LOCKING for avoiding the race * condition of exfat_get_block() and ->truncate(). @@ -542,8 +514,18 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else size = pos + ret; - /* zero the unwritten part in the partially written block */ - if (rw == READ && pos < ei->valid_size && ei->valid_size < size) { + if (rw == WRITE) { + /* + * If the block had been partially written before this write, + * ->valid_size will not be updated in exfat_get_block(), + * update it here. 
+ */ + if (ei->valid_size < size) { + ei->valid_size = size; + mark_inode_dirty(inode); + } + } else if (pos < ei->valid_size && ei->valid_size < size) { + /* zero the unwritten part in the partially written block */ iov_iter_revert(iter, size - ei->valid_size); iov_iter_zero(size - ei->valid_size, iter); } @@ -678,15 +660,6 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) i_size_write(inode, size); - /* ondisk and aligned size should be aligned with block size */ - if (size & (inode->i_sb->s_blocksize - 1)) { - size |= (inode->i_sb->s_blocksize - 1); - size++; - } - - ei->i_size_aligned = size; - ei->i_size_ondisk = size; - exfat_save_attr(inode, info->attr); inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index a7b258259043..3007c2e9a04d 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -373,8 +373,6 @@ static int exfat_find_empty_entry(struct inode *inode, /* directory inode should be updated in here */ i_size_write(inode, size); - ei->i_size_ondisk += sbi->cluster_size; - ei->i_size_aligned += sbi->cluster_size; ei->valid_size += sbi->cluster_size; ei->flags = p_dir->flags; inode->i_blocks += sbi->cluster_size >> 9; diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 6b51eb75ef93..1400a521af4e 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -409,8 +409,6 @@ static int exfat_read_root(struct inode *inode) inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; - ei->i_size_aligned = i_size_read(inode); - ei->i_size_ondisk = i_size_read(inode); exfat_save_attr(inode, EXFAT_ATTR_SUBDIR); ei->i_crtime = simple_inode_init_ts(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 36fc03af8190..78d17f2e21b7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1846,7 +1846,6 @@ struct f2fs_sb_info { spinlock_t iostat_lat_lock; struct iostat_lat_info *iostat_io_lat; #endif - unsigned int sanity_check; }; /* Definitions to access f2fs_sb_info */ @@ -3671,11 +3670,8 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page); void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); -struct page *f2fs_get_prev_nat_page(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page); void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); -bool f2fs_get_nat_entry(struct f2fs_sb_info *sbi, struct node_info *cne, - struct node_info *jne, nid_t nid); int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 98191e785b2a..0c3b379d2e7f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2069,6 +2069,9 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + continue; + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); put_gc_inode(&gc_list); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7d7decf36851..1effcb58b99e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -749,99 +749,6 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) #endif } -static void f2fs_sanity_check_nat(struct 
f2fs_sb_info *sbi, pgoff_t nid) -{ - struct page *page; - struct node_info cni = { 0 }, jni = { 0 }; - struct f2fs_nat_block *nat_blk; - struct f2fs_nat_entry ne; - nid_t start_nid; - struct f2fs_io_info fio = { - .sbi = sbi, - .type = NODE, - .op = REQ_OP_READ, - .op_flags = 0, - .encrypted_page = NULL, - }; - int err; - int ret; - - if (likely(!sbi->sanity_check)) - return; - - if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) - return; - - /* nat entry */ - ret = f2fs_get_nat_entry(sbi, &cni, &jni, nid); - if (ret) { - if (ret & NAT_JOURNAL_ENTRY) - f2fs_err(sbi, "nat entry in journal: [%u,%u,%u,%u,%u]", - jni.nid, jni.ino, jni.blk_addr, jni.version, jni.flag); - if (ret & NAT_CACHED_ENTRY) - f2fs_err(sbi, "nat entry in cache: [%u,%u,%u,%u,%u]", - cni.nid, cni.ino, cni.blk_addr, cni.version, cni.flag); - } else { - f2fs_err(sbi, "nat entry is not in cache&journal"); - } - - /* previous node block */ - page = f2fs_get_prev_nat_page(sbi, nid); - if (IS_ERR(page)) - return; - nat_blk = (struct f2fs_nat_block *)page_address(page); - start_nid = START_NID(nid); - ne = nat_blk->entries[nid - start_nid]; - node_info_from_raw_nat(&cni, &ne); - ClearPageUptodate(page); - f2fs_put_page(page, 1); - - f2fs_err(sbi, "previous node info: [%u,%u,%u,%u,%u]", - cni.nid, cni.ino, cni.blk_addr, cni.version, cni.flag); - - if (cni.blk_addr == NULL_ADDR || cni.blk_addr == NEW_ADDR) - return; - - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); - if (!page) - return; - - fio.page = page; - fio.new_blkaddr = fio.old_blkaddr = cni.blk_addr; - - err = f2fs_submit_page_bio(&fio); - if (err) { - f2fs_err(sbi, "f2fs_submit_page_bio fail err:%d", err); - goto out; - } - - lock_page(page); - - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { - f2fs_err(sbi, "mapping dismatch"); - goto out; - } - - if (unlikely(!PageUptodate(page))) { - f2fs_err(sbi, "page is not uptodate"); - goto out; - } - - if (!f2fs_inode_chksum_verify(sbi, page)) { - f2fs_err(sbi, "f2fs_inode_chksum_verify fail"); - goto out; - } - - f2fs_err(sbi, "previous node block, nid:%lu, " - "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), - next_blkaddr_of_node(page)); -out: - ClearPageUptodate(page); - f2fs_put_page(page, 1); -} - void f2fs_update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -859,9 +766,6 @@ retry: if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE); - f2fs_err(sbi, "fail to get node page, ino:%lu, err: %d", inode->i_ino, err); - if (err == -EFSCORRUPTED) - f2fs_sanity_check_nat(sbi, inode->i_ino); return; } f2fs_update_inode(inode, node_page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5ed6bd0c64ad..155553fac229 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -164,12 +164,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) return dst_page; } -struct page *f2fs_get_prev_nat_page(struct f2fs_sb_info *sbi, nid_t nid) -{ - pgoff_t dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid)); - return f2fs_get_meta_page(sbi, dst_off); -} - static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, bool no_fail) { @@ -383,39 +377,6 @@ void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi) spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); } -bool f2fs_get_nat_entry(struct f2fs_sb_info *sbi, struct node_info *cni, - struct node_info *jni, nid_t nid) -{ - struct 
f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_journal *journal = curseg->journal; - struct nat_entry *e; - int ret = 0; - int i; - - f2fs_down_read(&nm_i->nat_tree_lock); - - /* lookup nat entry in journal */ - i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); - if (i >= 0) { - struct f2fs_nat_entry ne; - - ne = nat_in_journal(journal, i); - node_info_from_raw_nat(jni, &ne); - ret |= NAT_JOURNAL_ENTRY; - } - - /* lookup nat entry in cache */ - e = __lookup_nat_cache(nm_i, nid); - if (e) { - *cni = e->ni; - ret |= NAT_CACHED_ENTRY; - } - f2fs_up_read(&nm_i->nat_tree_lock); - - return ret; -} - int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 943be4f1d6d2..e905c1aff512 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -429,7 +429,6 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); @@ -437,7 +436,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) next = find_next_bit(free_i->free_segmap, start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { + if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } @@ -463,22 +462,36 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); + bool ret; spin_lock(&free_i->segmap_lock); - if (test_and_clear_bit(segno, free_i->free_segmap)) { - free_i->free_segments++; + ret = test_and_clear_bit(segno, free_i->free_segmap); + if (!ret) + goto unlock_out; - if (!inmem && IS_CURSEC(sbi, secno)) - goto skip_free; - next = find_next_bit(free_i->free_segmap, - start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { - if (test_and_clear_bit(secno, free_i->free_secmap)) - free_i->free_sections++; - } - } -skip_free: + free_i->free_segments++; + + if (!inmem && IS_CURSEC(sbi, secno)) + goto unlock_out; + + /* check large section */ + next = find_next_bit(free_i->free_segmap, + start_segno + SEGS_PER_SEC(sbi), start_segno); + if (next < start_segno + f2fs_usable_segs_in_sec(sbi)) + goto unlock_out; + + ret = test_and_clear_bit(secno, free_i->free_secmap); + if (!ret) + goto unlock_out; + + free_i->free_sections++; + + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno) + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno) + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + +unlock_out: spin_unlock(&free_i->segmap_lock); } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 77096227d10a..c69161366467 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1122,8 +1122,6 @@ F2FS_SBI_GENERAL_RW_ATTR(max_read_extent_count); F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif -/* enable sanity check to dump more metadata info */ -F2FS_SBI_GENERAL_RW_ATTR(sanity_check); F2FS_SBI_GENERAL_RW_ATTR(carve_out); /* STAT_INFO ATTR */ @@ 
-1312,7 +1310,6 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(warm_data_age_threshold), ATTR_LIST(last_age_weight), ATTR_LIST(max_read_extent_count), - ATTR_LIST(sanity_check), ATTR_LIST(carve_out), NULL, }; diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c index 21fb9cdd20aa..5cb9fb6086a1 100644 --- a/fs/fuse/backing.c +++ b/fs/fuse/backing.c @@ -799,10 +799,6 @@ int fuse_file_read_iter_initialize( .size = to->count, }; - fri->frio = (struct fuse_read_iter_out) { - .ret = fri->fri.size, - }; - /* TODO we can't assume 'to' is a kvec */ /* TODO we also can't assume the vector has only one component */ *fa = (struct fuse_bpf_args) { @@ -837,11 +833,6 @@ int fuse_file_read_iter_backing(struct fuse_bpf_args *fa, if (!iov_iter_count(to)) return 0; - if ((iocb->ki_flags & IOCB_DIRECT) && - (!ff->backing_file->f_mapping->a_ops || - !ff->backing_file->f_mapping->a_ops->direct_IO)) - return -EINVAL; - /* TODO This just plain ignores any change to fuse_read_in */ if (is_sync_kiocb(iocb)) { ret = vfs_iter_read(ff->backing_file, to, &iocb->ki_pos, @@ -864,14 +855,13 @@ int fuse_file_read_iter_backing(struct fuse_bpf_args *fa, fuse_bpf_aio_cleanup_handler(aio_req); } - frio->ret = ret; - /* TODO Need to point value at the buffer for post-modification */ out: fuse_file_accessed(file, ff->backing_file); - return ret; + frio->ret = ret; + return ret < 0 ? ret : 0; } void *fuse_file_read_iter_finalize(struct fuse_bpf_args *fa, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9d6f18188389..ad94f3ee623d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -281,7 +281,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) } start = vma->vm_start; - end = vma->vm_end; + end = VMA_PAD_START(vma); __fold_filemap_fixup_entry(&((struct proc_maps_private *)m->private)->iter, &end); @@ -345,13 +345,12 @@ done: static int show_map(struct seq_file *m, void *v) { - struct vm_area_struct *pad_vma = get_pad_vma(v); - struct vm_area_struct *vma = get_data_vma(v); + struct vm_area_struct *vma = v; if (vma_pages(vma)) show_map_vma(m, vma); - show_map_pad_vma(vma, pad_vma, m, show_map_vma, false); + show_map_pad_vma(vma, m, show_map_vma, false); return 0; } @@ -726,18 +725,24 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_SEALED)] = "sl", #endif }; + unsigned long pad_pages = vma_pad_pages(vma); size_t i; seq_puts(m, "VmFlags: "); for (i = 0; i < BITS_PER_LONG; i++) { if (!mnemonics[i][0]) continue; + if ((1UL << i) & VM_PAD_MASK) + continue; if (vma->vm_flags & (1UL << i)) { seq_putc(m, mnemonics[i][0]); seq_putc(m, mnemonics[i][1]); seq_putc(m, ' '); } } + if (pad_pages) + seq_printf(m, "pad=%lukB", pad_pages << (PAGE_SHIFT - 10)); + seq_putc(m, '\n'); } @@ -794,9 +799,10 @@ static void smap_gather_stats(struct vm_area_struct *vma, struct mem_size_stats *mss, unsigned long start) { const struct mm_walk_ops *ops = &smaps_walk_ops; + unsigned long end = VMA_PAD_START(vma); /* Invalid start */ - if (start >= vma->vm_end) + if (start >= end) return; if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { @@ -813,7 +819,15 @@ static void smap_gather_stats(struct vm_area_struct *vma, unsigned long shmem_swapped = shmem_swap_usage(vma); if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) || - !(vma->vm_flags & VM_WRITE))) { + !(vma->vm_flags & VM_WRITE)) && + /* + * Only if we don't have padding can we use the fast path + * shmem_inode_info->swapped for shmem_swapped. 
+ * + * Else we'll walk the page table to calculate + * shmem_swapped, (excluding the padding region). + */ + end == vma->vm_end) { mss->swap += shmem_swapped; } else { ops = &smaps_shmem_walk_ops; @@ -822,9 +836,9 @@ static void smap_gather_stats(struct vm_area_struct *vma, /* mmap_lock is held in m_start */ if (!start) - walk_page_vma(vma, ops, mss); + walk_page_range(vma->vm_mm, vma->vm_start, end, ops, mss); else - walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss); + walk_page_range(vma->vm_mm, start, end, ops, mss); } #define SEQ_PUT_DEC(str, val) \ @@ -875,8 +889,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, static int show_smap(struct seq_file *m, void *v) { - struct vm_area_struct *pad_vma = get_pad_vma(v); - struct vm_area_struct *vma = get_data_vma(v); + struct vm_area_struct *vma = v; struct mem_size_stats mss; memset(&mss, 0, sizeof(mss)); @@ -888,7 +901,7 @@ static int show_smap(struct seq_file *m, void *v) show_map_vma(m, vma); - SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start); + SEQ_PUT_DEC("Size: ", VMA_PAD_START(vma) - vma->vm_start); SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma)); SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); seq_puts(m, " kB\n"); @@ -904,7 +917,7 @@ static int show_smap(struct seq_file *m, void *v) show_smap_vma_flags(m, vma); show_pad: - show_map_pad_vma(vma, pad_vma, m, show_smap, true); + show_map_pad_vma(vma, m, show_smap, true); return 0; } diff --git a/fs/timerfd.c b/fs/timerfd.c index e9c96a0c79f1..d3ccba1799fd 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -28,6 +28,8 @@ #include #include +#include + struct timerfd_ctx { union { struct hrtimer tmr; @@ -407,6 +409,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; + char file_name_buf[32]; /* Check the TFD_* constants for consistency. 
*/ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -443,7 +446,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ctx->moffs = ktime_mono_to_real(0); - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, + strscpy(file_name_buf, "[timerfd]", sizeof(file_name_buf)); + trace_android_vh_timerfd_create(file_name_buf, sizeof(file_name_buf)); + ufd = anon_inode_getfd(file_name_buf, &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); if (ufd < 0) kfree(ctx); @@ -451,7 +456,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) return ufd; } -static int do_timerfd_settime(int ufd, int flags, +static int do_timerfd_settime(int ufd, int flags, const struct itimerspec64 *new, struct itimerspec64 *old) { diff --git a/include/asm-generic/TEST_MAPPING b/include/asm-generic/TEST_MAPPING index 76470c7eb9af..fbb9e9e1a11e 100644 --- a/include/asm-generic/TEST_MAPPING +++ b/include/asm-generic/TEST_MAPPING @@ -233,6 +233,14 @@ }, { "name": "vts_kernel_net_tests" + }, + { + "name": "CtsJobSchedulerTestCases", + "options": [ + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_withMobile" + } + ] } ], "presubmit-large": [ diff --git a/include/linux/TEST_MAPPING b/include/linux/TEST_MAPPING index 1268c221b87f..5ed598d5f47a 100644 --- a/include/linux/TEST_MAPPING +++ b/include/linux/TEST_MAPPING @@ -241,6 +241,26 @@ }, { "name": "vts_kernel_net_tests" + }, + { + "name": "CtsJobSchedulerTestCases", + "options": [ + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testCellularConstraintExecutedAndStopped" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_transitionNetworks" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_withMobile" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testEJMeteredConstraintFails_withMobile_DataSaverOn" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testMeteredConstraintFails_withMobile_DataSaverOn" + } + ] } ], "presubmit-large": [ diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index b083942d474d..02b2dff6d253 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -257,6 +257,9 @@ ARM_SMCCC_OWNER_STANDARD, \ 0x53) +#define ARM_CCA_FUNC_END 0x840001CF +#define ARM_CCA_64BIT_FUNC_END 0xC40001CF + /* * Return codes defined in ARM DEN 0070A * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C diff --git a/include/linux/damon.h b/include/linux/damon.h index 343132a146cf..5227a4fb279e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -40,9 +40,24 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. + * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for + * each sampling interval. * @list: List head for siblings. * @age: Age of this region. * + * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be + * increased for every &damon_attrs->sample_interval if an access to the region + * during the last sampling interval is found. The update of this field should + * not be done with direct access but with the helper function, + * damon_update_region_access_rate(). 
+ * + * @nr_accesses_bp is another representation of @nr_accesses in basis point + * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a + * manner similar to moving sum. By the algorithm, this value becomes + * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can + * be used when the aggregation interval is too huge and therefore cannot wait + * for it before getting the access monitoring results. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -52,6 +67,7 @@ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; + unsigned int nr_accesses_bp; struct list_head list; unsigned int age; @@ -631,6 +647,8 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 3c32f61232b9..c24f8bc01045 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -422,9 +422,6 @@ struct f2fs_sit_block { struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; } __packed; -#define NAT_CACHED_ENTRY 1 -#define NAT_JOURNAL_ENTRY 2 - /* * For segment summary * diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index dbe379809f38..d03a89389766 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -90,7 +90,7 @@ struct ipv6_devconf { ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); - ANDROID_KABI_RESERVE(4); + ANDROID_KABI_BACKPORT_OK(4); }; struct ipv6_params { diff --git a/include/linux/mm.h b/include/linux/mm.h index 1b6aafbbfb08..e6d5be7a3e92 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3757,24 +3757,22 @@ static inline bool page_is_guard(struct page *page) return PageGuard(page); } -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype); +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order); static inline bool set_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) + unsigned int order) { if (!debug_guardpage_enabled()) return false; - return __set_page_guard(zone, page, order, migratetype); + return __set_page_guard(zone, page, order); } -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype); +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order); static inline void clear_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) + unsigned int order) { if (!debug_guardpage_enabled()) return; - __clear_page_guard(zone, page, order, migratetype); + __clear_page_guard(zone, page, order); } #else /* CONFIG_DEBUG_PAGEALLOC */ @@ -3784,9 +3782,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool debug_guardpage_enabled(void) { return false; } static inline bool page_is_guard(struct page *page) { return false; } static inline bool set_page_guard(struct zone *zone, struct page *page, - 
unsigned int order, int migratetype) { return false; } + unsigned int order) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) {} + unsigned int order) {} #endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 6f1d13f2edd0..8f2b52af6d82 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -247,6 +247,11 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); struct lru_gen_folio *lrugen = &lruvec->lrugen; + bool skip = false; + + trace_android_vh_lru_gen_add_folio_skip(lruvec, folio, &skip); + if (skip) + return true; VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); @@ -294,6 +299,11 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, { unsigned long flags; int gen = folio_lru_gen(folio); + bool skip = false; + + trace_android_vh_lru_gen_del_folio_skip(lruvec, folio, &skip); + if (skip) + return true; if (gen < 0) return false; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 4ac34392823a..c16db0067090 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -34,8 +34,9 @@ static inline bool is_migrate_isolate(int migratetype) #define REPORT_FAILURE 0x2 void set_pageblock_migratetype(struct page *page, int migratetype); -int move_freepages_block(struct zone *zone, struct page *page, - int migratetype, int *num_movable); + +bool move_freepages_block_isolate(struct zone *zone, struct page *page, + int migratetype); int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int migratetype, int flags, gfp_t gfp_flags); diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index 48672dbc84e9..359c1807ff1d 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -26,12 +26,7 @@ extern unsigned long vma_pad_pages(struct vm_area_struct *vma); extern void madvise_vma_pad_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end); -extern struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma); - -extern struct vm_area_struct *get_data_vma(struct vm_area_struct *vma); - extern void show_map_pad_vma(struct vm_area_struct *vma, - struct vm_area_struct *pad, struct seq_file *m, void *func, bool smaps); extern void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, @@ -57,18 +52,7 @@ static inline void madvise_vma_pad_pages(struct vm_area_struct *vma, { } -static inline struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) -{ - return NULL; -} - -static inline struct vm_area_struct *get_data_vma(struct vm_area_struct *vma) -{ - return vma; -} - static inline void show_map_pad_vma(struct vm_area_struct *vma, - struct vm_area_struct *pad, struct seq_file *m, void *func, bool smaps) { } diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index d662cf136021..90cd4b1974cb 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -180,7 +180,9 @@ static inline unsigned int __map_depth(const struct sbitmap *sb, int index) static inline void sbitmap_free(struct sbitmap *sb) { free_percpu(sb->alloc_hint); - kvfree(sb->map); + if (!sb->map) + return; + kvfree(sb->map - 1); sb->map = NULL; } diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 3219b368db79..33ced39a142c 100644 --- 
a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -487,14 +487,6 @@ static inline void node_stat_sub_folio(struct folio *folio, mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); } -static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, - int migratetype) -{ - __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages); - if (is_migrate_cma(migratetype)) - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); -} - extern const char * const vmstat_text[]; static inline const char *zone_stat_name(enum zone_stat_item item) diff --git a/include/net/TEST_MAPPING b/include/net/TEST_MAPPING index 867f24876046..b1e9cb5cc3ec 100644 --- a/include/net/TEST_MAPPING +++ b/include/net/TEST_MAPPING @@ -241,6 +241,26 @@ }, { "name": "vts_kernel_net_tests" + }, + { + "name": "CtsJobSchedulerTestCases", + "options": [ + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testCellularConstraintExecutedAndStopped" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_transitionNetworks" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_withMobile" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testEJMeteredConstraintFails_withMobile_DataSaverOn" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testMeteredConstraintFails_withMobile_DataSaverOn" + } + ] } ], "presubmit-large": [ diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index c79f1d4c39af..6e2bed8c8254 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -9,6 +9,45 @@ #include #include +TRACE_EVENT_CONDITION(damos_before_apply, + + TP_PROTO(unsigned int context_idx, unsigned int scheme_idx, + unsigned int target_idx, struct damon_region *r, + unsigned int nr_regions, bool do_trace), + + TP_ARGS(context_idx, target_idx, scheme_idx, r, nr_regions, do_trace), + + TP_CONDITION(do_trace), + + TP_STRUCT__entry( + __field(unsigned int, context_idx) + __field(unsigned int, scheme_idx) + __field(unsigned long, target_idx) + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned int, nr_accesses) + __field(unsigned int, age) + __field(unsigned int, nr_regions) + ), + + TP_fast_assign( + __entry->context_idx = context_idx; + __entry->scheme_idx = scheme_idx; + __entry->target_idx = target_idx; + __entry->start = r->ar.start; + __entry->end = r->ar.end; + __entry->nr_accesses = r->nr_accesses_bp / 10000; + __entry->age = r->age; + __entry->nr_regions = nr_regions; + ), + + TP_printk("ctx_idx=%u scheme_idx=%u target_idx=%lu nr_regions=%u %lu-%lu: %u %u", + __entry->context_idx, __entry->scheme_idx, + __entry->target_idx, __entry->nr_regions, + __entry->start, __entry->end, + __entry->nr_accesses, __entry->age) +); + TRACE_EVENT(damon_aggregated, TP_PROTO(struct damon_target *t, unsigned int target_id, diff --git a/include/trace/hooks/blk.h b/include/trace/hooks/blk.h index 720399c61cb5..67453d1c7574 100644 --- a/include/trace/hooks/blk.h +++ b/include/trace/hooks/blk.h @@ -45,6 +45,12 @@ DECLARE_HOOK(android_vh_blk_mq_kick_requeue_list, TP_PROTO(struct request_queue *q, unsigned long delay, bool *skip), TP_ARGS(q, delay, skip)); +struct bio; + +DECLARE_HOOK(android_vh_check_set_ioprio, + TP_PROTO(struct bio *bio), + TP_ARGS(bio)); + #endif /* _TRACE_HOOK_BLK_H */ /* This part must be outside protection */ #include diff --git 
a/include/trace/hooks/fs.h b/include/trace/hooks/fs.h index df8e340a49d2..d5fcb5357e4f 100644 --- a/include/trace/hooks/fs.h +++ b/include/trace/hooks/fs.h @@ -28,6 +28,13 @@ DECLARE_HOOK(android_vh_f2fs_restore_priority, TP_PROTO(struct task_struct *p, int saved_prio), TP_ARGS(p, saved_prio)); +DECLARE_HOOK(android_vh_ep_create_wakeup_source, + TP_PROTO(char *name, int len), + TP_ARGS(name, len)); + +DECLARE_HOOK(android_vh_timerfd_create, + TP_PROTO(char *name, int len), + TP_ARGS(name, len)); #endif /* _TRACE_HOOK_FS_H */ /* This part must be outside protection */ diff --git a/include/trace/hooks/madvise.h b/include/trace/hooks/madvise.h index f13102f68b42..2542b92ab25c 100644 --- a/include/trace/hooks/madvise.h +++ b/include/trace/hooks/madvise.h @@ -11,6 +11,10 @@ DECLARE_RESTRICTED_HOOK(android_rvh_process_madvise_bypass, TP_PROTO(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags, ssize_t *ret, bool *bypass), TP_ARGS(pidfd, vec, vlen, behavior, flags, ret, bypass), 1); +DECLARE_RESTRICTED_HOOK(android_rvh_do_madvise_bypass, + TP_PROTO(struct mm_struct *mm, unsigned long start, + size_t len_in, int behavior, int *ret, bool *bypass), + TP_ARGS(mm, start, len_in, behavior, ret, bypass), 1); struct vm_area_struct; DECLARE_HOOK(android_vh_update_vma_flags, TP_PROTO(struct vm_area_struct *vma), @@ -27,4 +31,4 @@ DECLARE_HOOK(android_vh_madvise_pageout_bypass, #endif -#include \ No newline at end of file +#include diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index dde35e7d0e97..27c82d37dd8c 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -22,6 +22,14 @@ struct vm_unmapped_area_info; DECLARE_RESTRICTED_HOOK(android_rvh_shmem_get_folio, TP_PROTO(struct shmem_inode_info *info, struct folio **folio), TP_ARGS(info, folio), 2); +DECLARE_RESTRICTED_HOOK(android_rvh_perform_reclaim, + TP_PROTO(int order, gfp_t gfp_mask, nodemask_t *nodemask, + unsigned long *progress, bool *skip), + TP_ARGS(order, gfp_mask, nodemask, progress, skip), 4); +DECLARE_RESTRICTED_HOOK(android_rvh_do_traversal_lruvec_ex, + TP_PROTO(struct mem_cgroup *memcg, struct lruvec *lruvec, + bool *stop), + TP_ARGS(memcg, lruvec, stop), 3); DECLARE_HOOK(android_vh_shmem_mod_shmem, TP_PROTO(struct address_space *mapping, long nr_pages), TP_ARGS(mapping, nr_pages)); @@ -398,6 +406,9 @@ DECLARE_HOOK(android_vh_filemap_update_page, TP_PROTO(struct address_space *mapping, struct folio *folio, struct file *file), TP_ARGS(mapping, folio, file)); +DECLARE_HOOK(android_vh_filemap_pages, + TP_PROTO(struct folio *folio), + TP_ARGS(folio)); DECLARE_HOOK(android_vh_lruvec_add_folio, TP_PROTO(struct lruvec *lruvec, struct folio *folio, enum lru_list lru, @@ -408,6 +419,12 @@ DECLARE_HOOK(android_vh_lruvec_del_folio, TP_PROTO(struct lruvec *lruvec, struct folio *folio, enum lru_list lru, bool *skip), TP_ARGS(lruvec, folio, lru, skip)); +DECLARE_HOOK(android_vh_lru_gen_add_folio_skip, + TP_PROTO(struct lruvec *lruvec, struct folio *folio, bool *skip), + TP_ARGS(lruvec, folio, skip)); +DECLARE_HOOK(android_vh_lru_gen_del_folio_skip, + TP_PROTO(struct lruvec *lruvec, struct folio *folio, bool *skip), + TP_ARGS(lruvec, folio, skip)); DECLARE_HOOK(android_vh_add_lazyfree_bypass, TP_PROTO(struct lruvec *lruvec, struct folio *folio, bool *bypass), TP_ARGS(lruvec, folio, bypass)); diff --git a/include/trace/hooks/vmscan.h b/include/trace/hooks/vmscan.h index 03566f66fc72..4bdcfc29c2f4 100644 --- a/include/trace/hooks/vmscan.h +++ b/include/trace/hooks/vmscan.h @@ -12,6 
+12,9 @@ DECLARE_RESTRICTED_HOOK(android_rvh_set_balance_anon_file_reclaim, TP_PROTO(bool *balance_anon_file_reclaim), TP_ARGS(balance_anon_file_reclaim), 1); +DECLARE_RESTRICTED_HOOK(android_rvh_kswapd_shrink_node, + TP_PROTO(unsigned long *nr_reclaimed), + TP_ARGS(nr_reclaimed), 1); DECLARE_HOOK(android_vh_tune_swappiness, TP_PROTO(int *swappiness), TP_ARGS(swappiness)); @@ -52,6 +55,15 @@ DECLARE_HOOK(android_vh_inode_lru_isolate, DECLARE_HOOK(android_vh_invalidate_mapping_pagevec, TP_PROTO(struct address_space *mapping, bool *skip), TP_ARGS(mapping, skip)); +DECLARE_HOOK(android_vh_keep_reclaimed_folio, + TP_PROTO(struct folio *folio, int refcount, bool *keep), + TP_ARGS(folio, refcount, keep)); +DECLARE_HOOK(android_vh_clear_reclaimed_folio, + TP_PROTO(struct folio *folio, bool reclaimed), + TP_ARGS(folio, reclaimed)); +DECLARE_HOOK(android_vh_evict_folios_bypass, + TP_PROTO(struct folio *folio, bool *bypass), + TP_ARGS(folio, bypass)); enum scan_balance; DECLARE_HOOK(android_vh_tune_scan_type, diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index bfdd051cd178..483b14075944 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -100,9 +100,10 @@ enum upiu_response_transaction { UPIU_TRANSACTION_REJECT_UPIU = 0x3F, }; -/* UPIU Read/Write flags */ +/* UPIU Read/Write flags. See also table "UPIU Flags" in the UFS standard. */ enum { UPIU_CMD_FLAGS_NONE = 0x00, + UPIU_CMD_FLAGS_CP = 0x04, UPIU_CMD_FLAGS_WRITE = 0x20, UPIU_CMD_FLAGS_READ = 0x40, }; diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d3ca2e60d965..66bd5c15375e 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -248,7 +248,15 @@ struct ufs_query { struct ufs_dev_cmd { enum dev_cmd_type type; struct mutex lock; - struct completion *complete; + struct completion *complete + /* + * Apparently the CRC generated by the ABI checker changes if an + * attribute is added to a structure member. Hence the #ifndef below. 
+ */ +#ifndef __GENKSYMS__ + __attribute__((deprecated)) +#endif + ; struct ufs_query query; }; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e17c365a3695..9cb68c924a2e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2304,9 +2304,37 @@ static struct file_system_type cgroup2_fs_type = { }; #ifdef CONFIG_CPUSETS +enum cpuset_param { + Opt_cpuset_v2_mode, +}; + +static const struct fs_parameter_spec cpuset_fs_parameters[] = { + fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), + {} +}; + +static int cpuset_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, cpuset_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_cpuset_v2_mode: + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; + return 0; + } + return -EINVAL; +} + static const struct fs_context_operations cpuset_fs_context_ops = { .get_tree = cgroup1_get_tree, .free = cgroup_fs_context_free, + .parse_param = cpuset_parse_param, }; /* @@ -2343,6 +2371,7 @@ static int cpuset_init_fs_context(struct fs_context *fc) static struct file_system_type cpuset_fs_type = { .name = "cpuset", .init_fs_context = cpuset_init_fs_context, + .parameters = cpuset_fs_parameters, .fs_flags = FS_USERNS_MOUNT, }; #endif diff --git a/kernel/kthread.c b/kernel/kthread.c index faea3cb7bfae..cbed782e286e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1561,4 +1561,5 @@ struct cgroup_subsys_state *kthread_blkcg(void) } return NULL; } +EXPORT_SYMBOL_GPL(kthread_blkcg); #endif diff --git a/kernel/module/Makefile b/kernel/module/Makefile index d54ccd363be9..25fe68e7fbcb 100644 --- a/kernel/module/Makefile +++ b/kernel/module/Makefile @@ -32,7 +32,13 @@ obj-$(CONFIG_MODULE_STATS) += stats.o $(obj)/gki_module.o: include/generated/gki_module_protected_exports.h \ include/generated/gki_module_unprotected.h +ifneq ($(CONFIG_UNUSED_KSYMS_WHITELIST),) +ALL_KMI_SYMBOLS := $(CONFIG_UNUSED_KSYMS_WHITELIST) +else ALL_KMI_SYMBOLS := include/config/abi_gki_kmi_symbols +$(ALL_KMI_SYMBOLS): + : > $@ +endif include/generated/gki_module_unprotected.h: $(ALL_KMI_SYMBOLS) \ $(srctree)/scripts/gen_gki_modules_headers.sh @@ -43,10 +49,6 @@ include/generated/gki_module_unprotected.h: $(ALL_KMI_SYMBOLS) \ # AARCH is the same as ARCH, except that arm64 becomes aarch64 AARCH := $(if $(filter arm64,$(ARCH)),aarch64,$(ARCH)) -# Generate symbol list with union of all symbol list for ARCH -$(ALL_KMI_SYMBOLS): $(wildcard $(srctree)/android/abi_gki_$(AARCH) $(srctree)/android/abi_gki_$(AARCH)_*) - $(if $(strip $^),cat $^ > $(ALL_KMI_SYMBOLS), echo "" > $(ALL_KMI_SYMBOLS)) - # ABI protected exports list file specific to ARCH if exists else empty ABI_PROTECTED_EXPORTS_FILE := $(wildcard $(srctree)/android/abi_gki_protected_exports_$(AARCH)) diff --git a/kernel/regset.c b/kernel/regset.c index 586823786f39..b2871fa68b2a 100644 --- a/kernel/regset.c +++ b/kernel/regset.c @@ -16,14 +16,14 @@ static int __regset_get(struct task_struct *target, if (size > regset->n * regset->size) size = regset->n * regset->size; if (!p) { - to_free = p = kzalloc(size, GFP_KERNEL); + to_free = p = kvzalloc(size, GFP_KERNEL); if (!p) return -ENOMEM; } res = regset->regset_get(target, regset, (struct membuf){.p = p, .left = size}); if (res < 0) { - kfree(to_free); + kvfree(to_free); return res; } *data = p; @@ -71,6 +71,6 @@ int copy_regset_to_user(struct task_struct *target, ret = regset_get_alloc(target, 
regset, size, &buf); if (ret > 0) ret = copy_to_user(data, buf, ret) ? -EFAULT : 0; - kfree(buf); + kvfree(buf); return ret; } diff --git a/kernel/sched/TEST_MAPPING b/kernel/sched/TEST_MAPPING index 603cb5cea199..48bd795b2489 100644 --- a/kernel/sched/TEST_MAPPING +++ b/kernel/sched/TEST_MAPPING @@ -236,6 +236,26 @@ "include-filter": "kselftest_x86_test_mremap_vdso" } ] + }, + { + "name": "CtsJobSchedulerTestCases", + "options": [ + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testCellularConstraintExecutedAndStopped" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_transitionNetworks" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_withMobile" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testEJMeteredConstraintFails_withMobile_DataSaverOn" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testMeteredConstraintFails_withMobile_DataSaverOn" + } + ] } ], "presubmit-large": [ diff --git a/lib/sbitmap.c b/lib/sbitmap.c index e188d4fbebd8..525542041f8f 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -100,24 +100,28 @@ static inline void update_alloc_hint_after_get(struct sbitmap *sb, * * Return: Returns the spinlock corresponding to index. */ -static spinlock_t *sbitmap_spinlock(struct sbitmap_word *map, - unsigned int map_nr, unsigned int index) +static spinlock_t *sbitmap_spinlock(struct sbitmap *sb, unsigned int index) { - spinlock_t *base_lock = (spinlock_t *)(&map[map_nr - index]); + const unsigned int max_map_nr = *(unsigned int *)&sb->map[-1]; + spinlock_t *const base_lock = (spinlock_t *) + round_up((uintptr_t)&sb->map[max_map_nr], + __alignof__(spinlock_t)); + + WARN_ON_ONCE(index < 0 || index >= sb->map_nr); - BUG_ON(((unsigned long)base_lock % __alignof__(spinlock_t))); return &base_lock[index]; } /* * See if we have deferred clears that we can batch move */ -static inline bool sbitmap_deferred_clear(struct sbitmap_word *map, +static inline bool sbitmap_deferred_clear(struct sbitmap *sb, + struct sbitmap_word *map, unsigned int depth, unsigned int alloc_hint, bool wrap, unsigned int map_nr, unsigned int index) { unsigned long mask, word_mask; - spinlock_t *swap_lock = sbitmap_spinlock(map, map_nr, index); + spinlock_t *swap_lock = sbitmap_spinlock(sb, index); guard(spinlock_irqsave)(swap_lock); @@ -183,13 +187,17 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, sb->alloc_hint = NULL; } - /* Due to 72d04bdcf3f7 ("sbitmap: fix io hung due to race on sbitmap_word - * ::cleared") directly adding spinlock_t swap_1ock to struct sbitmap_word - * in sbitmap.h, KMI was damaged. In order to achieve functionality without - * damaging KMI, we can only apply for a block of memory with a size of - * map_nr * (sizeof (* sb ->map)+sizeof(spinlock_t)) to ensure that each - * struct sbitmap-word receives protection from spinlock. - * The actual memory distribution used is as follows: + /* + * Commit 72d04bdcf3f7 ("sbitmap: fix io hung due to race on + * sbitmap_word::cleared") broke the KMI by adding `spinlock_t + * swap_1ock` in struct sbitmap_word in sbitmap.h. Restore the KMI by + * making sb->map larger and by storing the size of the sb->map array + * and the spinlock instances in that array. 
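+	 *
+	 * For example (map_nr == 4 is assumed here purely for illustration):
+	 * sb->map[-1] keeps the map_nr value from allocation time (max_map_nr)
+	 * in its first four bytes, sb->map[0..3] hold the words, and the
+	 * swap_lock[] array starts at the first spinlock_t-aligned address
+	 * past sb->map[4], so sbitmap_spinlock(sb, i) resolves to
+	 * &swap_lock[i].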
+ * + * The memory layout of sb->map is as follows: + * ---------------------- + * struct sbitmap_word[-1] - only the first four bytes are used to store + * max_map_nr. * ---------------------- * struct sbitmap_word[0] * ...................... @@ -199,17 +207,23 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, * ....................... * spinlock_t swap_lock[n] * ---------------------- - * sbitmap_word[0] corresponds to swap_lock[0], and sbitmap_word[n] - * corresponds to swap_lock[n], and so on + * + * sbitmap_word[0] corresponds to swap_lock[0], and sbitmap_word[n] + * corresponds to swap_lock[n], and so on. */ - sb->map = kvzalloc_node(sb->map_nr * (sizeof(*sb->map) + sizeof(spinlock_t)), flags, node); + const size_t map_size = round_up((sb->map_nr + 1) * sizeof(*sb->map), + __alignof__(spinlock_t)) + + sb->map_nr * sizeof(spinlock_t); + sb->map = kvzalloc_node(map_size, flags, node); if (!sb->map) { free_percpu(sb->alloc_hint); return -ENOMEM; } + *(unsigned int *)sb->map = sb->map_nr; + sb->map++; for (i = 0; i < sb->map_nr; i++) { - spinlock_t *swap_lock = sbitmap_spinlock(&sb->map[i], sb->map_nr, i); + spinlock_t *swap_lock = sbitmap_spinlock(sb, i); spin_lock_init(swap_lock); } @@ -224,7 +238,7 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) unsigned int i; for (i = 0; i < sb->map_nr; i++) - sbitmap_deferred_clear(&sb->map[i], 0, 0, 0, sb->map_nr, i); + sbitmap_deferred_clear(sb, &sb->map[i], 0, 0, 0, sb->map_nr, i); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); @@ -265,7 +279,8 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return nr; } -static int sbitmap_find_bit_in_word(struct sbitmap_word *map, +static int sbitmap_find_bit_in_word(struct sbitmap *sb, + struct sbitmap_word *map, unsigned int depth, unsigned int alloc_hint, bool wrap, @@ -279,7 +294,7 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map, alloc_hint, wrap); if (nr != -1) break; - if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap, map_nr, index)) + if (!sbitmap_deferred_clear(sb, map, depth, alloc_hint, wrap, map_nr, index)) break; } while (1); @@ -296,7 +311,7 @@ static int sbitmap_find_bit(struct sbitmap *sb, int nr = -1; for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_word(&sb->map[index], + nr = sbitmap_find_bit_in_word(sb, &sb->map[index], min_t(unsigned int, __map_depth(sb, index), depth), @@ -602,7 +617,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int map_depth = __map_depth(sb, index); unsigned long val; - sbitmap_deferred_clear(map, 0, 0, 0, sb->map_nr, index); + sbitmap_deferred_clear(sb, map, 0, 0, 0, sb->map_nr, index); val = READ_ONCE(map->word); if (val == (1UL << (map_depth - 1)) - 1) goto next; diff --git a/lib/string.c b/lib/string.c index be26623953d2..d49243af5bf2 100644 --- a/lib/string.c +++ b/lib/string.c @@ -128,6 +128,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count) if (count == 0 || WARN_ON_ONCE(count > INT_MAX)) return -E2BIG; +#ifndef CONFIG_DCACHE_WORD_ACCESS #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS /* * If src is unaligned, don't cross a page boundary, @@ -142,12 +143,14 @@ ssize_t strscpy(char *dest, const char *src, size_t count) /* If src or dest is unaligned, don't do word-at-a-time. */ if (((long) dest | (long) src) & (sizeof(long) - 1)) max = 0; +#endif #endif /* - * read_word_at_a_time() below may read uninitialized bytes after the - * trailing zero and use them in comparisons. 
Disable this optimization - * under KMSAN to prevent false positive reports. + * load_unaligned_zeropad() or read_word_at_a_time() below may read + * uninitialized bytes after the trailing zero and use them in + * comparisons. Disable this optimization under KMSAN to prevent + * false positive reports. */ if (IS_ENABLED(CONFIG_KMSAN)) max = 0; @@ -155,7 +158,11 @@ ssize_t strscpy(char *dest, const char *src, size_t count) while (max >= sizeof(unsigned long)) { unsigned long c, data; +#ifdef CONFIG_DCACHE_WORD_ACCESS + c = load_unaligned_zeropad(src+res); +#else c = read_word_at_a_time(src+res); +#endif if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); diff --git a/mm/TEST_MAPPING b/mm/TEST_MAPPING index 70b597e11413..5729d660dd0c 100644 --- a/mm/TEST_MAPPING +++ b/mm/TEST_MAPPING @@ -225,6 +225,26 @@ "include-filter": "kselftest_x86_test_mremap_vdso" } ] + }, + { + "name": "CtsJobSchedulerTestCases", + "options": [ + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testCellularConstraintExecutedAndStopped" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_transitionNetworks" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_withMobile" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testEJMeteredConstraintFails_withMobile_DataSaverOn" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testMeteredConstraintFails_withMobile_DataSaverOn" + } + ] } ], "kernel-presubmit": [ diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h index 6cc8b245586d..79f1f12e0dd5 100644 --- a/mm/damon/core-test.h +++ b/mm/damon/core-test.h @@ -94,6 +94,7 @@ static void damon_test_aggregate(struct kunit *test) for (ir = 0; ir < 3; ir++) { r = damon_new_region(saddr[it][ir], eaddr[it][ir]); r->nr_accesses = accesses[it][ir]; + r->nr_accesses_bp = accesses[it][ir] * 10000; damon_add_region(r, t); } it++; @@ -147,9 +148,11 @@ static void damon_test_merge_two(struct kunit *test) t = damon_new_target(); r = damon_new_region(0, 100); r->nr_accesses = 10; + r->nr_accesses_bp = 100000; damon_add_region(r, t); r2 = damon_new_region(100, 300); r2->nr_accesses = 20; + r2->nr_accesses_bp = 200000; damon_add_region(r2, t); damon_merge_two_regions(t, r, r2); @@ -196,6 +199,7 @@ static void damon_test_merge_regions_of(struct kunit *test) for (i = 0; i < ARRAY_SIZE(sa); i++) { r = damon_new_region(sa[i], ea[i]); r->nr_accesses = nrs[i]; + r->nr_accesses_bp = nrs[i] * 10000; damon_add_region(r, t); } @@ -297,6 +301,7 @@ static void damon_test_update_monitoring_result(struct kunit *test) struct damon_region *r = damon_new_region(3, 7); r->nr_accesses = 15; + r->nr_accesses_bp = 150000; r->age = 20; new_attrs = (struct damon_attrs){ @@ -341,6 +346,21 @@ static void damon_test_set_attrs(struct kunit *test) KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL); } +static void damon_test_moving_sum(struct kunit *test) +{ + unsigned int mvsum = 50000, nomvsum = 50000, len_window = 10; + unsigned int new_values[] = {10000, 0, 10000, 0, 0, 0, 10000, 0, 0, 0}; + unsigned int expects[] = {55000, 50000, 55000, 50000, 45000, 40000, + 45000, 40000, 35000, 30000}; + int i; + + for (i = 0; i < ARRAY_SIZE(new_values); i++) { + mvsum = damon_moving_sum(mvsum, nomvsum, len_window, + new_values[i]); + KUNIT_EXPECT_EQ(test, mvsum, expects[i]); + } +} + 
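+/*
+ * A quick arithmetic check of the expected values above: with
+ * mvsum == nomvsum == 50000 and len_window == 10, damon_moving_sum() drops
+ * nomvsum / len_window == 5000 and adds the new value, so the first two
+ * samples give 50000 - 5000 + 10000 == 55000 and 55000 - 5000 + 0 == 50000,
+ * matching expects[0] and expects[1].
+ */
+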
static void damos_test_new_filter(struct kunit *test) { struct damos_filter *filter; @@ -425,6 +445,7 @@ static struct kunit_case damon_test_cases[] = { KUNIT_CASE(damon_test_set_regions), KUNIT_CASE(damon_test_update_monitoring_result), KUNIT_CASE(damon_test_set_attrs), + KUNIT_CASE(damon_test_moving_sum), KUNIT_CASE(damos_test_new_filter), KUNIT_CASE(damos_test_filter_out), {}, diff --git a/mm/damon/core.c b/mm/damon/core.c index 43e4fe7ef17e..e26ed7bb1c27 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -128,6 +128,7 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end) region->ar.start = start; region->ar.end = end; region->nr_accesses = 0; + region->nr_accesses_bp = 0; INIT_LIST_HEAD(®ion->list); region->age = 0; @@ -525,6 +526,7 @@ static void damon_update_monitoring_result(struct damon_region *r, { r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->nr_accesses * 10000; r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs); } @@ -788,12 +790,13 @@ static void damon_split_region_at(struct damon_target *t, static bool __damos_valid_target(struct damon_region *r, struct damos *s) { unsigned long sz; + unsigned int nr_accesses = r->nr_accesses_bp / 10000; sz = damon_sz_region(r); return s->pattern.min_sz_region <= sz && sz <= s->pattern.max_sz_region && - s->pattern.min_nr_accesses <= r->nr_accesses && - r->nr_accesses <= s->pattern.max_nr_accesses && + s->pattern.min_nr_accesses <= nr_accesses && + nr_accesses <= s->pattern.max_nr_accesses && s->pattern.min_age_region <= r->age && r->age <= s->pattern.max_age_region; } @@ -948,6 +951,33 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, struct timespec64 begin, end; unsigned long sz_applied = 0; int err = 0; + /* + * We plan to support multiple context per kdamond, as DAMON sysfs + * implies with 'nr_contexts' file. Nevertheless, only single context + * per kdamond is supported for now. So, we can simply use '0' context + * index here. 
+ */ + unsigned int cidx = 0; + struct damos *siter; /* schemes iterator */ + unsigned int sidx = 0; + struct damon_target *titer; /* targets iterator */ + unsigned int tidx = 0; + bool do_trace = false; + + /* get indices for trace_damos_before_apply() */ + if (trace_damos_before_apply_enabled()) { + damon_for_each_scheme(siter, c) { + if (siter == s) + break; + sidx++; + } + damon_for_each_target(titer, c) { + if (titer == t) + break; + tidx++; + } + do_trace = true; + } if (c->ops.apply_scheme) { if (quota->esz && quota->charged_sz + sz > quota->esz) { @@ -962,8 +992,11 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, ktime_get_coarse_ts64(&begin); if (c->callback.before_damos_apply) err = c->callback.before_damos_apply(c, t, r, s); - if (!err) + if (!err) { + trace_damos_before_apply(cidx, sidx, tidx, r, + damon_nr_regions(t), do_trace); sz_applied = c->ops.apply_scheme(c, t, r, s); + } ktime_get_coarse_ts64(&end); quota->total_charged_ns += timespec64_to_ns(&end) - timespec64_to_ns(&begin); @@ -1127,6 +1160,7 @@ static void damon_merge_two_regions(struct damon_target *t, l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / (sz_l + sz_r); + l->nr_accesses_bp = l->nr_accesses * 10000; l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); l->ar.end = r->ar.end; damon_destroy_region(r, t); @@ -1216,6 +1250,7 @@ static void damon_split_region_at(struct damon_target *t, new->age = r->age; new->last_nr_accesses = r->last_nr_accesses; new->nr_accesses = r->nr_accesses; + new->nr_accesses_bp = r->nr_accesses_bp; damon_insert_region(new, r, damon_next_region(r), t); } @@ -1597,6 +1632,76 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t, return damon_set_regions(t, &addr_range, 1); } +/* + * damon_moving_sum() - Calculate an inferred moving sum value. + * @mvsum: Inferred sum of the last @len_window values. + * @nomvsum: Non-moving sum of the last discrete @len_window window values. + * @len_window: The number of last values to take care of. + * @new_value: New value that will be added to the pseudo moving sum. + * + * Moving sum (moving average * window size) is good for handling noise, but + * the cost of keeping past values can be high for arbitrary window size. This + * function implements a lightweight pseudo moving sum function that doesn't + * keep the past window values. + * + * It simply assumes there was no noise in the past, and gets the no-noise + * assumed past value to drop from @nomvsum and @len_window. @nomvsum is a + * non-moving sum of the last window. For example, if @len_window is 10 and we + * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25 + * values. Hence, this function simply drops @nomvsum / @len_window from + * the given @mvsum and adds @new_value. + * + * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values for + * the last window could vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20. For + * calculating the next moving sum with a new value, we should drop 0 from 50 and + * add the new value. However, this function assumes it got value 5 for each + * of the last ten times. Based on the assumption, when the next value is + * measured, it drops the assumed past value, 5, from the current sum, and adds + * the new value to get the updated pseudo-moving average. + * + * This means the value could have errors, but the errors will disappear
for every @len_window aligned calls. For example, if @len_window is 10, the + * pseudo moving sum with the 11th to 19th values would have an error. But + * the sum with the 20th value will not have the error. + * + * Return: Pseudo-moving sum after getting the @new_value. + */ +static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, + unsigned int len_window, unsigned int new_value) +{ + return mvsum - nomvsum / len_window + new_value; +} + +/** + * damon_update_region_access_rate() - Update the access rate of a region. + * @r: The DAMON region to update for its access check result. + * @accessed: Whether the region was accessed during the last sampling interval. + * @attrs: The damon_attrs of the DAMON context. + * + * Update the access rate of a region with the region's last sampling interval + * access check result. + * + * Usually this will be called by the &damon_operations->check_accesses callback. + */ +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs) +{ + unsigned int len_window = 1; + + /* + * sample_interval can be zero, but cannot be larger than + * aggr_interval, owing to validation of damon_set_attrs(). + */ + if (attrs->sample_interval) + len_window = attrs->aggr_interval / attrs->sample_interval; + r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp, + r->last_nr_accesses * 10000, len_window, + accessed ? 10000 : 0); + + if (accessed) + r->nr_accesses++; +} + static int __init damon_init(void) { damon_region_cache = KMEM_CACHE(damon_region, 0); diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 21d31580d1a4..5e6dc312072c 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -148,7 +148,8 @@ out: return accessed; } -static void __damon_pa_check_access(struct damon_region *r) +static void __damon_pa_check_access(struct damon_region *r, + struct damon_attrs *attrs) { static unsigned long last_addr; static unsigned long last_folio_sz = PAGE_SIZE; @@ -157,14 +158,12 @@ static void __damon_pa_check_access(struct damon_region *r) /* If the region is in the last checked page, reuse the result */ if (ALIGN_DOWN(last_addr, last_folio_sz) == ALIGN_DOWN(r->sampling_addr, last_folio_sz)) { - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed, attrs); return; } last_accessed = damon_pa_young(r->sampling_addr, &last_folio_sz); - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed, attrs); last_addr = r->sampling_addr; } @@ -177,7 +176,7 @@ static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) damon_for_each_target(t, ctx) { damon_for_each_region(r, t) { - __damon_pa_check_access(r); + __damon_pa_check_access(r, &ctx->attrs); max_nr_accesses = max(r->nr_accesses, max_nr_accesses); } } diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index b86509634053..ba77dc828038 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -31,7 +31,7 @@ static struct damon_sysfs_scheme_region *damon_sysfs_scheme_region_alloc( return NULL; sysfs_region->kobj = (struct kobject){}; sysfs_region->ar = region->ar; - sysfs_region->nr_accesses = region->nr_accesses; + sysfs_region->nr_accesses = region->nr_accesses_bp / 10000; sysfs_region->age = region->age; INIT_LIST_HEAD(&sysfs_region->list); return sysfs_region; diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 5764b9885e7d..b61919f646f6 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -560,23 +560,27 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr, * r the region
to be checked */ static void __damon_va_check_access(struct mm_struct *mm, - struct damon_region *r, bool same_target) + struct damon_region *r, bool same_target, + struct damon_attrs *attrs) { static unsigned long last_addr; static unsigned long last_folio_sz = PAGE_SIZE; static bool last_accessed; + if (!mm) { + damon_update_region_access_rate(r, false, attrs); + return; + } + /* If the region is in the last checked page, reuse the result */ if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed, attrs); return; } last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed, attrs); last_addr = r->sampling_addr; } @@ -591,15 +595,15 @@ static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) damon_for_each_target(t, ctx) { mm = damon_get_mm(t); - if (!mm) - continue; same_target = false; damon_for_each_region(r, t) { - __damon_va_check_access(mm, r, same_target); + __damon_va_check_access(mm, r, same_target, + &ctx->attrs); max_nr_accesses = max(r->nr_accesses, max_nr_accesses); same_target = true; } - mmput(mm); + if (mm) + mmput(mm); } return max_nr_accesses; diff --git a/mm/debug_page_alloc.c b/mm/debug_page_alloc.c index f9d145730fd1..03a810927d0a 100644 --- a/mm/debug_page_alloc.c +++ b/mm/debug_page_alloc.c @@ -32,8 +32,7 @@ static int __init debug_guardpage_minorder_setup(char *buf) } early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup); -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype) +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order) { if (order >= debug_guardpage_minorder()) return false; @@ -41,19 +40,12 @@ bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, __SetPageGuard(page); INIT_LIST_HEAD(&page->buddy_list); set_page_private(page, order); - /* Guard pages are not available for any usage */ - if (!is_migrate_isolate(migratetype)) - __mod_zone_freepage_state(zone, -(1 << order), migratetype); return true; } -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype) +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order) { __ClearPageGuard(page); - set_page_private(page, 0); - if (!is_migrate_isolate(migratetype)) - __mod_zone_freepage_state(zone, (1 << order), migratetype); } diff --git a/mm/filemap.c b/mm/filemap.c index ef0f558511ac..d6bdeb71305c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -3727,6 +3728,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, last_pgoff = xas.xa_index; end = folio->index + folio_nr_pages(folio) - 1; nr_pages = min(end, end_pgoff) - xas.xa_index + 1; + trace_android_vh_filemap_pages(folio); if (!folio_test_large(folio)) ret |= filemap_map_order0_folio(vmf, @@ -4373,6 +4375,17 @@ resched: } } rcu_read_unlock(); + + /* Adjust the counts if emulating the page size */ + if (__PAGE_SIZE > PAGE_SIZE) { + unsigned int nr_sub_pages = __PAGE_SIZE / PAGE_SIZE; + + cs->nr_cache /= nr_sub_pages; + cs->nr_dirty /= nr_sub_pages; + cs->nr_writeback /= nr_sub_pages; + cs->nr_evicted /= nr_sub_pages; + cs->nr_recently_evicted /= nr_sub_pages; + } } /* diff --git a/mm/internal.h b/mm/internal.h index d116462f6071..a388b591fc45 
100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -735,10 +735,6 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int); - -int split_free_page(struct page *free_page, - unsigned int order, unsigned long split_pfn_offset); - #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* @@ -1216,11 +1212,6 @@ static inline bool is_migrate_highatomic(enum migratetype migratetype) return migratetype == MIGRATE_HIGHATOMIC; } -static inline bool is_migrate_highatomic_page(struct page *page) -{ - return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC; -} - void setup_zone_pageset(struct zone *zone); struct migration_target_control { diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index ecf9f5aa3520..94d6debde748 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -1043,6 +1043,7 @@ static void kasan_memcmp(struct kunit *test) static void kasan_strings(struct kunit *test) { char *ptr; + char *src; size_t size = 24; /* @@ -1054,6 +1055,25 @@ static void kasan_strings(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + src = kmalloc(KASAN_GRANULE_SIZE, GFP_KERNEL | __GFP_ZERO); + strscpy(src, "f0cacc1a0000000", KASAN_GRANULE_SIZE); + + /* + * Make sure that strscpy() does not trigger KASAN if it overreads into + * poisoned memory. + * + * The expected size does not include the terminator '\0' + * so it is (KASAN_GRANULE_SIZE - 2) == + * KASAN_GRANULE_SIZE - ("initial removed character" + "\0"). + */ + KUNIT_EXPECT_EQ(test, KASAN_GRANULE_SIZE - 2, + strscpy(ptr, src + 1, KASAN_GRANULE_SIZE)); + + /* strscpy should fail if the first byte is unreadable. 
*/ + KUNIT_EXPECT_KASAN_FAIL(test, strscpy(ptr, src + KASAN_GRANULE_SIZE, + KASAN_GRANULE_SIZE)); + + kfree(src); kfree(ptr); /* diff --git a/mm/madvise.c b/mm/madvise.c index ca3109f2408b..b36a6a32a1e3 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1544,6 +1544,14 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { + bool bypass = false; + int ret; + + trace_android_rvh_do_madvise_bypass(current->mm, start, + len_in, behavior, &ret, &bypass); + if (bypass) + return ret; + return do_madvise(current->mm, start, len_in, behavior); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d1a37b440012..2694ddf0f99f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1434,9 +1434,17 @@ void do_traversal_all_lruvec(void) memcg = mem_cgroup_iter(NULL, NULL, NULL); do { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + bool stop = false; trace_android_vh_do_traversal_lruvec(lruvec); + trace_android_rvh_do_traversal_lruvec_ex(memcg, lruvec, + &stop); + if (stop) { + mem_cgroup_iter_break(NULL, memcg); + break; + } + memcg = mem_cgroup_iter(NULL, memcg, NULL); } while (memcg); } diff --git a/mm/mmap.c b/mm/mmap.c index 34dbca85d6f7..4c74fb3d7a94 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2703,14 +2703,14 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, return do_vmi_munmap(&vmi, mm, start, len, uf, false); } -static unsigned long __mmap_region(struct file *file, unsigned long addr, +unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; struct vm_area_struct *next, *prev, *merge; - pgoff_t pglen = PHYS_PFN(len); + pgoff_t pglen = len >> PAGE_SHIFT; unsigned long charged = 0; unsigned long end = addr + len; unsigned long merge_start = addr, merge_end = end; @@ -2810,26 +2810,25 @@ cannot_expand: vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; - if (vma_iter_prealloc(&vmi, vma)) { - error = -ENOMEM; - goto free_vma; - } - if (file) { + if (vm_flags & VM_SHARED) { + error = mapping_map_writable(file->f_mapping); + if (error) + goto free_vma; + } + vma->vm_file = get_file(file); error = mmap_file(file, vma); if (error) - goto unmap_and_free_file_vma; + goto unmap_and_free_vma; - /* Drivers cannot alter the address of the VMA. */ - WARN_ON_ONCE(addr != vma->vm_start); /* - * Drivers should not permit writability when previously it was - * disallowed. + * Expansion is handled above, merging is handled below. + * Drivers should not alter the address of the VMA. */ - VM_WARN_ON_ONCE(vm_flags != vma->vm_flags && - !(vm_flags & VM_MAYWRITE) && - (vma->vm_flags & VM_MAYWRITE)); + error = -EINVAL; + if (WARN_ON((addr != vma->vm_start))) + goto close_and_free_vma; vma_iter_config(&vmi, addr, end); /* @@ -2841,7 +2840,6 @@ cannot_expand: vma->vm_end, vma->vm_flags, NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL); - if (merge) { /* * ->mmap() can change vma->vm_file and fput @@ -2855,7 +2853,7 @@ cannot_expand: vma = merge; /* Update vm_flags to pick up the change. 
*/ vm_flags = vma->vm_flags; - goto file_expanded; + goto unmap_writable; } } @@ -2863,15 +2861,24 @@ cannot_expand: } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) - goto free_iter_vma; + goto free_vma; } else { vma_set_anonymous(vma); } -#ifdef CONFIG_SPARC64 - /* TODO: Fix SPARC ADI! */ - WARN_ON_ONCE(!arch_validate_flags(vm_flags)); -#endif + if (map_deny_write_exec(vma->vm_flags, vma->vm_flags)) { + error = -EACCES; + goto close_and_free_vma; + } + + /* Allow architectures to sanity-check the vm_flags */ + error = -EINVAL; + if (!arch_validate_flags(vma->vm_flags)) + goto close_and_free_vma; + + error = -ENOMEM; + if (vma_iter_prealloc(&vmi, vma)) + goto close_and_free_vma; /* Lock the VMA since it is modified after insertion into VMA tree */ vma_start_write(vma); @@ -2894,7 +2901,10 @@ cannot_expand: */ khugepaged_enter_vma(vma, vma->vm_flags); -file_expanded: + /* Once vma denies write, undo our temporary denial count */ +unmap_writable: + if (file && vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); file = vma->vm_file; ksm_add_vma(vma); expanded: @@ -2926,60 +2936,33 @@ expanded: trace_android_vh_mmap_region(vma, addr); + validate_mm(mm); return addr; -unmap_and_free_file_vma: - fput(vma->vm_file); - vma->vm_file = NULL; +close_and_free_vma: + vma_close(vma); - vma_iter_set(&vmi, vma->vm_end); - /* Undo any partial mapping done by a device driver. */ - unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start, - vma->vm_end, vma->vm_end, true); -free_iter_vma: - vma_iter_free(&vmi); + if (file || vma->vm_file) { +unmap_and_free_vma: + fput(vma->vm_file); + vma->vm_file = NULL; + + vma_iter_set(&vmi, vma->vm_end); + /* Undo any partial mapping done by a device driver. */ + unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start, + vma->vm_end, vma->vm_end, true); + } + if (file && (vm_flags & VM_SHARED)) + mapping_unmap_writable(file->f_mapping); free_vma: vm_area_free(vma); unacct_error: if (charged) vm_unacct_memory(charged); + validate_mm(mm); return error; } -unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, - struct list_head *uf) -{ - unsigned long ret; - bool writable_file_mapping = false; - - /* Check to see if MDWE is applicable. */ - if (map_deny_write_exec(vm_flags, vm_flags)) - return -EACCES; - - /* Allow architectures to sanity-check the vm_flags. */ - if (!arch_validate_flags(vm_flags)) - return -EINVAL; - - /* Map writable and ensure this isn't a sealed memfd. */ - if (file && (vm_flags & VM_SHARED)) { - int error = mapping_map_writable(file->f_mapping); - - if (error) - return error; - writable_file_mapping = true; - } - - ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf); - - /* Clear our write mapping regardless of error. */ - if (writable_file_mapping) - mapping_unmap_writable(file->f_mapping); - - validate_mm(current->mm); - return ret; -} - static int __vm_munmap(unsigned long start, size_t len, bool unlock) { int ret; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebdda7ed7927..187a8b29b60b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -259,24 +259,6 @@ EXPORT_SYMBOL(node_states); gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; -/* - * A cached value of the page's pageblock's migratetype, used when the page is - * put on a pcplist. Used to avoid the pageblock migratetype lookup when - * freeing from pcplists in most cases, at the cost of possibly becoming stale. 
- * Also the migratetype set in the page does not necessarily match the pcplist - * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any - * other index - this ensures that it will be put on the correct CMA freelist. - */ -static inline int get_pcppage_migratetype(struct page *page) -{ - return page->index; -} - -static inline void set_pcppage_migratetype(struct page *page, int migratetype) -{ - page->index = migratetype; -} - #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE unsigned int pageblock_order __read_mostly; #endif @@ -341,6 +323,8 @@ const char * const migratetype_names[MIGRATE_TYPES] = { #endif }; +unsigned long free_highatomics[MAX_NR_ZONES] = {0}; + int min_free_kbytes = 1024; int user_min_free_kbytes = -1; static int watermark_boost_factor __read_mostly = 15000; @@ -774,23 +758,38 @@ compaction_capture(struct capture_control *capc, struct page *page, } #endif /* CONFIG_COMPACTION */ -/* Used for pages not on another list */ -static inline void add_to_free_list(struct page *page, struct zone *zone, - unsigned int order, int migratetype) +static inline void account_freepages(struct zone *zone, int nr_pages, + int migratetype) { - struct free_area *area = &zone->free_area[order]; + lockdep_assert_held(&zone->lock); - list_add(&page->buddy_list, &area->free_list[migratetype]); - area->nr_free++; + if (is_migrate_isolate(migratetype)) + return; + + __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages); + + if (is_migrate_cma(migratetype)) + __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); + else if (is_migrate_highatomic(migratetype)) + WRITE_ONCE(free_highatomics[zone_idx(zone)], + free_highatomics[zone_idx(zone)] + nr_pages); } /* Used for pages not on another list */ -static inline void add_to_free_list_tail(struct page *page, struct zone *zone, - unsigned int order, int migratetype) +static inline void __add_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype, + bool tail) { struct free_area *area = &zone->free_area[order]; - list_add_tail(&page->buddy_list, &area->free_list[migratetype]); + VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), migratetype, 1 << order); + + if (tail) + list_add_tail(&page->buddy_list, &area->free_list[migratetype]); + else + list_add(&page->buddy_list, &area->free_list[migratetype]); area->nr_free++; } @@ -800,16 +799,28 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone, * allocation again (e.g., optimization for memory onlining). 
*/ static inline void move_to_free_list(struct page *page, struct zone *zone, - unsigned int order, int migratetype) + unsigned int order, int old_mt, int new_mt) { struct free_area *area = &zone->free_area[order]; - list_move_tail(&page->buddy_list, &area->free_list[migratetype]); + /* Free page moving can fail, so it happens before the type update */ + VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), old_mt, 1 << order); + + list_move_tail(&page->buddy_list, &area->free_list[new_mt]); + + account_freepages(zone, -(1 << order), old_mt); + account_freepages(zone, 1 << order, new_mt); } -static inline void del_page_from_free_list(struct page *page, struct zone *zone, - unsigned int order) +static inline void __del_page_from_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) { + VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), migratetype, 1 << order); + /* clear reported state and update reported page count */ if (page_reported(page)) __ClearPageReported(page); @@ -820,6 +831,13 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone, zone->free_area[order].nr_free--; } +static inline void del_page_from_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) +{ + __del_page_from_free_list(page, zone, order, migratetype); + account_freepages(zone, -(1 << order), migratetype); +} + static inline struct page *get_page_from_free_area(struct free_area *area, int migratetype) { @@ -901,16 +919,17 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); VM_BUG_ON(migratetype == -1); - if (likely(!is_migrate_isolate(migratetype))) - __mod_zone_freepage_state(zone, 1 << order, migratetype); VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); + account_freepages(zone, 1 << order, migratetype); + while (order < max_order) { + int buddy_mt = migratetype; + if (compaction_capture(capc, page, order, migratetype)) { - __mod_zone_freepage_state(zone, -(1 << order), - migratetype); + account_freepages(zone, -(1 << order), migratetype); return; } @@ -925,11 +944,11 @@ static inline void __free_one_page(struct page *page, * pageblock isolation could cause incorrect freepage or CMA * accounting or HIGHATOMIC accounting. */ - int buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn); + buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn); - if (migratetype != buddy_mt - && (!migratetype_is_mergeable(migratetype) || - !migratetype_is_mergeable(buddy_mt))) + if (migratetype != buddy_mt && + (!migratetype_is_mergeable(migratetype) || + !migratetype_is_mergeable(buddy_mt))) goto done_merging; } @@ -938,9 +957,19 @@ static inline void __free_one_page(struct page *page, * merge with it and move up one order. */ if (page_is_guard(buddy)) - clear_page_guard(zone, buddy, order, migratetype); + clear_page_guard(zone, buddy, order); else - del_page_from_free_list(buddy, zone, order); + __del_page_from_free_list(buddy, zone, order, buddy_mt); + + if (unlikely(buddy_mt != migratetype)) { + /* + * Match buddy type. This ensures that an + * expand() down the line puts the sub-blocks + * on the right freelists. 
+ */ + set_pageblock_migratetype(buddy, migratetype); + } + combined_pfn = buddy_pfn & pfn; page = page + (combined_pfn - pfn); pfn = combined_pfn; @@ -959,76 +988,13 @@ done_merging: else to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order); - if (to_tail) - add_to_free_list_tail(page, zone, order, migratetype); - else - add_to_free_list(page, zone, order, migratetype); + __add_to_free_list(page, zone, order, migratetype, to_tail); /* Notify page reporting subsystem of freed page */ if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY)) page_reporting_notify_free(order); } -/** - * split_free_page() -- split a free page at split_pfn_offset - * @free_page: the original free page - * @order: the order of the page - * @split_pfn_offset: split offset within the page - * - * Return -ENOENT if the free page is changed, otherwise 0 - * - * It is used when the free page crosses two pageblocks with different migratetypes - * at split_pfn_offset within the page. The split free page will be put into - * separate migratetype lists afterwards. Otherwise, the function achieves - * nothing. - */ -int split_free_page(struct page *free_page, - unsigned int order, unsigned long split_pfn_offset) -{ - struct zone *zone = page_zone(free_page); - unsigned long free_page_pfn = page_to_pfn(free_page); - unsigned long pfn; - unsigned long flags; - int free_page_order; - int mt; - int ret = 0; - - VM_WARN_ON_ONCE_PAGE(!page_can_split(free_page), free_page); - - if (split_pfn_offset == 0) - return ret; - - spin_lock_irqsave(&zone->lock, flags); - - if (!PageBuddy(free_page) || buddy_order(free_page) != order) { - ret = -ENOENT; - goto out; - } - - mt = get_pfnblock_migratetype(free_page, free_page_pfn); - if (likely(!is_migrate_isolate(mt))) - __mod_zone_freepage_state(zone, -(1UL << order), mt); - - del_page_from_free_list(free_page, zone, order); - for (pfn = free_page_pfn; - pfn < free_page_pfn + (1UL << order);) { - int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn); - - free_page_order = min_t(unsigned int, - pfn ? __ffs(pfn) : order, - __fls(split_pfn_offset)); - __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order, - mt, FPI_NONE); - pfn += 1UL << free_page_order; - split_pfn_offset -= (1UL << free_page_order); - /* we have done the first part, now switch to second part */ - if (split_pfn_offset == 0) - split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); - } -out: - spin_unlock_irqrestore(&zone->lock, flags); - return ret; -} /* * A bad page could be due to a number of fields. Instead of multiple branches, * try and check multiple fields with one check. 
The caller must do a detailed @@ -1341,7 +1307,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, { unsigned long flags; unsigned int order; - bool isolated_pageblocks; struct page *page; /* @@ -1354,7 +1319,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, pindex = pindex - 1; spin_lock_irqsave(&zone->lock, flags); - isolated_pageblocks = has_isolate_pageblock(zone); while (count > 0) { struct list_head *list; @@ -1370,23 +1334,19 @@ static void free_pcppages_bulk(struct zone *zone, int count, order = pindex_to_order(pindex); nr_pages = 1 << order; do { + unsigned long pfn; int mt; page = list_last_entry(list, struct page, pcp_list); - mt = get_pcppage_migratetype(page); + pfn = page_to_pfn(page); + mt = get_pfnblock_migratetype(page, pfn); /* must delete to avoid corrupting pcp list */ list_del(&page->pcp_list); count -= nr_pages; pcp->count -= nr_pages; - /* MIGRATE_ISOLATE page should not go to pcplists */ - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); - /* Pageblock could have been isolated meanwhile */ - if (unlikely(isolated_pageblocks)) - mt = get_pageblock_migratetype(page); - - __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE); + __free_one_page(page, pfn, zone, order, mt, FPI_NONE); trace_mm_page_pcpu_drain(page, order, mt); } while (count > 0 && !list_empty(list)); } @@ -1394,18 +1354,15 @@ static void free_pcppages_bulk(struct zone *zone, int count, spin_unlock_irqrestore(&zone->lock, flags); } -static void free_one_page(struct zone *zone, - struct page *page, unsigned long pfn, - unsigned int order, - int migratetype, fpi_t fpi_flags) +static void free_one_page(struct zone *zone, struct page *page, + unsigned long pfn, unsigned int order, + fpi_t fpi_flags) { unsigned long flags; + int migratetype; spin_lock_irqsave(&zone->lock, flags); - if (unlikely(has_isolate_pageblock(zone) || - is_migrate_isolate(migratetype))) { - migratetype = get_pfnblock_migratetype(page, pfn); - } + migratetype = get_pfnblock_migratetype(page, pfn); __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); spin_unlock_irqrestore(&zone->lock, flags); } @@ -1433,17 +1390,15 @@ skip_prepare: fpi_flags, &skip_free_pages_ok); if (skip_free_pages_ok) return; - /* - * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here - * is used to avoid calling get_pfnblock_migratetype() under the lock. - * This will reduce the lock holding time. 
- */ - migratetype = get_pfnblock_migratetype(page, pfn); - trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip_free_unref_page); - if (skip_free_unref_page) - return; spin_lock_irqsave(&zone->lock, flags); + migratetype = get_pfnblock_migratetype(page, pfn); + trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip_free_unref_page); + if (skip_free_unref_page) { + spin_unlock_irqrestore(&zone->lock, flags); + return; + } + if (unlikely(has_isolate_pageblock(zone) || is_migrate_isolate(migratetype))) { migratetype = get_pfnblock_migratetype(page, pfn); @@ -1568,6 +1523,7 @@ static inline void expand(struct zone *zone, struct page *page, int low, int high, int migratetype) { unsigned long size = 1 << high; + unsigned long nr_added = 0; while (high > low) { high--; @@ -1580,12 +1536,14 @@ static inline void expand(struct zone *zone, struct page *page, * Corresponding page table entries will not be touched, * pages will stay not present in virtual address space */ - if (set_page_guard(zone, &page[size], high, migratetype)) + if (set_page_guard(zone, &page[size], high)) continue; - add_to_free_list(&page[size], zone, high, migratetype); + __add_to_free_list(&page[size], zone, high, migratetype, false); set_buddy_order(&page[size], high); + nr_added += size; } + account_freepages(zone, nr_added, migratetype); } static void check_new_page_bad(struct page *page) @@ -1770,9 +1728,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, page = get_page_from_free_area(area, migratetype); if (!page) continue; - del_page_from_free_list(page, zone, current_order); + del_page_from_free_list(page, zone, current_order, migratetype); expand(zone, page, order, current_order, migratetype); - set_pcppage_migratetype(page, migratetype); trace_mm_page_alloc_zone_locked(page, order, migratetype, pcp_allowed_order(order) && migratetype < MIGRATE_PCPTYPES); @@ -1807,30 +1764,23 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, #endif /* - * Move the free pages in a range to the freelist tail of the requested type. - * Note that start_page and end_pages are not aligned on a pageblock - * boundary. If alignment is required, use move_freepages_block() + * Change the type of a block and move all its free pages to that + * type's freelist. */ -static int move_freepages(struct zone *zone, - unsigned long start_pfn, unsigned long end_pfn, - int migratetype, int *num_movable) +static int move_freepages(struct zone *zone, unsigned long start_pfn, + unsigned long end_pfn, int old_mt, int new_mt) { struct page *page; unsigned long pfn; unsigned int order; int pages_moved = 0; + VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1)); + VM_WARN_ON(start_pfn + pageblock_nr_pages - 1 != end_pfn); + for (pfn = start_pfn; pfn <= end_pfn;) { page = pfn_to_page(pfn); if (!PageBuddy(page)) { - /* - * We assume that pages that could be isolated for - * migration are movable. But we don't actually try - * isolating, as that would be expensive. 
- */ - if (num_movable && - (PageLRU(page) || __PageMovable(page))) - (*num_movable)++; pfn++; continue; } @@ -1840,35 +1790,189 @@ static int move_freepages(struct zone *zone, VM_BUG_ON_PAGE(page_zone(page) != zone, page); order = buddy_order(page); - move_to_free_list(page, zone, order, migratetype); + + move_to_free_list(page, zone, order, old_mt, new_mt); + pfn += 1 << order; pages_moved += 1 << order; } + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt); + return pages_moved; } -int move_freepages_block(struct zone *zone, struct page *page, - int migratetype, int *num_movable) +static bool prep_move_freepages_block(struct zone *zone, struct page *page, + unsigned long *start_pfn, + unsigned long *end_pfn, + int *num_free, int *num_movable) +{ + unsigned long pfn, start, end; + + pfn = page_to_pfn(page); + start = pageblock_start_pfn(pfn); + end = pageblock_end_pfn(pfn) - 1; + + /* + * The caller only has the lock for @zone, don't touch ranges + * that straddle into other zones. While we could move part of + * the range that's inside the zone, this call is usually + * accompanied by other operations such as migratetype updates + * which also should be locked. + */ + if (!zone_spans_pfn(zone, start)) + return false; + if (!zone_spans_pfn(zone, end)) + return false; + + *start_pfn = start; + *end_pfn = end; + + if (num_free) { + *num_free = 0; + *num_movable = 0; + for (pfn = start; pfn <= end;) { + page = pfn_to_page(pfn); + if (PageBuddy(page)) { + int nr = 1 << buddy_order(page); + + *num_free += nr; + pfn += nr; + continue; + } + /* + * We assume that pages that could be isolated for + * migration are movable. But we don't actually try + * isolating, as that would be expensive. + */ + if (PageLRU(page) || __PageMovable(page)) + (*num_movable)++; + pfn++; + } + } + + return true; +} + +static int move_freepages_block(struct zone *zone, struct page *page, + int old_mt, int new_mt) +{ + unsigned long start_pfn, end_pfn; + + if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, + NULL, NULL)) + return -1; + + return move_freepages(zone, start_pfn, end_pfn, old_mt, new_mt); +} + +#ifdef CONFIG_MEMORY_ISOLATION +/* Look for a buddy that straddles start_pfn */ +static unsigned long find_large_buddy(unsigned long start_pfn) +{ + int order = 0; + struct page *page; + unsigned long pfn = start_pfn; + + while (!PageBuddy(page = pfn_to_page(pfn))) { + /* Nothing found */ + if (++order > MAX_ORDER) + return start_pfn; + pfn &= ~0UL << order; + } + + /* + * Found a preceding buddy, but does it straddle? + */ + if (pfn + (1 << buddy_order(page)) > start_pfn) + return pfn; + + /* Nothing found */ + return start_pfn; +} + +/* Split a multi-block free page into its individual pageblocks */ +static void split_large_buddy(struct zone *zone, struct page *page, + unsigned long pfn, int order) +{ + unsigned long end_pfn = pfn + (1 << order); + + VM_WARN_ON_ONCE(order <= pageblock_order); + VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1)); + + /* Caller removed page from freelist, buddy info cleared! 
*/ + VM_WARN_ON_ONCE(PageBuddy(page)); + + while (pfn != end_pfn) { + int mt = get_pfnblock_migratetype(page, pfn); + + __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE); + pfn += pageblock_nr_pages; + page = pfn_to_page(pfn); + } +} + +/** + * move_freepages_block_isolate - move free pages in block for page isolation + * @zone: the zone + * @page: the pageblock page + * @migratetype: migratetype to set on the pageblock + * + * This is similar to move_freepages_block(), but handles the special + * case encountered in page isolation, where the block of interest + * might be part of a larger buddy spanning multiple pageblocks. + * + * Unlike the regular page allocator path, which moves pages while + * stealing buddies off the freelist, page isolation is interested in + * arbitrary pfn ranges that may have overlapping buddies on both ends. + * + * This function handles that. Straddling buddies are split into + * individual pageblocks. Only the block of interest is moved. + * + * Returns %true if pages could be moved, %false otherwise. + */ +bool move_freepages_block_isolate(struct zone *zone, struct page *page, + int migratetype) { unsigned long start_pfn, end_pfn, pfn; - if (num_movable) - *num_movable = 0; + if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, + NULL, NULL)) + return false; - pfn = page_to_pfn(page); - start_pfn = pageblock_start_pfn(pfn); - end_pfn = pageblock_end_pfn(pfn) - 1; + /* No splits needed if buddies can't span multiple blocks */ + if (pageblock_order == MAX_ORDER) + goto move; - /* Do not cross zone boundaries */ - if (!zone_spans_pfn(zone, start_pfn)) - start_pfn = pfn; - if (!zone_spans_pfn(zone, end_pfn)) - return 0; + /* We're a tail block in a larger buddy */ + pfn = find_large_buddy(start_pfn); + if (pfn != start_pfn) { + struct page *buddy = pfn_to_page(pfn); + int order = buddy_order(buddy); - return move_freepages(zone, start_pfn, end_pfn, migratetype, - num_movable); + del_page_from_free_list(buddy, zone, order, + get_pfnblock_migratetype(buddy, pfn)); + set_pageblock_migratetype(page, migratetype); + split_large_buddy(zone, buddy, pfn, order); + return true; + } + + /* We're the starting block of a larger buddy */ + if (PageBuddy(page) && buddy_order(page) > pageblock_order) { + int order = buddy_order(page); + + del_page_from_free_list(page, zone, order, + get_pfnblock_migratetype(page, pfn)); + set_pageblock_migratetype(page, migratetype); + split_large_buddy(zone, page, pfn, order); + return true; + } +move: + move_freepages(zone, start_pfn, end_pfn, + get_pfnblock_migratetype(page, start_pfn), migratetype); + return true; } +#endif /* CONFIG_MEMORY_ISOLATION */ static void change_pageblock_range(struct page *pageblock_page, int start_order, int migratetype) @@ -1952,33 +2056,37 @@ static inline bool boost_watermark(struct zone *zone) } /* - * This function implements actual steal behaviour. If order is large enough, - * we can steal whole pageblock. If not, we first move freepages in this - * pageblock to our migratetype and determine how many already-allocated pages - * are there in the pageblock with a compatible migratetype. If at least half - * of pages are free or compatible, we can change migratetype of the pageblock - * itself, so pages freed in the future will be put on the correct free list. + * This function implements actual steal behaviour. If order is large enough, we + * can claim the whole pageblock for the requested migratetype. 
If not, we check + * the pageblock for constituent pages; if at least half of the pages are free + * or compatible, we can still claim the whole block, so pages freed in the + * future will be put on the correct free list. Otherwise, we isolate exactly + * the order we need from the fallback block and leave its migratetype alone. */ -static void steal_suitable_fallback(struct zone *zone, struct page *page, - unsigned int alloc_flags, int start_type, bool whole_block) +static struct page * +steal_suitable_fallback(struct zone *zone, struct page *page, + int current_order, int order, int start_type, + unsigned int alloc_flags, bool whole_block) { - unsigned int current_order = buddy_order(page); int free_pages, movable_pages, alike_pages; - int old_block_type; + unsigned long start_pfn, end_pfn; + int block_type; - old_block_type = get_pageblock_migratetype(page); + block_type = get_pageblock_migratetype(page); /* * This can happen due to races and we want to prevent broken * highatomic accounting. */ - if (is_migrate_highatomic(old_block_type)) + if (is_migrate_highatomic(block_type)) goto single_page; /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { + del_page_from_free_list(page, zone, current_order, block_type); change_pageblock_range(page, current_order, start_type); - goto single_page; + expand(zone, page, order, current_order, start_type); + return page; } /* @@ -1993,10 +2101,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, if (!whole_block) goto single_page; - free_pages = move_freepages_block(zone, page, start_type, - &movable_pages); /* moving whole block can fail due to zone boundary conditions */ - if (!free_pages) + if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, + &free_pages, &movable_pages)) goto single_page; /* @@ -2014,7 +2121,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, * vice versa, be conservative since we can't distinguish the * exact migratetype of non-movable pages. */ - if (old_block_type == MIGRATE_MOVABLE) + if (block_type == MIGRATE_MOVABLE) alike_pages = pageblock_nr_pages - (free_pages + movable_pages); else @@ -2025,13 +2132,15 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, * compatible migratability as our allocation, claim the whole block. 
*/ if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || - page_group_by_mobility_disabled) - set_pageblock_migratetype(page, start_type); - - return; + page_group_by_mobility_disabled) { + move_freepages(zone, start_pfn, end_pfn, block_type, start_type); + return __rmqueue_smallest(zone, order, start_type); + } single_page: - move_to_free_list(page, zone, current_order, start_type); + del_page_from_free_list(page, zone, current_order, block_type); + expand(zone, page, order, current_order, block_type); + return page; } /* @@ -2069,10 +2178,12 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, } /* - * Reserve a pageblock for exclusive use of high-order atomic allocations if - * there are no empty page blocks that contain a page with a suitable order + * Reserve the pageblock(s) surrounding an allocation request for + * exclusive use of high-order atomic allocations if there are no + * empty page blocks that contain a page with a suitable order */ -static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) +static void reserve_highatomic_pageblock(struct page *page, int order, + struct zone *zone) { int mt; unsigned long max_managed, flags; @@ -2102,10 +2213,16 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) /* Yoink! */ mt = get_pageblock_migratetype(page); /* Only reserve normal pageblocks (i.e., they can merge with others) */ - if (migratetype_is_mergeable(mt)) { + if (!migratetype_is_mergeable(mt)) + goto out_unlock; + + if (order < pageblock_order) { + if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) + goto out_unlock; zone->nr_reserved_highatomic += pageblock_nr_pages; - set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); - move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL); + } else { + change_pageblock_range(page, order, MIGRATE_HIGHATOMIC); + zone->nr_reserved_highatomic += 1 << order; } out_unlock: @@ -2118,7 +2235,7 @@ out_unlock: * intense memory pressure but failed atomic allocations should be easier * to recover from than an OOM. * - * If @force is true, try to unreserve a pageblock even though highatomic + * If @force is true, try to unreserve pageblocks even though highatomic * pageblock is exhausted. */ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, @@ -2130,7 +2247,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, struct zone *zone; struct page *page; int order; - bool ret; + int ret; bool skip_unreserve_highatomic = false; for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, @@ -2151,11 +2268,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &(zone->free_area[order]); + int mt; page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); if (!page) continue; + mt = get_pageblock_migratetype(page); /* * In page freeing path, migratetype change is racy so * we can counter several free pages in a pageblock @@ -2163,7 +2282,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * from highatomic to ac->migratetype. So we should * adjust the count once. 
*/ - if (is_migrate_highatomic_page(page)) { + if (is_migrate_highatomic(mt)) { + unsigned long size; /* * It should never happen but changes to * locking could inadvertently allow a per-cpu @@ -2171,9 +2291,9 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * while unreserving so be safe and watch for * underflows. */ - zone->nr_reserved_highatomic -= min( - pageblock_nr_pages, - zone->nr_reserved_highatomic); + size = max(pageblock_nr_pages, 1UL << order); + size = min(size, zone->nr_reserved_highatomic); + zone->nr_reserved_highatomic -= size; } /* @@ -2185,10 +2305,22 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * of pageblocks that cannot be completely freed * may increase. */ - set_pageblock_migratetype(page, ac->migratetype); - ret = move_freepages_block(zone, page, ac->migratetype, - NULL); - if (ret) { + if (order < pageblock_order) + ret = move_freepages_block(zone, page, mt, + ac->migratetype); + else { + move_to_free_list(page, zone, order, mt, + ac->migratetype); + change_pageblock_range(page, order, + ac->migratetype); + ret = 1; + } + /* + * Reserving the block(s) already succeeded, + * so this should not fail on zone boundaries. + */ + WARN_ON_ONCE(ret == -1); + if (ret > 0) { spin_unlock_irqrestore(&zone->lock, flags); return ret; } @@ -2209,7 +2341,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * deviation from the rest of this file, to make the for loop * condition simpler. */ -static __always_inline bool +static __always_inline struct page * __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, unsigned int alloc_flags) { @@ -2256,7 +2388,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, goto do_steal; } - return false; + return NULL; find_smallest: for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { @@ -2276,14 +2408,14 @@ find_smallest: do_steal: page = get_page_from_free_area(area, fallback_mt); - steal_suitable_fallback(zone, page, alloc_flags, start_migratetype, - can_steal); + /* take off list, maybe claim block, expand remainder */ + page = steal_suitable_fallback(zone, page, current_order, order, + start_migratetype, alloc_flags, can_steal); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, fallback_mt); - return true; - + return page; } /* @@ -2314,15 +2446,15 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, return page; } } -retry: + page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) { if (!cma_redirect_restricted() && alloc_flags & ALLOC_CMA) page = __rmqueue_cma_fallback(zone, order); - if (!page && __rmqueue_fallback(zone, order, migratetype, - alloc_flags)) - goto retry; + if (!page) + page = __rmqueue_fallback(zone, order, migratetype, + alloc_flags); } return page; } @@ -2367,12 +2499,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages are ordered properly. 
*/ list_add_tail(&page->pcp_list, list); - if (is_migrate_cma(get_pcppage_migratetype(page))) - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, - -(1 << order)); } - - __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); spin_unlock_irqrestore(&zone->lock, flags); return i; @@ -2533,19 +2660,6 @@ void drain_all_pages(struct zone *zone) __drain_all_pages(zone, false); } -static bool free_unref_page_prepare(struct page *page, unsigned long pfn, - unsigned int order) -{ - int migratetype; - - if (!free_pages_prepare(page, order, FPI_NONE)) - return false; - - migratetype = get_pfnblock_migratetype(page, pfn); - set_pcppage_migratetype(page, migratetype); - return true; -} - static int nr_pcp_free(struct per_cpu_pages *pcp, int high, bool free_high) { int min_nr_free, max_nr_free; @@ -2629,10 +2743,10 @@ void free_unref_page(struct page *page, unsigned int order) struct per_cpu_pages *pcp; struct zone *zone; unsigned long pfn = page_to_pfn(page); - int migratetype, pcpmigratetype; + int migratetype; bool skip_free_unref_page = false; - if (!free_unref_page_prepare(page, pfn, order)) + if (!free_pages_prepare(page, order, FPI_NONE)) return; /* @@ -2643,29 +2757,29 @@ void free_unref_page(struct page *page, unsigned int order) * get those areas back if necessary. Otherwise, we may have to free * excessively into the page allocator */ - migratetype = pcpmigratetype = get_pcppage_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip_free_unref_page); if (skip_free_unref_page) return; if (unlikely(migratetype > MIGRATE_RECLAIMABLE)) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); + free_one_page(page_zone(page), page, pfn, order, FPI_NONE); return; } #ifdef CONFIG_CMA if (!cma_has_pcplist() || migratetype != MIGRATE_CMA) #endif - pcpmigratetype = MIGRATE_MOVABLE; + migratetype = MIGRATE_MOVABLE; } zone = page_zone(page); pcp_trylock_prepare(UP_flags); pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { - free_unref_page_commit(zone, pcp, page, pcpmigratetype, order); + free_unref_page_commit(zone, pcp, page, migratetype, order); pcp_spin_unlock(pcp); } else { - free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); + free_one_page(zone, page, pfn, order, FPI_NONE); } pcp_trylock_finish(UP_flags); } @@ -2686,7 +2800,7 @@ void free_unref_page_list(struct list_head *list) /* Prepare pages for freeing */ list_for_each_entry_safe(page, next, list, lru) { unsigned long pfn = page_to_pfn(page); - if (!free_unref_page_prepare(page, pfn, 0)) { + if (!free_pages_prepare(page, 0, FPI_NONE)) { list_del(&page->lru); continue; } @@ -2695,10 +2809,10 @@ void free_unref_page_list(struct list_head *list) * Free isolated pages directly to the allocator, see * comment in free_unref_page. 
*/ - migratetype = get_pcppage_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); if (unlikely(is_migrate_isolate(migratetype))) { list_del(&page->lru); - free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE); + free_one_page(page_zone(page), page, pfn, 0, FPI_NONE); continue; } } @@ -2709,9 +2823,10 @@ void free_unref_page_list(struct list_head *list) list_for_each_entry_safe(page, next, list, lru) { struct zone *zone = page_zone(page); + unsigned long pfn = page_to_pfn(page); list_del(&page->lru); - migratetype = get_pcppage_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); /* * Either different zone requiring a different pcp lock or @@ -2724,6 +2839,16 @@ void free_unref_page_list(struct list_head *list) pcp_trylock_finish(UP_flags); } + /* + * Free isolated pages directly to the + * allocator, see comment in free_unref_page. + */ + if (is_migrate_isolate(migratetype)) { + free_one_page(zone, page, page_to_pfn(page), + 0, FPI_NONE); + continue; + } + batch_count = 0; /* @@ -2734,8 +2859,8 @@ void free_unref_page_list(struct list_head *list) pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (unlikely(!pcp)) { pcp_trylock_finish(UP_flags); - free_one_page(zone, page, page_to_pfn(page), - 0, migratetype, FPI_NONE); + free_one_page(zone, page, pfn, + 0, FPI_NONE); locked_zone = NULL; continue; } @@ -2802,11 +2927,9 @@ int __isolate_free_page(struct page *page, unsigned int order) watermark = zone->_watermark[WMARK_MIN] + (1UL << order); if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return 0; - - __mod_zone_freepage_state(zone, -(1UL << order), mt); } - del_page_from_free_list(page, zone, order); + del_page_from_free_list(page, zone, order, mt); /* * Set the pageblock if the isolated page is at least half of a @@ -2821,8 +2944,8 @@ int __isolate_free_page(struct page *page, unsigned int order) * with others) */ if (migratetype_is_mergeable(mt)) - set_pageblock_migratetype(page, - MIGRATE_MOVABLE); + move_freepages_block(zone, page, mt, + MIGRATE_MOVABLE); } } @@ -2911,8 +3034,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, return NULL; } } - __mod_zone_freepage_state(zone, -(1 << order), - get_pcppage_migratetype(page)); spin_unlock_irqrestore(&zone->lock, flags); } while (check_new_pages(page, order)); @@ -3090,11 +3211,10 @@ static inline long __zone_watermark_unusable_free(struct zone *z, /* * If the caller does not have rights to reserves below the min - * watermark then subtract the high-atomic reserves. This will - * over-estimate the size of the atomic reserve but it avoids a search. + * watermark then subtract the free pages reserved for highatomic. 
*/ if (likely(!(alloc_flags & ALLOC_RESERVES))) - unusable_free += z->nr_reserved_highatomic; + unusable_free += READ_ONCE(free_highatomics[zone_idx(z)]); #ifdef CONFIG_CMA /* If allocation can't use CMA areas don't use free CMA pages */ @@ -3488,7 +3608,7 @@ try_this_zone: * if the pageblock should be reserved for the future */ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) - reserve_highatomic_pageblock(page, zone); + reserve_highatomic_pageblock(page, order, zone); return page; } else { @@ -3935,6 +4055,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, { unsigned int noreclaim_flag; unsigned long progress; + bool skip = false; cond_resched(); @@ -3943,9 +4064,14 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, fs_reclaim_acquire(gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); + trace_android_rvh_perform_reclaim(order, gfp_mask, ac->nodemask, + &progress, &skip); + if (skip) + goto out; + progress = try_to_free_pages(ac->zonelist, order, gfp_mask, ac->nodemask); - +out: memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(gfp_mask); @@ -6531,7 +6657,6 @@ int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask) { unsigned long outer_start, outer_end; - int order; int ret = 0; struct compact_control cc = { @@ -6608,29 +6733,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, * We don't have to hold zone->lock here because the pages are * isolated thus they won't get removed from buddy. */ - - order = 0; - outer_start = start; - while (!PageBuddy(pfn_to_page(outer_start))) { - if (++order > MAX_ORDER) { - outer_start = start; - break; - } - outer_start &= ~0UL << order; - } - - if (outer_start != start) { - order = buddy_order(pfn_to_page(outer_start)); - - /* - * outer_start page could be small order buddy page and - * it doesn't include start page. Adjust outer_start - * in this case to report failed page properly - * on tracepoint in test_pages_isolated() - */ - if (outer_start + (1UL << order) <= start) - outer_start = start; - } + outer_start = find_large_buddy(start); /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, 0)) { @@ -6873,8 +6976,9 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) BUG_ON(page_count(page)); BUG_ON(!PageBuddy(page)); + VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE); order = buddy_order(page); - del_page_from_free_list(page, zone, order); + del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE); pfn += (1 << order); } spin_unlock_irqrestore(&zone->lock, flags); @@ -6902,6 +7006,14 @@ bool is_free_buddy_page(struct page *page) EXPORT_SYMBOL(is_free_buddy_page); #ifdef CONFIG_MEMORY_FAILURE +static inline void add_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype, + bool tail) +{ + __add_to_free_list(page, zone, order, migratetype, tail); + account_freepages(zone, 1 << order, migratetype); +} + /* * Break down a higher-order page in sub-pages, and keep our target out of * buddy allocator. 
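/*
 * Editor's illustrative sketch (not part of the patch): a minimal,
 * self-contained userspace model of the accounting split used throughout
 * the hunks above, where freelist manipulation (__add_to_free_list /
 * __del_page_from_free_list) is kept separate from counter updates
 * (account_freepages). The names toy_zone, toy_mt and toy_account are
 * hypothetical; only the classification logic mirrors the diff:
 * isolated blocks are not counted at all, CMA blocks additionally go to
 * a CMA counter, highatomic blocks additionally go to a per-zone
 * highatomic counter, and every non-isolated change is reflected in the
 * global free-page count.
 */
#include <stdio.h>

enum toy_mt { TOY_MOVABLE, TOY_CMA, TOY_HIGHATOMIC, TOY_ISOLATE };

struct toy_zone {
	long nr_free;        /* models NR_FREE_PAGES      */
	long nr_free_cma;    /* models NR_FREE_CMA_PAGES  */
	long nr_highatomic;  /* models free_highatomics[] */
};

static void toy_account(struct toy_zone *z, long nr_pages, enum toy_mt mt)
{
	if (mt == TOY_ISOLATE)          /* isolated pages stay invisible   */
		return;

	z->nr_free += nr_pages;         /* always track the global count   */

	if (mt == TOY_CMA)
		z->nr_free_cma += nr_pages;
	else if (mt == TOY_HIGHATOMIC)
		z->nr_highatomic += nr_pages;
}

int main(void)
{
	struct toy_zone z = { 0, 0, 0 };

	toy_account(&z, 1 << 3, TOY_CMA);           /* free an order-3 CMA block */
	toy_account(&z, 1 << 0, TOY_HIGHATOMIC);    /* free one highatomic page  */
	toy_account(&z, -(1 << 0), TOY_HIGHATOMIC); /* ... and allocate it again */
	toy_account(&z, 1 << 2, TOY_ISOLATE);       /* isolated: counters unchanged */

	printf("free=%ld cma=%ld highatomic=%ld\n",
	       z.nr_free, z.nr_free_cma, z.nr_highatomic);
	return 0;
}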
@@ -6926,11 +7038,11 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page, } page = next_page; - if (set_page_guard(zone, current_buddy, high, migratetype)) + if (set_page_guard(zone, current_buddy, high)) continue; if (current_buddy != target) { - add_to_free_list(current_buddy, zone, high, migratetype); + add_to_free_list(current_buddy, zone, high, migratetype, false); set_buddy_order(current_buddy, high); } } @@ -6957,12 +7069,11 @@ bool take_page_off_buddy(struct page *page) int migratetype = get_pfnblock_migratetype(page_head, pfn_head); - del_page_from_free_list(page_head, zone, page_order); + del_page_from_free_list(page_head, zone, page_order, + migratetype); break_down_buddy_pages(zone, page_head, page, 0, page_order, migratetype); SetPageHWPoisonTakenOff(page); - if (!is_migrate_isolate(migratetype)) - __mod_zone_freepage_state(zone, -1, migratetype); ret = true; break; } @@ -6979,13 +7090,14 @@ bool take_page_off_buddy(struct page *page) bool put_page_back_buddy(struct page *page) { struct zone *zone = page_zone(page); - unsigned long pfn = page_to_pfn(page); unsigned long flags; - int migratetype = get_pfnblock_migratetype(page, pfn); bool ret = false; spin_lock_irqsave(&zone->lock, flags); if (put_page_testzero(page)) { + unsigned long pfn = page_to_pfn(page); + int migratetype = get_pfnblock_migratetype(page, pfn); + ClearPageHWPoisonTakenOff(page); __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE); if (TestClearPageHWPoison(page)) { @@ -7066,7 +7178,7 @@ static bool try_to_accept_memory_one(struct zone *zone) list_del(&page->lru); last = list_empty(&zone->unaccepted_pages); - __mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); + account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); spin_unlock_irqrestore(&zone->lock, flags); @@ -7134,7 +7246,7 @@ static bool __free_unaccepted(struct page *page) spin_lock_irqsave(&zone->lock, flags); first = list_empty(&zone->unaccepted_pages); list_add_tail(&page->lru, &zone->unaccepted_pages); - __mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); + account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); spin_unlock_irqrestore(&zone->lock, flags); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 8fafd7c18bd0..28c3f5aa0411 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -179,15 +179,11 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end, migratetype, isol_flags); if (!unmovable) { - unsigned long nr_pages; - int mt = get_pageblock_migratetype(page); - - set_pageblock_migratetype(page, MIGRATE_ISOLATE); + if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) { + spin_unlock_irqrestore(&zone->lock, flags); + return -EBUSY; + } zone->nr_isolate_pageblock++; - nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE, - NULL); - - __mod_zone_freepage_state(zone, -nr_pages, mt); spin_unlock_irqrestore(&zone->lock, flags); return 0; } @@ -207,7 +203,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ static void unset_migratetype_isolate(struct page *page, int migratetype) { struct zone *zone; - unsigned long flags, nr_pages; + unsigned long flags; bool isolated_page = false; unsigned int order; struct page *buddy; @@ -253,12 +249,15 @@ static void 
unset_migratetype_isolate(struct page *page, int migratetype) * allocation. */ if (!isolated_page) { - nr_pages = move_freepages_block(zone, page, migratetype, NULL); - __mod_zone_freepage_state(zone, nr_pages, migratetype); - } - set_pageblock_migratetype(page, migratetype); - if (isolated_page) + /* + * Isolating this block already succeeded, so this + * should not fail on zone boundaries. + */ + WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype)); + } else { + set_pageblock_migratetype(page, migratetype); __putback_isolated_page(page, order, migratetype); + } zone->nr_isolate_pageblock--; out: spin_unlock_irqrestore(&zone->lock, flags); @@ -367,26 +366,29 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, VM_BUG_ON(!page); pfn = page_to_pfn(page); - /* - * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any - * free pages in [start_pfn, boundary_pfn), its head page will - * always be in the range. - */ + if (PageBuddy(page)) { int order = buddy_order(page); - if (pfn + (1UL << order) > boundary_pfn) { - /* free page changed before split, check it again */ - if (split_free_page(page, order, boundary_pfn - pfn)) - continue; - } + /* move_freepages_block_isolate() handled this */ + VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn); pfn += 1UL << order; continue; } + /* - * migrate compound pages then let the free page handling code - * above do the rest. If migration is not possible, just fail. + * If a compound page is straddling our block, attempt + * to migrate it out of the way. + * + * We don't have to worry about this creating a large + * free page that straddles into our block: gigantic + * pages are freed as order-0 chunks, and LRU pages + * (currently) do not exceed pageblock_order. + * + * The block of interest has already been marked + * MIGRATE_ISOLATE above, so when migration is done it + * will free its pages onto the correct freelists. */ if (PageCompound(page)) { struct page *head = compound_head(page); @@ -397,16 +399,10 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, pfn = head_pfn + nr_pages; continue; } + #if defined CONFIG_COMPACTION || defined CONFIG_CMA - /* - * hugetlb, lru compound (THP), and movable compound pages - * can be migrated. Otherwise, fail the isolation. - */ - if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) { - int order; - unsigned long outer_pfn; + if (PageHuge(page)) { int page_mt = get_pageblock_migratetype(page); - bool isolate_page = !is_migrate_isolate_page(page); struct compact_control cc = { .nr_migratepages = 0, .order = -1, @@ -419,56 +415,26 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, }; INIT_LIST_HEAD(&cc.migratepages); - /* - * XXX: mark the page as MIGRATE_ISOLATE so that - * no one else can grab the freed page after migration. - * Ideally, the page should be freed as two separate - * pages to be added into separate migratetype free - * lists. - */ - if (isolate_page) { - ret = set_migratetype_isolate(page, page_mt, - flags, head_pfn, head_pfn + nr_pages); - if (ret) - goto failed; - } - ret = __alloc_contig_migrate_range(&cc, head_pfn, head_pfn + nr_pages, page_mt); - - /* - * restore the page's migratetype so that it can - * be split into separate migratetype free lists - * later. 
- */ - if (isolate_page) - unset_migratetype_isolate(page, page_mt); - if (ret) goto failed; - /* - * reset pfn to the head of the free page, so - * that the free page handling code above can split - * the free page to the right migratetype list. - * - * head_pfn is not used here as a hugetlb page order - * can be bigger than MAX_ORDER, but after it is - * freed, the free page order is not. Use pfn within - * the range to find the head of the free page. - */ - order = 0; - outer_pfn = pfn; - while (!PageBuddy(pfn_to_page(outer_pfn))) { - /* stop if we cannot find the free page */ - if (++order > MAX_ORDER) - goto failed; - outer_pfn &= ~0UL << order; - } - pfn = outer_pfn; + + pfn = head_pfn + nr_pages; continue; - } else + } + + /* + * These pages are movable too, but they're + * not expected to exceed pageblock_order. + * + * Let us know when they do, so we can add + * proper free and split handling for them. + */ + VM_WARN_ON_ONCE_PAGE(PageLRU(page), page); + VM_WARN_ON_ONCE_PAGE(__PageMovable(page), page); #endif - goto failed; + goto failed; } pfn++; diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index 623a50e965a8..3b020924a7a9 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -271,10 +271,10 @@ static const struct vm_operations_struct pad_vma_ops = { }; /* - * Returns a new VMA representing the padding in @vma, if no padding - * in @vma returns NULL. + * Returns a new VMA representing the padding in @vma; + * returns NULL if no padding in @vma or allocation failed. */ -struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) +static struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) { struct vm_area_struct *pad; @@ -282,6 +282,10 @@ struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) return NULL; pad = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + if (!pad) { + pr_warn("Page size migration: Failed to allocate padding VMA"); + return NULL; + } memcpy(pad, vma, sizeof(struct vm_area_struct)); @@ -306,34 +310,14 @@ struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) return pad; } -/* - * Returns a new VMA exclusing the padding from @vma; if no padding in - * @vma returns @vma. - */ -struct vm_area_struct *get_data_vma(struct vm_area_struct *vma) -{ - struct vm_area_struct *data; - - if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK)) - return vma; - - data = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); - - memcpy(data, vma, sizeof(struct vm_area_struct)); - - /* Adjust the end to the start of the padding section */ - data->vm_end = VMA_PAD_START(data); - - return data; -} - /* * Calls the show_pad_vma_fn on the @pad VMA, and frees the copies of @vma * and @pad. */ -void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, - struct seq_file *m, void *func, bool smaps) +void show_map_pad_vma(struct vm_area_struct *vma, struct seq_file *m, + void *func, bool smaps) { + struct vm_area_struct *pad = get_pad_vma(vma); if (!pad) return; @@ -349,13 +333,21 @@ void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, */ BUG_ON(!vma); + /* The pad VMA should be anonymous. */ + BUG_ON(pad->vm_file); + + /* The pad VMA should be PROT_NONE. 
*/ + BUG_ON(pad->vm_flags & (VM_READ|VM_WRITE|VM_EXEC)); + + /* The pad VMA itself cannot have padding; infinite recursion */ + BUG_ON(pad->vm_flags & VM_PAD_MASK); + if (smaps) ((show_pad_smaps_fn)func)(m, pad); else ((show_pad_maps_fn)func)(m, pad); kfree(pad); - kfree(vma); } /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 09cbd28da195..dfb5edf63ed5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1479,8 +1479,15 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio, * same address_space. */ if (reclaimed && folio_is_file_lru(folio) && - !mapping_exiting(mapping) && !dax_mapping(mapping)) + !mapping_exiting(mapping) && !dax_mapping(mapping)) { + bool keep = false; + + trace_android_vh_keep_reclaimed_folio(folio, refcount, &keep); + if (keep) + goto cannot_free; shadow = workingset_eviction(folio, target_memcg); + } + trace_android_vh_clear_reclaimed_folio(folio, reclaimed); __filemap_remove_folio(folio, shadow); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) @@ -5354,6 +5361,12 @@ retry: type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); list_for_each_entry_safe_reverse(folio, next, &list, lru) { + bool bypass = false; + + trace_android_vh_evict_folios_bypass(folio, &bypass); + if (bypass) + continue; + if (!folio_evictable(folio)) { list_del(&folio->lru); folio_putback_lru(folio); @@ -7549,6 +7562,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat, sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); } + trace_android_rvh_kswapd_shrink_node(&sc->nr_to_reclaim); /* * Historically care was taken to put equal pressure on all zones but diff --git a/modules.bzl b/modules.bzl index c93be156738d..e6d7523a9c84 100644 --- a/modules.bzl +++ b/modules.bzl @@ -6,6 +6,7 @@ This module contains a full list of kernel modules compiled by GKI. 
""" +# LINT.IfChange _COMMON_GKI_MODULES_LIST = [ # keep sorted "drivers/block/virtio_blk.ko", @@ -99,6 +100,7 @@ _ARM64_GKI_MODULES_LIST = [ "drivers/misc/open-dice.ko", "drivers/ptp/ptp_kvm.ko", ] +# LINT.ThenChange(android/abi_gki_protected_exports_aarch64) _X86_GKI_MODULES_LIST = [ # keep sorted @@ -195,11 +197,13 @@ def get_kunit_modules_list(arch = None): return kunit_modules_list +# LINT.IfChange _COMMON_UNPROTECTED_MODULES_LIST = [ "drivers/block/zram/zram.ko", "kernel/kheaders.ko", "mm/zsmalloc.ko", ] +# LINT.ThenChange(android/abi_gki_protected_exports_aarch64) # buildifier: disable=unnamed-macro def get_gki_protected_modules_list(arch = None): diff --git a/net/TEST_MAPPING b/net/TEST_MAPPING index c30cffa00418..41cfef46dbbd 100644 --- a/net/TEST_MAPPING +++ b/net/TEST_MAPPING @@ -241,6 +241,26 @@ }, { "name": "vts_kernel_net_tests" + }, + { + "name": "CtsJobSchedulerTestCases", + "options": [ + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testCellularConstraintExecutedAndStopped" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_transitionNetworks" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testConnectivityConstraintExecutes_withMobile" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testEJMeteredConstraintFails_withMobile_DataSaverOn" + }, + { + "include-filter": "android.jobscheduler.cts.ConnectivityConstraintTest#testMeteredConstraintFails_withMobile_DataSaverOn" + } + ] } ], "kernel-presubmit": [ diff --git a/net/core/filter.c b/net/core/filter.c index c02e2bdc6fed..d3ac5973fc81 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -213,24 +213,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) return 0; } +static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset) +{ + if (likely(offset >= 0)) + return offset; + + if (offset >= SKF_NET_OFF) + return offset - SKF_NET_OFF + skb_network_offset(skb); + + if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb)) + return offset - SKF_LL_OFF + skb_mac_offset(skb); + + return INT_MIN; +} + BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u8 tmp, *ptr; + u8 tmp; const int len = sizeof(tmp); - if (offset >= 0) { - if (headlen - offset >= len) - return *(u8 *)(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return tmp; - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return *(u8 *)ptr; - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return *(u8 *)(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return tmp; + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, @@ -243,21 +255,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - __be16 tmp, *ptr; + __be16 tmp; const int len = sizeof(tmp); - if (offset >= 0) { - if (headlen - offset >= len) - return get_unaligned_be16(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return be16_to_cpu(tmp); - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return 
get_unaligned_be16(ptr); - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return get_unaligned_be16(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return be16_to_cpu(tmp); + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, @@ -270,21 +280,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - __be32 tmp, *ptr; + __be32 tmp; const int len = sizeof(tmp); - if (likely(offset >= 0)) { - if (headlen - offset >= len) - return get_unaligned_be32(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return be32_to_cpu(tmp); - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return get_unaligned_be32(ptr); - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return get_unaligned_be32(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return be32_to_cpu(tmp); + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,