ANDROID: KVM: arm64: Convert kvm_pinned_pages to an interval-tree
Here we are, the last straw... the maple-tree is absolutely terrible for
what we need and it is time to get rid of it. With the upcoming set of
fixes for memory relinquish with huge mappings, we need to be able to
split a pinned_page under the mmu write_lock. This is just too
complicated with the maple-tree, while it is a piece of cake with an
interval tree.

Bug: 419548963
Change-Id: I981b5d875085e1d2f7b4ebe2560c8b6ea3cbae88
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
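For context on why the interval tree makes the upcoming huge-mapping work tractable: splitting a pinned range becomes one removal plus a handful of insertions on the same rb-tree, all done in place under the mmu write_lock. The sketch below is illustrative only, it is not part of this change, and pkvm_split_ppage() is a hypothetical helper; it only assumes the kvm_pinned_pages_* API introduced by this patch.

/*
 * Illustrative sketch only -- not part of this patch. Split a pinned
 * huge mapping into PAGE_SIZE entries under the mmu write_lock,
 * using the kvm_pinned_pages_* helpers added by this change.
 * Error handling is trimmed for brevity.
 */
static int pkvm_split_ppage(struct kvm *kvm, struct kvm_pinned_page *ppage)
{
	u64 nr = 1 << ppage->order;
	u64 ipa = ppage->ipa;
	u64 i;

	lockdep_assert_held_write(&kvm->mmu_lock);

	/* Drop the single large node... */
	kvm_pinned_pages_remove(ppage, &kvm->arch.pkvm.pinned_pages);

	/* ...and re-insert one order-0 node per page it used to cover. */
	for (i = 0; i < nr; i++) {
		struct kvm_pinned_page *new = kzalloc(sizeof(*new), GFP_ATOMIC);

		if (!new)
			return -ENOMEM;

		new->page = pfn_to_page(page_to_pfn(ppage->page) + i);
		new->ipa = ipa + i * PAGE_SIZE;
		new->order = 0;
		new->pins = 1;
		kvm_pinned_pages_insert(new, &kvm->arch.pkvm.pinned_pages);
	}

	kfree(ppage);
	return 0;
}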
@@ -42469,6 +42469,10 @@ member {
   id: 0x2c96db22
   type_id: 0x3d751c99
 }
+member {
+  id: 0x2cd6acbf
+  type_id: 0x3c74c2ee
+}
 member {
   id: 0x2d16b3a0
   type_id: 0x3b74be91
@@ -43414,6 +43418,11 @@ member {
   type_id: 0x57bf00b8
   offset: 576
 }
+member {
+  id: 0x3643ba2c
+  type_id: 0x56209a0e
+  offset: 256
+}
 member {
   id: 0x36477112
   type_id: 0x5633b45b
@@ -45547,6 +45556,11 @@ member {
   offset: 242
   bitsize: 14
 }
+member {
+  id: 0x906218d7
+  name: "__unused"
+  type_id: 0xa179a8c5
+}
 member {
   id: 0x9086e58b
   name: "__unused"
@@ -156569,10 +156583,9 @@ member {
   offset: 576
 }
 member {
-  id: 0x03347550
+  id: 0x0345ffe6
   name: "pinned_pages"
-  type_id: 0xa179a8c5
-  offset: 256
+  type_id: 0xd0f3b5bf
 }
 member {
   id: 0x88a7076f
@@ -223874,6 +223887,14 @@ struct_union {
     member_id: 0xc101e64f
   }
 }
+struct_union {
+  id: 0x3c74c2ee
+  kind: STRUCT
+  definition {
+    bytesize: 16
+    member_id: 0x906218d7
+  }
+}
 struct_union {
   id: 0x3c9f0fa2
   kind: STRUCT
@@ -225487,6 +225508,16 @@ struct_union {
     member_id: 0xdf160d99
   }
 }
+struct_union {
+  id: 0x56209a0e
+  kind: UNION
+  definition {
+    bytesize: 16
+    member_id: 0x0345ffe6
+    member_id: 0x2cd6acbf
+    member_id: 0x36752b74
+  }
+}
 struct_union {
   id: 0x5633b45b
   kind: UNION
@@ -253468,7 +253499,7 @@ struct_union {
     bytesize: 64
     member_id: 0xb8f5134f
     member_id: 0x63c436ff
-    member_id: 0x03347550
+    member_id: 0x3643ba2c
     member_id: 0x0f7f629e
     member_id: 0x3a2d39cb
   }
@@ -132,3 +132,7 @@ type 'struct io_ring_ctx' changed
 1 variable symbol(s) removed
   'struct tracepoint __tracepoint_android_vh_filemap_fault_before_folio_locked'
 
+type 'struct kvm_protected_vm' changed
+  member 'struct maple_tree pinned_pages' was removed
+  member 'union { struct rb_root_cached pinned_pages; struct { struct maple_tree __unused; }; union { }; }' was added
+
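The union reported above is what the _ANDROID_KABI_REPLACE() used further down produces for kvm_protected_vm: the new struct rb_root_cached member takes over the slot of the old struct maple_tree, so the struct's size and member offsets are preserved. Roughly, the layout the ABI tooling now sees is (reconstructed from the report line, for illustration only):

/* Approximate post-change layout, as described by the ABI report above. */
struct kvm_protected_vm {
	pkvm_handle_t handle;
	struct kvm_hyp_memcache stage2_teardown_mc;
	union {
		struct rb_root_cached pinned_pages;	/* new member */
		struct {
			struct maple_tree __unused;	/* old type, keeps the size */
		};
		union { };
	};
	gpa_t pvmfw_load_addr;
	bool enabled;
};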
@@ -224,20 +224,33 @@ struct kvm_smccc_features {
 };
 
 struct kvm_pinned_page {
+	struct rb_node node;
 	struct page *page;
 	u64 ipa;
+	u64 __subtree_last;
 	u8 order;
 	u16 pins;
 };
 
-#define KVM_DUMMY_PPAGE ((struct kvm_pinned_page *)-1)
+struct kvm_pinned_page
+*kvm_pinned_pages_iter_first(struct rb_root_cached *root, u64 start, u64 end);
+struct kvm_pinned_page
+*kvm_pinned_pages_iter_next(struct kvm_pinned_page *ppage, u64 start, u64 end);
+
+#define for_ppage_node_in_range(kvm, start, end, __ppage, __tmp) \
+	for (__ppage = kvm_pinned_pages_iter_first(&(kvm)->arch.pkvm.pinned_pages, start, end - 1);\
+	     __ppage && ({ __tmp = kvm_pinned_pages_iter_next(__ppage, start, end - 1); 1; }); \
+	     __ppage = __tmp)
+
+void kvm_pinned_pages_remove(struct kvm_pinned_page *ppage,
+			     struct rb_root_cached *root);
 
 typedef unsigned int pkvm_handle_t;
 
 struct kvm_protected_vm {
 	pkvm_handle_t handle;
 	struct kvm_hyp_memcache stage2_teardown_mc;
-	struct maple_tree pinned_pages;
+	_ANDROID_KABI_REPLACE(struct maple_tree __unused, struct rb_root_cached pinned_pages);
 	gpa_t pvmfw_load_addr;
 	bool enabled;
 };
@@ -6,11 +6,11 @@
 
 #include <linux/cma.h>
 #include <linux/dma-map-ops.h>
-#include <linux/maple_tree.h>
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
 #include <linux/hugetlb.h>
+#include <linux/interval_tree_generic.h>
 #include <linux/sched/signal.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
@@ -291,6 +291,20 @@ static void invalidate_icache_guest_page(void *va, size_t size)
 	__invalidate_icache_guest_page(va, size);
 }
 
+static u64 __pinned_page_start(struct kvm_pinned_page *ppage)
+{
+	return ppage->ipa;
+}
+
+static u64 __pinned_page_end(struct kvm_pinned_page *ppage)
+{
+	return ppage->ipa + (1 << (ppage->order + PAGE_SHIFT)) - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct kvm_pinned_page, node, u64, __subtree_last,
+		     __pinned_page_start, __pinned_page_end, /* empty */,
+		     kvm_pinned_pages);
+
 static int __pkvm_unmap_guest_call(u64 pfn, u64 gfn, u8 order, void *args)
 {
 	struct kvm *kvm = args;
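Per include/linux/interval_tree_generic.h, the INTERVAL_TREE_DEFINE() instantiation above emits the helpers sketched below (the empty storage-class argument gives them external linkage; the iter_*/remove ones are the declarations added to kvm_host.h above). Intervals run from __pinned_page_start() to __pinned_page_end() inclusive, which is why callers query with an inclusive last address (end - 1).

/* Helpers generated by the INTERVAL_TREE_DEFINE() above (signatures only). */
void kvm_pinned_pages_insert(struct kvm_pinned_page *node,
			     struct rb_root_cached *root);
void kvm_pinned_pages_remove(struct kvm_pinned_page *node,
			     struct rb_root_cached *root);
struct kvm_pinned_page *
kvm_pinned_pages_iter_first(struct rb_root_cached *root, u64 start, u64 last);
struct kvm_pinned_page *
kvm_pinned_pages_iter_next(struct kvm_pinned_page *node, u64 start, u64 last);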
@@ -312,7 +326,7 @@ static int pkvm_unmap_guest(struct kvm *kvm, struct kvm_pinned_page *ppage)
 	 * no update needed from here.
 	 */
 	unpin_user_pages(&ppage->page, 1);
-	mtree_erase(&kvm->arch.pkvm.pinned_pages, ppage->ipa);
+	kvm_pinned_pages_remove(ppage, &kvm->arch.pkvm.pinned_pages);
 	kfree(ppage);
 
 	return 0;
@@ -320,17 +334,12 @@ static int pkvm_unmap_guest(struct kvm *kvm, struct kvm_pinned_page *ppage)
 
 static int pkvm_unmap_range(struct kvm *kvm, u64 start, u64 end)
 {
+	struct kvm_pinned_page *ppage, *tmp;
 	struct mm_struct *mm = kvm->mm;
-	unsigned long index = start;
 	unsigned long cnt = 0;
-	void *entry;
 	int ret = 0;
 
-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, end - 1) {
-		struct kvm_pinned_page *ppage = entry;
-
-		if (ppage == KVM_DUMMY_PPAGE)
-			continue;
+	for_ppage_node_in_range(kvm, start, end, ppage, tmp) {
 		ret = pkvm_unmap_guest(kvm, ppage);
 		if (ret)
 			break;
@@ -418,8 +427,7 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
 
 static void pkvm_stage2_flush(struct kvm *kvm)
 {
-	unsigned long index = 0;
-	void *entry;
+	struct kvm_pinned_page *ppage, *tmp;
 
 	/*
 	 * Contrary to stage2_apply_range(), we don't need to check
@@ -427,11 +435,7 @@ static void pkvm_stage2_flush(struct kvm *kvm)
 	 * from a vcpu thread, and the list is only ever freed on VM
 	 * destroy (which only occurs when all vcpu are gone).
 	 */
-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, ULONG_MAX) {
-		struct kvm_pinned_page *ppage = entry;
-
-		if (ppage == KVM_DUMMY_PPAGE)
-			continue;
+	for_ppage_node_in_range(kvm, 0, ULONG_MAX, ppage, tmp) {
 		__clean_dcache_guest_page(page_address(ppage->page), PAGE_SIZE);
 		cond_resched_rwlock_write(&kvm->mmu_lock);
 	}
@@ -1014,7 +1018,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
 	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
-	mt_init_flags(&kvm->arch.pkvm.pinned_pages, MT_FLAGS_USE_RCU);
 	mmu->arch = &kvm->arch;
 
 	if (is_protected_kvm_enabled())
@@ -1293,18 +1296,13 @@ static int __pkvm_wrprotect_call(u64 pfn, u64 gfn, u8 order, void *args)
 
 static int pkvm_wp_range(struct kvm *kvm, u64 start, u64 end)
 {
-	unsigned long index = start;
-	void *entry;
+	struct kvm_pinned_page *ppage, *tmp;
 
-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, end - 1) {
-		struct kvm_pinned_page *ppage = entry;
+	for_ppage_node_in_range(kvm, start, end, ppage, tmp) {
 		int ret;
 
-		if (ppage == KVM_DUMMY_PPAGE)
-			continue;
 		ret = pkvm_call_hyp_nvhe_ppage(ppage, __pkvm_wrprotect_call,
 					       kvm, false);
-
 		if (ret)
 			return ret;
 	}
@@ -1630,28 +1628,9 @@ static int pkvm_host_map_guest(u64 pfn, u64 gfn, u64 nr_pages,
 	return (ret == -EPERM) ? -EAGAIN : ret;
 }
 
-static struct kvm_pinned_page *
-find_ppage_or_above(struct kvm *kvm, phys_addr_t ipa)
-{
-	unsigned long index = ipa;
-	void *entry;
-
-	mt_for_each(&kvm->arch.pkvm.pinned_pages, entry, index, ULONG_MAX) {
-		if (entry == KVM_DUMMY_PPAGE)
-			continue;
-		return entry;
-	}
-
-	return NULL;
-}
-
 static struct kvm_pinned_page *find_ppage(struct kvm *kvm, u64 ipa)
 {
-	struct kvm_pinned_page *ppage;
-	unsigned long index = ipa;
-
-	ppage = mt_find(&kvm->arch.pkvm.pinned_pages, &index, ipa + PAGE_SIZE - 1);
-	return ppage == KVM_DUMMY_PPAGE ? NULL : ppage;
+	return kvm_pinned_pages_iter_first(&kvm->arch.pkvm.pinned_pages, ipa, ipa + PAGE_SIZE - 1);
 }
 
 static int __pkvm_relax_perms_call(u64 pfn, u64 gfn, u8 order, void *args)
@@ -1707,11 +1686,10 @@ static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t *fault_ipa,
 {
 	unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
 	struct kvm_hyp_memcache *hyp_memcache = &vcpu->arch.stage2_mc;
-	unsigned long index, pmd_offset, page_size, end;
+	unsigned long page_size = PAGE_SIZE;
 	struct mm_struct *mm = current->mm;
 	struct kvm_pinned_page *ppage;
 	struct kvm *kvm = vcpu->kvm;
-	struct maple_tree *mt = &kvm->arch.pkvm.pinned_pages;
 	int ret, nr_pages;
 	struct page *page;
 	u64 pfn;
@@ -1760,66 +1738,49 @@
 	}
 
 	pfn = page_to_pfn(page);
-	pmd_offset = *fault_ipa & (PMD_SIZE - 1);
-	page_size = transparent_hugepage_adjust(kvm, memslot,
-						hva, &pfn,
-						fault_ipa);
-	page = pfn_to_page(pfn);
 
-retry:
-	if (size)
-		*size = page_size;
+	read_lock(&kvm->mmu_lock);
+	if (!kvm_pinned_pages_iter_first(&vcpu->kvm->arch.pkvm.pinned_pages,
+					 ALIGN_DOWN(*fault_ipa, PMD_SIZE),
+					 ALIGN(*fault_ipa + 1, PMD_SIZE) - 1))
+		page_size = transparent_hugepage_adjust(kvm, memslot, hva, &pfn, fault_ipa);
+
+	/*
+	 * We take the risk of racing with another vCPU, but sync will be restored by the
+	 * host_map_guest HVC
+	 */
+	read_unlock(&kvm->mmu_lock);
+
+	page = pfn_to_page(pfn);
-
 	ret = account_locked_vm(mm, page_size >> PAGE_SHIFT, true);
 	if (ret)
 		goto unpin;
 
-	index = *fault_ipa;
-	end = index + page_size - 1;
 	ppage->page = page;
 	ppage->ipa = *fault_ipa;
 	ppage->order = get_order(page_size);
 	ppage->pins = 1 << ppage->order;
 
-	/*
-	 * If we already have a mapping in the middle of the THP, we have no
-	 * other choice than enforcing PAGE_SIZE for pkvm_host_map_guest() to
-	 * succeed.
-	 */
-	if (page_size > PAGE_SIZE && mt_find(mt, &index, end)) {
-		*fault_ipa += pmd_offset;
-		pfn += pmd_offset >> PAGE_SHIFT;
-		page = pfn_to_page(pfn);
-		account_locked_vm(mm, page_size >> PAGE_SHIFT, false);
-		page_size = PAGE_SIZE;
-		goto retry;
-	}
-
-	/* Reserve space in the mtree */
-	ret = mtree_insert_range(mt, index, end, KVM_DUMMY_PPAGE, GFP_KERNEL);
-	if (ret) {
-		if (ret == -EEXIST)
-			ret = 0;
-		goto dec_account;
-	}
-
 	write_lock(&kvm->mmu_lock);
 	ret = pkvm_host_map_guest(pfn, *fault_ipa >> PAGE_SHIFT,
 				  page_size >> PAGE_SHIFT, KVM_PGTABLE_PROT_R);
 	if (ret) {
-		if (WARN_ON(ret == -EAGAIN))
+		if (ret == -EAGAIN)
 			ret = 0;
 
 		goto err_unlock;
 	}
-	WARN_ON(mtree_store_range(mt, index, end, ppage, GFP_ATOMIC));
+	kvm_pinned_pages_insert(ppage, &kvm->arch.pkvm.pinned_pages);
 	write_unlock(&kvm->mmu_lock);
 
+	if (size)
+		*size = page_size;
+
 	return 0;
 
 err_unlock:
 	write_unlock(&kvm->mmu_lock);
 dec_account:
 	account_locked_vm(mm, page_size >> PAGE_SHIFT, false);
 unpin:
 	unpin_user_pages(&page, 1);
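A worked example for the bounds used in the new THP check above, with an illustrative IPA and assuming 4K pages (PMD_SIZE = 2MiB): the fault address is widened to the 2MiB block that a block mapping would cover, and the block is only eligible for transparent_hugepage_adjust() if no pinned page already overlaps it.

/* Illustrative fragment; fault_ipa value is made up. */
u64 fault_ipa = 0x40123000;
u64 start = ALIGN_DOWN(fault_ipa, PMD_SIZE);    /* 0x40000000 */
u64 last  = ALIGN(fault_ipa + 1, PMD_SIZE) - 1; /* 0x401fffff, inclusive */
bool block_free = !kvm_pinned_pages_iter_first(&kvm->arch.pkvm.pinned_pages,
					       start, last);
/* block_free == true means a 2MiB mapping can still be attempted. */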
@@ -1847,13 +1808,13 @@ int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t si
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	read_lock(&vcpu->kvm->mmu_lock);
-	ppage = find_ppage_or_above(vcpu->kvm, fault_ipa);
+	ppage = kvm_pinned_pages_iter_first(&vcpu->kvm->arch.pkvm.pinned_pages,
+					    fault_ipa, ipa_end);
 
 	while (fault_ipa < ipa_end) {
-		if (ppage && ppage != KVM_DUMMY_PPAGE && ppage->ipa == fault_ipa) {
+		if (ppage && ppage->ipa == fault_ipa) {
 			page_size = PAGE_SIZE << ppage->order;
-			ppage = mt_next(&vcpu->kvm->arch.pkvm.pinned_pages,
-					ppage->ipa, ULONG_MAX);
+			ppage = kvm_pinned_pages_iter_next(ppage, fault_ipa, ipa_end);
 		} else {
 			gfn_t gfn = gpa_to_gfn(fault_ipa);
 			struct kvm_memory_slot *memslot;
@@ -1877,7 +1838,8 @@ int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t si
 			 * We had to release the mmu_lock so let's update the
 			 * reference.
 			 */
-			ppage = find_ppage_or_above(vcpu->kvm, fault_ipa + page_size);
+			ppage = kvm_pinned_pages_iter_first(&vcpu->kvm->arch.pkvm.pinned_pages,
+							    fault_ipa + PAGE_SIZE, ipa_end);
 		}
 
 		fault_ipa += page_size;
@@ -319,21 +319,17 @@ static int __reclaim_dying_guest_page_call(u64 pfn, u64 gfn, u8 order, void *arg
 
 static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 {
+	struct kvm_pinned_page *tmp, *ppage;
 	struct mm_struct *mm = current->mm;
-	struct kvm_pinned_page *ppage;
 	struct kvm_vcpu *host_vcpu;
-	unsigned long idx, ipa = 0;
+	unsigned long idx;
 
 	if (!host_kvm->arch.pkvm.handle)
 		goto out_free;
 
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));
 
-	mt_clear_in_rcu(&host_kvm->arch.pkvm.pinned_pages);
-
-	mt_for_each(&host_kvm->arch.pkvm.pinned_pages, ppage, ipa, ULONG_MAX) {
-		if (WARN_ON(ppage == KVM_DUMMY_PPAGE))
-			continue;
+	for_ppage_node_in_range(host_kvm, 0, ULONG_MAX, ppage, tmp) {
 		WARN_ON(pkvm_call_hyp_nvhe_ppage(ppage,
 						 __reclaim_dying_guest_page_call,
 						 host_kvm, true));
@@ -341,9 +337,9 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 
 		account_locked_vm(mm, 1, false);
 		unpin_user_pages_dirty_lock(&ppage->page, 1, host_kvm->arch.pkvm.enabled);
+		kvm_pinned_pages_remove(ppage, &host_kvm->arch.pkvm.pinned_pages);
 		kfree(ppage);
 	}
-	mtree_destroy(&host_kvm->arch.pkvm.pinned_pages);
 
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));
 
@@ -538,13 +534,12 @@ void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
 {
 	struct mm_struct *mm = current->mm;
 	struct kvm_pinned_page *ppage;
-	unsigned long index = ipa;
 	u16 pins;
 
 	write_lock(&host_kvm->mmu_lock);
-	ppage = mt_find(&host_kvm->arch.pkvm.pinned_pages, &index,
-			index + PAGE_SIZE - 1);
-	if (ppage && ppage != KVM_DUMMY_PPAGE) {
+	ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages,
+					    ipa, ipa + PAGE_SIZE - 1);
+	if (ppage) {
 		if (ppage->pins)
 			ppage->pins--;
 		else
@@ -552,7 +547,8 @@ void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
 
 		pins = ppage->pins;
 		if (!pins)
-			mtree_erase(&host_kvm->arch.pkvm.pinned_pages, ipa);
+			kvm_pinned_pages_remove(ppage,
+						&host_kvm->arch.pkvm.pinned_pages);
 	}
 	write_unlock(&host_kvm->mmu_lock);
 