| From 73605f135057541c4fe3e7374085f2e1cc4094ab Mon Sep 17 00:00:00 2001 |
| From: Yu Zhao <yuzhao@google.com> |
| Date: Sat, 1 Apr 2023 13:56:51 -0600 |
| Subject: [PATCH] BACKPORT: FROMLIST: kvm/arm64: add |
| kvm_arch_test_clear_young() |
| |
| This patch adds kvm_arch_test_clear_young() for the vast majority of |
| VMs that are not pKVM and run on hardware that sets the accessed bit |
| in KVM PTEs. |
| |
| It relies on two techniques, RCU and cmpxchg, to safely test and clear |
| the accessed bit without taking the MMU lock. The former protects KVM |
| page tables from being freed while the latter clears the accessed bit |
| atomically against both the hardware and other software page table |
| walkers. |
| |
| Signed-off-by: Yu Zhao <yuzhao@google.com> |
| (am from https://patchwork.kernel.org/patch/13144313/) |
| |
| BUG=b:266976439 |
| UPSTREAM-TASK=b:266738578 |
| TEST=built |
| |
| Change-Id: I8fb581d47cd57581c5bf6fa90c15a1e836b190bd |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4392743 |
| Tested-by: Yu Zhao <yuzhao@chromium.org> |
| Reviewed-by: T.J. Alumbaugh <talumbau@google.com> |
| Commit-Queue: Yu Zhao <yuzhao@chromium.org> |
| --- |
| arch/arm64/include/asm/kvm_host.h | 7 +++ |
| arch/arm64/include/asm/kvm_pgtable.h | 55 +++++++++++++++++++ |
| arch/arm64/kvm/arm.c | 1 + |
| arch/arm64/kvm/hyp/pgtable.c | 58 +++----------------- |
| arch/arm64/kvm/mmu.c | 80 ++++++++++++++++++++++++++++ |
| 5 files changed, 149 insertions(+), 52 deletions(-) |
| |
| diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h |
| index 38cf5aac3e377a212e30ee8ba851264c7c54cf84..533665df5c73b61ac24160fa06526ca1a5ec13d3 100644 |
| --- a/arch/arm64/include/asm/kvm_host.h |
| +++ b/arch/arm64/include/asm/kvm_host.h |
| @@ -1155,4 +1155,11 @@ static inline void kvm_hyp_reserve(void) { } |
| void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); |
| bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); |
| |
| +/* needs HW-managed AF and no pKVM; see the generic kvm_arch_has_test_clear_young() */ |
| +#define kvm_arch_has_test_clear_young kvm_arch_has_test_clear_young |
| +static inline bool kvm_arch_has_test_clear_young(void) |
| +{ |
| + return IS_ENABLED(CONFIG_KVM) && cpu_has_hw_af() && !is_protected_kvm_enabled(); |
| +} |
| + |
| #endif /* __ARM64_KVM_HOST_H__ */ |
| diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h |
| index d3e354bb8351d7aa9762155e18cb49cffee938e0..55dd75ee8b742114aeee709ef48f76c1685d3f4d 100644 |
| --- a/arch/arm64/include/asm/kvm_pgtable.h |
| +++ b/arch/arm64/include/asm/kvm_pgtable.h |
| @@ -44,11 +44,59 @@ typedef u64 kvm_pte_t; |
| |
| #define KVM_PHYS_INVALID (-1ULL) |
| |
| +#define KVM_PTE_TYPE BIT(1) |
| +#define KVM_PTE_TYPE_BLOCK 0 |
| +#define KVM_PTE_TYPE_PAGE 1 |
| +#define KVM_PTE_TYPE_TABLE 1 |
| + |
| +#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) |
| + |
| +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) |
| +#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) |
| +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3 |
| +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1 |
| +#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) |
| +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 |
| +#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) |
| + |
| +#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) |
| +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) |
| +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) |
| +#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) |
| +#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 |
| +#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) |
| + |
| +#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) |
| + |
| +#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) |
| + |
| +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) |
| + |
| +#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) |
| + |
| +#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ |
| + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ |
| + KVM_PTE_LEAF_ATTR_HI_S2_XN) |
| + |
| +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) |
| +#define KVM_MAX_OWNER_ID 1 |
| + |
| static inline bool kvm_pte_valid(kvm_pte_t pte) |
| { |
| return pte & KVM_PTE_VALID; |
| } |
| |
| +/* True iff @pte is a valid table entry; last-level entries are never tables. */ |
| +static inline bool kvm_pte_table(kvm_pte_t pte, u32 level) |
| +{ |
| + bool last_level = level == KVM_PGTABLE_MAX_LEVELS - 1; |
| + |
| + if (last_level || !kvm_pte_valid(pte)) |
| + return false; |
| + |
| + return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; |
| +} |
| + |
| static inline u64 kvm_pte_to_phys(kvm_pte_t pte) |
| { |
| u64 pa = pte & KVM_PTE_ADDR_MASK; |
| @@ -128,6 +176,7 @@ static inline bool kvm_is_block_size_supported(u64 size) |
| * @put_page: Decrement the refcount on a page. When the |
| * refcount reaches 0 the page is automatically |
| * freed. |
| + * @put_page_rcu: RCU variant of the above. |
| * @page_count: Return the refcount of a page. |
| * @phys_to_virt: Convert a physical address into a virtual |
| * address mapped in the current context. |
| @@ -145,6 +194,7 @@ struct kvm_pgtable_mm_ops { |
| void (*free_unlinked_table)(void *addr, u32 level); |
| void (*get_page)(void *addr); |
| void (*put_page)(void *addr); |
| + void (*put_page_rcu)(void *addr); |
| int (*page_count)(void *addr); |
| void* (*phys_to_virt)(phys_addr_t phys); |
| phys_addr_t (*virt_to_phys)(void *addr); |
| @@ -219,6 +269,11 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, |
| * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries |
| * without Cache maintenance |
| * operations required. |
| + * kvm_arch_test_clear_young() is a special case. It relies on two |
| + * techniques, RCU and cmpxchg, to safely test and clear the accessed |
| + * bit without taking the MMU lock. The former protects KVM page tables |
| + * from being freed while the latter clears the accessed bit atomically |
| + * against both the hardware and other software page table walkers. |
| */ |
| enum kvm_pgtable_walk_flags { |
| KVM_PGTABLE_WALK_LEAF = BIT(0), |
| diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c |
| index 4866b3f7b4ea3847d885e00cfac47a4d7abf9da3..96765da57115ec2d7ebebc1fca5f4e6b63ee9036 100644 |
| --- a/arch/arm64/kvm/arm.c |
| +++ b/arch/arm64/kvm/arm.c |
| @@ -197,6 +197,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) |
| */ |
| void kvm_arch_destroy_vm(struct kvm *kvm) |
| { |
| + kvm_free_stage2_pgd(&kvm->arch.mmu); |
| bitmap_free(kvm->arch.pmu_filter); |
| free_cpumask_var(kvm->arch.supported_cpus); |
| |
| diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c |
| index f155b8c9e98c7fbf1298f4ecf64c6826c76fdb23..13502d89f1adebc5e2bebd8e72297be151c6eeb2 100644 |
| --- a/arch/arm64/kvm/hyp/pgtable.c |
| +++ b/arch/arm64/kvm/hyp/pgtable.c |
| @@ -12,47 +12,8 @@ |
| #include <asm/stage2_pgtable.h> |
| |
| |
| -#define KVM_PTE_TYPE BIT(1) |
| -#define KVM_PTE_TYPE_BLOCK 0 |
| -#define KVM_PTE_TYPE_PAGE 1 |
| -#define KVM_PTE_TYPE_TABLE 1 |
| - |
| -#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) |
| - |
| -#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) |
| -#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) |
| -#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \ |
| - ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; }) |
| -#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \ |
| - ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; }) |
| -#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) |
| -#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 |
| -#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) |
| - |
| -#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) |
| -#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) |
| -#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) |
| -#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) |
| -#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 |
| -#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) |
| - |
| -#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) |
| - |
| -#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) |
| - |
| -#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) |
| - |
| -#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) |
| - |
| #define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) |
| |
| -#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ |
| - KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ |
| - KVM_PTE_LEAF_ATTR_HI_S2_XN) |
| - |
| -#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) |
| -#define KVM_MAX_OWNER_ID 1 |
| - |
| /* |
| * Used to indicate a pte for which a 'break-before-make' sequence is in |
| * progress. |
| @@ -124,17 +85,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) |
| return kvm_pgd_page_idx(&pgt, -1ULL) + 1; |
| } |
| |
| -static bool kvm_pte_table(kvm_pte_t pte, u32 level) |
| -{ |
| - if (level == KVM_PGTABLE_MAX_LEVELS - 1) |
| - return false; |
| - |
| - if (!kvm_pte_valid(pte)) |
| - return false; |
| - |
| - return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; |
| -} |
| - |
| static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops) |
| { |
| return mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); |
| @@ -1124,8 +1074,12 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, |
| mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), |
| kvm_granule_size(ctx->level)); |
| |
| - if (childp) |
| - mm_ops->put_page(childp); |
| + if (childp) { |
| + if (mm_ops->put_page_rcu) |
| + mm_ops->put_page_rcu(childp); |
| + else |
| + mm_ops->put_page(childp); |
| + } |
| |
| return 0; |
| } |
| diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c |
| index a50e8bc217cd8004da7713d10d31fdc013f35336..d0203a44f6b9aa84c92171a4c253b14ab777e431 100644 |
| --- a/arch/arm64/kvm/mmu.c |
| +++ b/arch/arm64/kvm/mmu.c |
| @@ -260,6 +260,21 @@ static int kvm_host_page_count(void *addr) |
| return page_count(virt_to_page(addr)); |
| } |
| |
| +/* RCU callback: drop the page reference queued by kvm_s2_put_page_rcu(). */ |
| +static void kvm_s2_rcu_put_page(struct rcu_head *head) |
| +{ |
| + put_page(container_of(head, struct page, rcu_head)); |
| +} |
| + |
| +/* put_page_rcu hook: the reference is dropped from an RCU callback. */ |
| +static void kvm_s2_put_page_rcu(void *addr) |
| +{ |
| + /* Unaccount the page-table page when only one reference is left. */ |
| + if (kvm_host_page_count(addr) == 1) |
| + kvm_account_pgtable_pages(addr, -1); |
| + call_rcu(&virt_to_page(addr)->rcu_head, kvm_s2_rcu_put_page); |
| +} |
| + |
| static phys_addr_t kvm_host_pa(void *addr) |
| { |
| return __pa(addr); |
| @@ -846,6 +861,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { |
| .free_unlinked_table = stage2_free_unlinked_table, |
| .get_page = kvm_host_get_page, |
| .put_page = kvm_s2_put_page, |
| + .put_page_rcu = kvm_s2_put_page_rcu, |
| .page_count = kvm_host_page_count, |
| .phys_to_virt = kvm_host_va, |
| .virt_to_phys = kvm_host_pa, |
| @@ -1846,6 +1862,70 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) |
| size, false); |
| } |
| |
| +struct test_clear_young_arg { |
| + struct kvm_gfn_range *range; /* caller's range; visited gfns are checked against it */ |
| + gfn_t lsb_gfn; /* the bit for a gfn sits at offset lsb_gfn - gfn */ |
| + unsigned long *bitmap; /* one bit flipped per leaf PTE found with AF set */ |
| +}; |
| + |
| +/* Leaf visitor: flip the gfn's bitmap bit and clear AF if that bit was set. */ |
| +static int stage2_test_clear_young(const struct kvm_pgtable_visit_ctx *ctx, |
| + enum kvm_pgtable_walk_flags flags) |
| +{ |
| + gfn_t gfn = ctx->addr / PAGE_SIZE; |
| + kvm_pte_t old = READ_ONCE(*ctx->ptep); |
| + kvm_pte_t new = old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF; |
| + struct test_clear_young_arg *arg = ctx->arg; |
| + |
| + /* The table page must still be referenced and the gfn within range. */ |
| + VM_WARN_ON_ONCE(!page_count(virt_to_page(ctx->ptep))); |
| + VM_WARN_ON_ONCE(gfn < arg->range->start || gfn >= arg->range->end); |
| + |
| + if (kvm_pte_table(old, ctx->level)) |
| + return 0; |
| + |
| + /* Nothing to do for an invalid entry or one whose AF is already clear. */ |
| + if (!kvm_pte_valid(new) || new == old) |
| + return 0; |
| + |
| + /* see the comments on the generic kvm_arch_has_test_clear_young() */ |
| + if (__test_and_change_bit(arg->lsb_gfn - gfn, arg->bitmap)) |
| + cmpxchg64(ctx->ptep, old, new); |
| + |
| + return 0; |
| +} |
| + |
| +/* Walk @range's stage-2 leaves under RCU without the MMU lock; false if unsupported. */ |
| +bool kvm_arch_test_clear_young(struct kvm *kvm, struct kvm_gfn_range *range, |
| + gfn_t lsb_gfn, unsigned long *bitmap) |
| +{ |
| + struct test_clear_young_arg young = { |
| + .range = range, |
| + .lsb_gfn = lsb_gfn, |
| + .bitmap = bitmap, |
| + }; |
| + struct kvm_pgtable_walker walker = { |
| + .cb = stage2_test_clear_young, |
| + .arg = &young, |
| + .flags = KVM_PGTABLE_WALK_LEAF, |
| + }; |
| + u64 ipa = range->start * PAGE_SIZE; |
| + u64 size = range->end * PAGE_SIZE - ipa; |
| + |
| + BUILD_BUG_ON(is_hyp_code()); |
| + if (WARN_ON_ONCE(!kvm_arch_has_test_clear_young())) |
| + return false; |
| + |
| + /* RCU keeps the page tables from being freed; see kvm_pgtable_walk_flags */ |
| + rcu_read_lock(); |
| + |
| + kvm_pgtable_walk(kvm->arch.mmu.pgt, ipa, size, &walker); |
| + |
| + rcu_read_unlock(); |
| + |
| + return true; |
| +} |
| + |
| phys_addr_t kvm_mmu_get_httbr(void) |
| { |
| return __pa(hyp_pgtable->pgd); |
| -- |
| 2.42.0.820.g83a721a137-goog |
| |