| From ec1cf2f3bb57a5f7293429c664dea0596ecc2a7f Mon Sep 17 00:00:00 2001 |
| From: David Stevens <stevensd@chromium.org> |
| Date: Thu, 29 Feb 2024 11:57:54 +0900 |
| Subject: [PATCH] FROMLIST: KVM: mmu: Introduce kvm_follow_pfn() |
| |
| Introduce kvm_follow_pfn(), which will replace __gfn_to_pfn_memslot(). |
| This initial implementation is just a refactor of the existing API,
| packing the arguments into a single structure. The arguments are
| further refactored as follows:
| |
| - The write_fault and interruptible boolean flags and the in
|   parameter portion of async are replaced by setting FOLL_WRITE,
|   FOLL_INTERRUPTIBLE, and FOLL_NOWAIT, respectively, in a new flags
|   argument.
| - The out parameter portion of the async parameter is now a return |
| value. |
| - The writable in/out parameter is split into a separate
|   try_map_writable in parameter and a writable out parameter.
| - All other parameters are the same.
| |
| Upcoming changes will add the ability to get a pfn without needing to |
| take a ref to the underlying page. |
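| 
| As a sketch of the new calling convention (mirroring the
| gfn_to_pfn_prot() conversion in this patch; slot, gfn, write_fault,
| and writable are assumed to be the caller's existing locals), a call
| such as
| 
|   pfn = __gfn_to_pfn_memslot(slot, gfn, false, false, NULL,
|                              write_fault, writable, NULL);
| 
| becomes
| 
|   struct kvm_follow_pfn kfp = {
|           .slot = slot,
|           .gfn = gfn,
|           /* write_fault is now a FOLL_WRITE bit in ->flags */
|           .flags = write_fault ? FOLL_WRITE : 0,
|           /* a non-NULL writable out pointer becomes try_map_writable */
|           .try_map_writable = !!writable,
|   };
| 
|   pfn = kvm_follow_pfn(&kfp);
|   if (writable)
|           *writable = kfp.writable;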
| |
| Signed-off-by: David Stevens <stevensd@chromium.org> |
| Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com> |
| (am from https://patchwork.kernel.org/patch/13576445/) |
| (also found at https://lore.kernel.org/r/20240229025759.1187910-4-stevensd@google.com) |
| |
| BUG=b:328351865 |
| UPSTREAM-TASK=b:265081912 |
| TEST=tast run zork arc.Boot.vm |
| TEST=No panic on lazor after logging in 5 times with kernel |
| TEST=build with USE="debug lockdebug kcov" |
| |
| Change-Id: I7e6426c1798dabbd08f43ce217614e3d15c297be |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/5359862 |
| Tested-by: Stephen Boyd <swboyd@chromium.org> |
| Reviewed-by: Stephen Boyd <swboyd@chromium.org> |
| Reviewed-by: Sean Paul <sean@poorly.run> |
| Tested-by: David Stevens <stevensd@chromium.org> |
| Commit-Queue: Douglas Anderson <dianders@chromium.org> |
| --- |
| include/linux/kvm_host.h | 18 ++++ |
| virt/kvm/kvm_main.c | 187 ++++++++++++++++++++++----------------- |
| virt/kvm/kvm_mm.h | 3 +- |
| virt/kvm/pfncache.c | 10 ++- |
| 4 files changed, 131 insertions(+), 87 deletions(-) |
| |
| diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h |
| index b359084959dd167669482a1b92c74478ea6ece6b..2d681fcabb4d7d096d7ecc46f0994d565d2e596c 100644 |
| --- a/include/linux/kvm_host.h |
| +++ b/include/linux/kvm_host.h |
| @@ -97,6 +97,7 @@ |
| #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) |
| #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) |
| #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) |
| +#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4) |
| |
| /* |
| * error pfns indicate that the gfn is in slot but faild to |
| @@ -1222,6 +1223,23 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, |
| void kvm_release_page_clean(struct page *page); |
| void kvm_release_page_dirty(struct page *page); |
| |
| +struct kvm_follow_pfn { |
| + const struct kvm_memory_slot *slot; |
| + gfn_t gfn; |
| + /* FOLL_* flags modifying lookup behavior. */ |
| + unsigned int flags; |
| + /* Whether this function can sleep. */ |
| + bool atomic; |
| + /* Try to create a writable mapping even for a read fault. */ |
| + bool try_map_writable; |
| + |
| + /* Outputs of kvm_follow_pfn */ |
| + hva_t hva; |
| + bool writable; |
| +}; |
| + |
| +kvm_pfn_t kvm_follow_pfn(struct kvm_follow_pfn *kfp); |
| + |
| kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); |
| kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, |
| bool *writable); |
| diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c |
| index da80a8331313300bfda34c78f9cdf54ef98727c0..53ceb971ceaf174092f7ece95d26a75b40715bb4 100644 |
| --- a/virt/kvm/kvm_main.c |
| +++ b/virt/kvm/kvm_main.c |
| @@ -2899,8 +2899,7 @@ static inline int check_user_page_hwpoison(unsigned long addr) |
| * true indicates success, otherwise false is returned. It's also the |
| * only part that runs if we can in atomic context. |
| */ |
| -static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, |
| - bool *writable, kvm_pfn_t *pfn) |
| +static bool hva_to_pfn_fast(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn) |
| { |
| struct page *page[1]; |
| |
| @@ -2909,14 +2908,12 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, |
| * or the caller allows to map a writable pfn for a read fault |
| * request. |
| */ |
| - if (!(write_fault || writable)) |
| + if (!((kfp->flags & FOLL_WRITE) || kfp->try_map_writable)) |
| return false; |
| |
| - if (get_user_page_fast_only(addr, FOLL_WRITE, page)) { |
| + if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, page)) { |
| *pfn = page_to_pfn(page[0]); |
| - |
| - if (writable) |
| - *writable = true; |
| + kfp->writable = true; |
| return true; |
| } |
| |
| @@ -2927,8 +2924,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, |
| * The slow path to get the pfn of the specified host virtual address, |
| * 1 indicates success, -errno is returned if error is detected. |
| */ |
| -static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, |
| - bool interruptible, bool *writable, kvm_pfn_t *pfn) |
| +static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn) |
| { |
| /* |
| * When a VCPU accesses a page that is not mapped into the secondary |
| @@ -2941,32 +2937,24 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, |
| * Note that get_user_page_fast_only() and FOLL_WRITE for now |
| * implicitly honor NUMA hinting faults and don't need this flag. |
| */ |
| - unsigned int flags = FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT; |
| + unsigned int flags = FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT | kfp->flags; |
| struct page *page; |
| int npages; |
| |
| might_sleep(); |
| |
| - if (writable) |
| - *writable = write_fault; |
| - |
| - if (write_fault) |
| - flags |= FOLL_WRITE; |
| - if (async) |
| - flags |= FOLL_NOWAIT; |
| - if (interruptible) |
| - flags |= FOLL_INTERRUPTIBLE; |
| - |
| - npages = get_user_pages_unlocked(addr, 1, &page, flags); |
| + npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags); |
| if (npages != 1) |
| return npages; |
| |
| - /* map read fault as writable if possible */ |
| - if (unlikely(!write_fault) && writable) { |
| + if (kfp->flags & FOLL_WRITE) { |
| + kfp->writable = true; |
| + } else if (kfp->try_map_writable) { |
| struct page *wpage; |
| |
| - if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) { |
| - *writable = true; |
| + /* map read fault as writable if possible */ |
| + if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) { |
| + kfp->writable = true; |
| put_page(page); |
| page = wpage; |
| } |
| @@ -2997,23 +2985,23 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn) |
| } |
| |
| static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| - unsigned long addr, bool write_fault, |
| - bool *writable, kvm_pfn_t *p_pfn) |
| + struct kvm_follow_pfn *kfp, kvm_pfn_t *p_pfn) |
| { |
| kvm_pfn_t pfn; |
| pte_t *ptep; |
| pte_t pte; |
| spinlock_t *ptl; |
| + bool write_fault = kfp->flags & FOLL_WRITE; |
| int r; |
| |
| - r = follow_pte(vma, addr, &ptep, &ptl); |
| + r = follow_pte(vma, kfp->hva, &ptep, &ptl); |
| if (r) { |
| /* |
| * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does |
| * not call the fault handler, so do it here. |
| */ |
| bool unlocked = false; |
| - r = fixup_user_fault(current->mm, addr, |
| + r = fixup_user_fault(current->mm, kfp->hva, |
| (write_fault ? FAULT_FLAG_WRITE : 0), |
| &unlocked); |
| if (unlocked) |
| @@ -3021,7 +3009,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| if (r) |
| return r; |
| |
| - r = follow_pte(vma, addr, &ptep, &ptl); |
| + r = follow_pte(vma, kfp->hva, &ptep, &ptl); |
| if (r) |
| return r; |
| } |
| @@ -3033,8 +3021,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| goto out; |
| } |
| |
| - if (writable) |
| - *writable = pte_write(pte); |
| + kfp->writable = pte_write(pte); |
| pfn = pte_pfn(pte); |
| |
| /* |
| @@ -3065,38 +3052,28 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| } |
| |
| /* |
| - * Pin guest page in memory and return its pfn. |
| - * @addr: host virtual address which maps memory to the guest |
| - * @atomic: whether this function is forbidden from sleeping |
| - * @interruptible: whether the process can be interrupted by non-fatal signals |
| - * @async: whether this function need to wait IO complete if the |
| - * host page is not in the memory |
| - * @write_fault: whether we should get a writable host page |
| - * @writable: whether it allows to map a writable host page for !@write_fault |
| - * |
| - * The function will map a writable host page for these two cases: |
| - * 1): @write_fault = true |
| - * 2): @write_fault = false && @writable, @writable will tell the caller |
| - * whether the mapping is writable. |
| + * Convert a hva to a pfn. |
| + * @kfp: args struct for the conversion |
| */ |
| -kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, |
| - bool *async, bool write_fault, bool *writable) |
| +kvm_pfn_t hva_to_pfn(struct kvm_follow_pfn *kfp) |
| { |
| struct vm_area_struct *vma; |
| kvm_pfn_t pfn; |
| int npages, r; |
| |
| - /* we can do it either atomically or asynchronously, not both */ |
| - WARN_ON_ONCE(atomic && async); |
| + /* |
| + * FOLL_NOWAIT is used for async page faults, which don't make sense |
| + * in an atomic context where the caller can't do async resolution. |
| + */ |
| + WARN_ON_ONCE(kfp->atomic && (kfp->flags & FOLL_NOWAIT)); |
| |
| - if (hva_to_pfn_fast(addr, write_fault, writable, &pfn)) |
| + if (hva_to_pfn_fast(kfp, &pfn)) |
| return pfn; |
| |
| - if (atomic) |
| + if (kfp->atomic) |
| return KVM_PFN_ERR_FAULT; |
| |
| - npages = hva_to_pfn_slow(addr, async, write_fault, interruptible, |
| - writable, &pfn); |
| + npages = hva_to_pfn_slow(kfp, &pfn); |
| if (npages == 1) |
| return pfn; |
| if (npages == -EINTR) |
| @@ -3104,79 +3081,123 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, |
| |
| mmap_read_lock(current->mm); |
| if (npages == -EHWPOISON || |
| - (!async && check_user_page_hwpoison(addr))) { |
| + (!(kfp->flags & FOLL_NOWAIT) && check_user_page_hwpoison(kfp->hva))) { |
| pfn = KVM_PFN_ERR_HWPOISON; |
| goto exit; |
| } |
| |
| retry: |
| - vma = vma_lookup(current->mm, addr); |
| + vma = vma_lookup(current->mm, kfp->hva); |
| |
| if (vma == NULL) |
| pfn = KVM_PFN_ERR_FAULT; |
| else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { |
| - r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn); |
| + r = hva_to_pfn_remapped(vma, kfp, &pfn); |
| if (r == -EAGAIN) |
| goto retry; |
| if (r < 0) |
| pfn = KVM_PFN_ERR_FAULT; |
| } else { |
| - if (async && vma_is_valid(vma, write_fault)) |
| - *async = true; |
| - pfn = KVM_PFN_ERR_FAULT; |
| + if ((kfp->flags & FOLL_NOWAIT) && |
| + vma_is_valid(vma, kfp->flags & FOLL_WRITE)) |
| + pfn = KVM_PFN_ERR_NEEDS_IO; |
| + else |
| + pfn = KVM_PFN_ERR_FAULT; |
| } |
| exit: |
| mmap_read_unlock(current->mm); |
| return pfn; |
| } |
| |
| +kvm_pfn_t kvm_follow_pfn(struct kvm_follow_pfn *kfp) |
| +{ |
| + kfp->writable = false; |
| + kfp->hva = __gfn_to_hva_many(kfp->slot, kfp->gfn, NULL, |
| + kfp->flags & FOLL_WRITE); |
| + |
| + if (kfp->hva == KVM_HVA_ERR_RO_BAD) |
| + return KVM_PFN_ERR_RO_FAULT; |
| + |
| + if (kvm_is_error_hva(kfp->hva)) |
| + return KVM_PFN_NOSLOT; |
| + |
| + if (memslot_is_readonly(kfp->slot)) |
| + kfp->try_map_writable = false; |
| + |
| + return hva_to_pfn(kfp); |
| +} |
| +EXPORT_SYMBOL_GPL(kvm_follow_pfn); |
| + |
| kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, |
| bool atomic, bool interruptible, bool *async, |
| bool write_fault, bool *writable, hva_t *hva) |
| { |
| - unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); |
| - |
| - if (hva) |
| - *hva = addr; |
| - |
| - if (kvm_is_error_hva(addr)) { |
| - if (writable) |
| - *writable = false; |
| + kvm_pfn_t pfn; |
| + struct kvm_follow_pfn kfp = { |
| + .slot = slot, |
| + .gfn = gfn, |
| + .flags = 0, |
| + .atomic = atomic, |
| + .try_map_writable = !!writable, |
| + }; |
| |
| - return addr == KVM_HVA_ERR_RO_BAD ? KVM_PFN_ERR_RO_FAULT : |
| - KVM_PFN_NOSLOT; |
| - } |
| + if (write_fault) |
| + kfp.flags |= FOLL_WRITE; |
| + if (async) |
| + kfp.flags |= FOLL_NOWAIT; |
| + if (interruptible) |
| + kfp.flags |= FOLL_INTERRUPTIBLE; |
| |
| - /* Do not map writable pfn in the readonly memslot. */ |
| - if (writable && memslot_is_readonly(slot)) { |
| - *writable = false; |
| - writable = NULL; |
| + pfn = kvm_follow_pfn(&kfp); |
| + if (pfn == KVM_PFN_ERR_NEEDS_IO) { |
| + *async = true; |
| + pfn = KVM_PFN_ERR_FAULT; |
| } |
| - |
| - return hva_to_pfn(addr, atomic, interruptible, async, write_fault, |
| - writable); |
| + if (hva) |
| + *hva = kfp.hva; |
| + if (writable) |
| + *writable = kfp.writable; |
| + return pfn; |
| } |
| EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); |
| |
| kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, |
| bool *writable) |
| { |
| - return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, false, |
| - NULL, write_fault, writable, NULL); |
| + kvm_pfn_t pfn; |
| + struct kvm_follow_pfn kfp = { |
| + .slot = gfn_to_memslot(kvm, gfn), |
| + .gfn = gfn, |
| + .flags = write_fault ? FOLL_WRITE : 0, |
| + .try_map_writable = !!writable, |
| + }; |
| + pfn = kvm_follow_pfn(&kfp); |
| + if (writable) |
| + *writable = kfp.writable; |
| + return pfn; |
| } |
| EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); |
| |
| kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) |
| { |
| - return __gfn_to_pfn_memslot(slot, gfn, false, false, NULL, true, |
| - NULL, NULL); |
| + struct kvm_follow_pfn kfp = { |
| + .slot = slot, |
| + .gfn = gfn, |
| + .flags = FOLL_WRITE, |
| + }; |
| + return kvm_follow_pfn(&kfp); |
| } |
| EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); |
| |
| kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn) |
| { |
| - return __gfn_to_pfn_memslot(slot, gfn, true, false, NULL, true, |
| - NULL, NULL); |
| + struct kvm_follow_pfn kfp = { |
| + .slot = slot, |
| + .gfn = gfn, |
| + .flags = FOLL_WRITE, |
| + .atomic = true, |
| + }; |
| + return kvm_follow_pfn(&kfp); |
| } |
| EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); |
| |
| diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h |
| index 715f19669d01f72912af9b7393ccd01f65c0527e..de03d91e9ea66505e0f7b0fb0ce1cd82d8623c9a 100644 |
| --- a/virt/kvm/kvm_mm.h |
| +++ b/virt/kvm/kvm_mm.h |
| @@ -20,8 +20,7 @@ |
| #define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock) |
| #endif /* KVM_HAVE_MMU_RWLOCK */ |
| |
| -kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, |
| - bool *async, bool write_fault, bool *writable); |
| +kvm_pfn_t hva_to_pfn(struct kvm_follow_pfn *foll); |
| |
| #ifdef CONFIG_HAVE_KVM_PFNCACHE |
| void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, |
| diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c |
| index e3453e869e92c8f6546b7aa76ce8b3a2b486df4f..9871797b590f8893b29fcb507c506d4d2bbe6b08 100644 |
| --- a/virt/kvm/pfncache.c |
| +++ b/virt/kvm/pfncache.c |
| @@ -159,6 +159,12 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) |
| kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; |
| void *new_khva = NULL; |
| unsigned long mmu_seq; |
| + struct kvm_follow_pfn kfp = { |
| + .slot = gpc->memslot, |
| + .gfn = gpa_to_gfn(gpc->gpa), |
| + .flags = FOLL_WRITE, |
| + .hva = gpc->uhva, |
| + }; |
| |
| lockdep_assert_held(&gpc->refresh_lock); |
| |
| @@ -197,8 +203,8 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) |
| cond_resched(); |
| } |
| |
| - /* We always request a writeable mapping */ |
| - new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL); |
| + /* We always request a writable mapping */ |
| + new_pfn = hva_to_pfn(&kfp); |
| if (is_error_noslot_pfn(new_pfn)) |
| goto out_error; |
| |
| -- |
| 2.45.1.288.g0e0cd299f1-goog |
| |