| From ec1cf2f3bb57a5f7293429c664dea0596ecc2a7f Mon Sep 17 00:00:00 2001 |
| From: David Stevens <stevensd@chromium.org> |
| Date: Thu, 29 Feb 2024 11:57:54 +0900 |
| Subject: [PATCH] FROMLIST: KVM: mmu: Introduce kvm_follow_pfn() |
| |
| Introduce kvm_follow_pfn(), which will replace __gfn_to_pfn_memslot(). |
| This initial implementation is just a refactor of the existing API,
| packing the arguments into a single structure. The arguments are
| further refactored as follows:
| |
| - The write_fault and interruptible boolean flags and the in
|   parameter portion of async are replaced by setting FOLL_WRITE,
|   FOLL_INTERRUPTIBLE, and FOLL_NOWAIT, respectively, in a new flags
|   argument.
| - The out parameter portion of the async parameter is now a return |
| value. |
| - The writable in/out parameter is split into a separate
|   try_map_writable in parameter and a writable out parameter.
| - All other parameters are the same.
| |
| Upcoming changes will add the ability to get a pfn without needing to |
| take a ref to the underlying page. |
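| 
| As a sketch of the new calling convention (mirroring the
| gfn_to_pfn_prot() conversion in this patch; slot, gfn, write_fault,
| and writable are assumed to be the caller's existing locals), a call
| such as
| 
|   pfn = __gfn_to_pfn_memslot(slot, gfn, false, false, NULL,
|                              write_fault, writable, NULL);
| 
| becomes
| 
|   struct kvm_follow_pfn kfp = {
|           .slot = slot,
|           .gfn = gfn,
|           /* write_fault is now a FOLL_WRITE bit in ->flags */
|           .flags = write_fault ? FOLL_WRITE : 0,
|           /* a non-NULL writable out pointer becomes try_map_writable */
|           .try_map_writable = !!writable,
|   };
| 
|   pfn = kvm_follow_pfn(&kfp);
|   if (writable)
|           *writable = kfp.writable;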
| |
| Signed-off-by: David Stevens <stevensd@chromium.org> |
| Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com> |
| (am from https://patchwork.kernel.org/patch/13576445/) |
| (also found at https://lore.kernel.org/r/20240229025759.1187910-4-stevensd@google.com) |
| |
| BUG=b:328351865 |
| UPSTREAM-TASK=b:265081912 |
| TEST=tast run zork arc.Boot.vm |
| TEST=No panic on lazor after logging in 5 times with kernel |
| TEST=build with USE="debug lockdebug kcov" |
| |
| Change-Id: I7e6426c1798dabbd08f43ce217614e3d15c297be |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/5359862 |
| Tested-by: Stephen Boyd <swboyd@chromium.org> |
| Reviewed-by: Stephen Boyd <swboyd@chromium.org> |
| Reviewed-by: Sean Paul <sean@poorly.run> |
| Tested-by: David Stevens <stevensd@chromium.org> |
| Commit-Queue: Douglas Anderson <dianders@chromium.org> |
| --- |
| include/linux/kvm_host.h | 18 ++++ |
| virt/kvm/kvm_main.c | 187 ++++++++++++++++++++++----------------- |
| virt/kvm/kvm_mm.h | 3 +- |
| virt/kvm/pfncache.c | 10 ++- |
| 4 files changed, 131 insertions(+), 87 deletions(-) |
| |
| diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h |
| index b359084959dd167669482a1b92c74478ea6ece6b..2d681fcabb4d7d096d7ecc46f0994d565d2e596c 100644 |
| --- a/include/linux/kvm_host.h |
| +++ b/include/linux/kvm_host.h |
| @@ -97,6 +97,7 @@ |
| #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) |
| #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) |
| #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) |
| +#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4) |
| |
| /* |
| * error pfns indicate that the gfn is in slot but faild to |
| @@ -1222,6 +1223,23 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, |
| void kvm_release_page_clean(struct page *page); |
| void kvm_release_page_dirty(struct page *page); |
| |
| +struct kvm_follow_pfn { |
| + const struct kvm_memory_slot *slot; |
| + gfn_t gfn; |
| + /* FOLL_* flags modifying lookup behavior. */ |
| + unsigned int flags; |
| + /* Whether this function can sleep. */ |
| + bool atomic; |
| + /* Try to create a writable mapping even for a read fault. */ |
| + bool try_map_writable; |
| + |
| + /* Outputs of kvm_follow_pfn */ |
| + hva_t hva; |
| + bool writable; |
| +}; |
| + |
| +kvm_pfn_t kvm_follow_pfn(struct kvm_follow_pfn *kfp); |
| + |
| kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); |
| kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, |
| bool *writable); |
| diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c |
| index da80a8331313300bfda34c78f9cdf54ef98727c0..53ceb971ceaf174092f7ece95d26a75b40715bb4 100644 |
| --- a/virt/kvm/kvm_main.c |
| +++ b/virt/kvm/kvm_main.c |
| @@ -2899,8 +2899,7 @@ static inline int check_user_page_hwpoison(unsigned long addr) |
| * true indicates success, otherwise false is returned. It's also the |
| * only part that runs if we can in atomic context. |
| */ |
| -static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, |
| - bool *writable, kvm_pfn_t *pfn) |
| +static bool hva_to_pfn_fast(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn) |
| { |
| struct page *page[1]; |
| |
| @@ -2909,14 +2908,12 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, |
| * or the caller allows to map a writable pfn for a read fault |
| * request. |
| */ |
| - if (!(write_fault || writable)) |
| + if (!((kfp->flags & FOLL_WRITE) || kfp->try_map_writable)) |
| return false; |
| |
| - if (get_user_page_fast_only(addr, FOLL_WRITE, page)) { |
| + if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, page)) { |
| *pfn = page_to_pfn(page[0]); |
| - |
| - if (writable) |
| - *writable = true; |
| + kfp->writable = true; |
| return true; |
| } |
| |
| @@ -2927,8 +2924,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, |
| * The slow path to get the pfn of the specified host virtual address, |
| * 1 indicates success, -errno is returned if error is detected. |
| */ |
| -static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, |
| - bool interruptible, bool *writable, kvm_pfn_t *pfn) |
| +static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn) |
| { |
| /* |
| * When a VCPU accesses a page that is not mapped into the secondary |
| @@ -2941,32 +2937,24 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, |
| * Note that get_user_page_fast_only() and FOLL_WRITE for now |
| * implicitly honor NUMA hinting faults and don't need this flag. |
| */ |
| - unsigned int flags = FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT; |
| + unsigned int flags = FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT | kfp->flags; |
| struct page *page; |
| int npages; |
| |
| might_sleep(); |
| |
| - if (writable) |
| - *writable = write_fault; |
| - |
| - if (write_fault) |
| - flags |= FOLL_WRITE; |
| - if (async) |
| - flags |= FOLL_NOWAIT; |
| - if (interruptible) |
| - flags |= FOLL_INTERRUPTIBLE; |
| - |
| - npages = get_user_pages_unlocked(addr, 1, &page, flags); |
| + npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags); |
| if (npages != 1) |
| return npages; |
| |
| - /* map read fault as writable if possible */ |
| - if (unlikely(!write_fault) && writable) { |
| + if (kfp->flags & FOLL_WRITE) { |
| + kfp->writable = true; |
| + } else if (kfp->try_map_writable) { |
| struct page *wpage; |
| |
| - if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) { |
| - *writable = true; |
| + /* map read fault as writable if possible */ |
| + if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) { |
| + kfp->writable = true; |
| put_page(page); |
| page = wpage; |
| } |
| @@ -2997,23 +2985,23 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn) |
| } |
| |
| static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| - unsigned long addr, bool write_fault, |
| - bool *writable, kvm_pfn_t *p_pfn) |
| + struct kvm_follow_pfn *kfp, kvm_pfn_t *p_pfn) |
| { |
| kvm_pfn_t pfn; |
| pte_t *ptep; |
| pte_t pte; |
| spinlock_t *ptl; |
| + bool write_fault = kfp->flags & FOLL_WRITE; |
| int r; |
| |
| - r = follow_pte(vma, addr, &ptep, &ptl); |
| + r = follow_pte(vma, kfp->hva, &ptep, &ptl); |
| if (r) { |
| /* |
| * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does |
| * not call the fault handler, so do it here. |
| */ |
| bool unlocked = false; |
| - r = fixup_user_fault(current->mm, addr, |
| + r = fixup_user_fault(current->mm, kfp->hva, |
| (write_fault ? FAULT_FLAG_WRITE : 0), |
| &unlocked); |
| if (unlocked) |
| @@ -3021,7 +3009,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| if (r) |
| return r; |
| |
| - r = follow_pte(vma, addr, &ptep, &ptl); |
| + r = follow_pte(vma, kfp->hva, &ptep, &ptl); |
| if (r) |
| return r; |
| } |
| @@ -3033,8 +3021,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| goto out; |
| } |
| |
| - if (writable) |
| - *writable = pte_write(pte); |
| + kfp->writable = pte_write(pte); |
| pfn = pte_pfn(pte); |
| |
| /* |
| @@ -3065,38 +3052,28 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| } |
| |
| /* |
| - * Pin guest page in memory and return its pfn. |
| - * @addr: host virtual address which maps memory to the guest |
| - * @atomic: whether this function is forbidden from sleeping |
| - * @interruptible: whether the process can be interrupted by non-fatal signals |
| - * @async: whether this function need to wait IO complete if the |
| - * host page is not in the memory |
| - * @write_fault: whether we should get a writable host page |
| - * @writable: whether it allows to map a writable host page for !@write_fault |
| - * |
| - * The function will map a writable host page for these two cases: |
| - * 1): @write_fault = true |
| - * 2): @write_fault = false && @writable, @writable will tell the caller |
| - * whether the mapping is writable. |
| + * Convert a hva to a pfn. |
| + * @kfp: args struct for the conversion |
| */ |
| -kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, |
| - bool *async, bool write_fault, bool *writable) |
| +kvm_pfn_t hva_to_pfn(struct kvm_follow_pfn *kfp) |
| { |
| struct vm_area_struct *vma; |
| kvm_pfn_t pfn; |
| int npages, r; |
| |
| - /* we can do it either atomically or asynchronously, not both */ |
| - WARN_ON_ONCE(atomic && async); |
| + /* |
| + * FOLL_NOWAIT is used for async page faults, which don't make sense |
| + * in an atomic context where the caller can't do async resolution. |
| + */ |
| + WARN_ON_ONCE(kfp->atomic && (kfp->flags & FOLL_NOWAIT)); |
| |
| - if (hva_to_pfn_fast(addr, write_fault, writable, &pfn)) |
| + if (hva_to_pfn_fast(kfp, &pfn)) |
| return pfn; |
| |
| - if (atomic) |
| + if (kfp->atomic) |
| return KVM_PFN_ERR_FAULT; |
| |
| - npages = hva_to_pfn_slow(addr, async, write_fault, interruptible, |
| - writable, &pfn); |
| + npages = hva_to_pfn_slow(kfp, &pfn); |
| if (npages == 1) |
| return pfn; |
| if (npages == -EINTR) |
| @@ -3104,79 +3081,123 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, |
| |
| mmap_read_lock(current->mm); |
| if (npages == -EHWPOISON || |
| - (!async && check_user_page_hwpoison(addr))) { |
| + (!(kfp->flags & FOLL_NOWAIT) && check_user_page_hwpoison(kfp->hva))) { |
| pfn = KVM_PFN_ERR_HWPOISON; |
| goto exit; |
| } |
| |
| retry: |
| - vma = vma_lookup(current->mm, addr); |
| + vma = vma_lookup(current->mm, kfp->hva); |
| |
| if (vma == NULL) |
| pfn = KVM_PFN_ERR_FAULT; |
| else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { |
| - r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn); |
| + r = hva_to_pfn_remapped(vma, kfp, &pfn); |
| if (r == -EAGAIN) |
| goto retry; |
| if (r < 0) |
| pfn = KVM_PFN_ERR_FAULT; |
| } else { |
| - if (async && vma_is_valid(vma, write_fault)) |
| - *async = true; |
| - pfn = KVM_PFN_ERR_FAULT; |
| + if ((kfp->flags & FOLL_NOWAIT) && |
| + vma_is_valid(vma, kfp->flags & FOLL_WRITE)) |
| + pfn = KVM_PFN_ERR_NEEDS_IO; |
| + else |
| + pfn = KVM_PFN_ERR_FAULT; |
| } |
| exit: |
| mmap_read_unlock(current->mm); |
| return pfn; |
| } |
| |
| +kvm_pfn_t kvm_follow_pfn(struct kvm_follow_pfn *kfp) |
| +{ |
| + kfp->writable = false; |
| + kfp->hva = __gfn_to_hva_many(kfp->slot, kfp->gfn, NULL, |
| + kfp->flags & FOLL_WRITE); |
| + |
| + if (kfp->hva == KVM_HVA_ERR_RO_BAD) |
| + return KVM_PFN_ERR_RO_FAULT; |
| + |
| + if (kvm_is_error_hva(kfp->hva)) |
| + return KVM_PFN_NOSLOT; |
| + |
| + if (memslot_is_readonly(kfp->slot)) |
| + kfp->try_map_writable = false; |
| + |
| + return hva_to_pfn(kfp); |
| +} |
| +EXPORT_SYMBOL_GPL(kvm_follow_pfn); |
| + |
| kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, |
| bool atomic, bool interruptible, bool *async, |
| bool write_fault, bool *writable, hva_t *hva) |
| { |
| - unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); |
| - |
| - if (hva) |
| - *hva = addr; |
| - |
| - if (kvm_is_error_hva(addr)) { |
| - if (writable) |
| - *writable = false; |
| + kvm_pfn_t pfn; |
| + struct kvm_follow_pfn kfp = { |
| + .slot = slot, |
| + .gfn = gfn, |
| + .flags = 0, |
| + .atomic = atomic, |
| + .try_map_writable = !!writable, |
| + }; |
| |
| - return addr == KVM_HVA_ERR_RO_BAD ? KVM_PFN_ERR_RO_FAULT : |
| - KVM_PFN_NOSLOT; |
| - } |
| + if (write_fault) |
| + kfp.flags |= FOLL_WRITE; |
| + if (async) |
| + kfp.flags |= FOLL_NOWAIT; |
| + if (interruptible) |
| + kfp.flags |= FOLL_INTERRUPTIBLE; |
| |
| - /* Do not map writable pfn in the readonly memslot. */ |
| - if (writable && memslot_is_readonly(slot)) { |
| - *writable = false; |
| - writable = NULL; |
| + pfn = kvm_follow_pfn(&kfp); |
| + if (pfn == KVM_PFN_ERR_NEEDS_IO) { |
| + *async = true; |
| + pfn = KVM_PFN_ERR_FAULT; |
| } |
| - |
| - return hva_to_pfn(addr, atomic, interruptible, async, write_fault, |
| - writable); |
| + if (hva) |
| + *hva = kfp.hva; |
| + if (writable) |
| + *writable = kfp.writable; |
| + return pfn; |
| } |
| EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); |
| |
| kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, |
| bool *writable) |
| { |
| - return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, false, |
| - NULL, write_fault, writable, NULL); |
| + kvm_pfn_t pfn; |
| + struct kvm_follow_pfn kfp = { |
| + .slot = gfn_to_memslot(kvm, gfn), |
| + .gfn = gfn, |
| + .flags = write_fault ? FOLL_WRITE : 0, |
| + .try_map_writable = !!writable, |
| + }; |
| + pfn = kvm_follow_pfn(&kfp); |
| + if (writable) |
| + *writable = kfp.writable; |
| + return pfn; |
| } |
| EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); |
| |
| kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) |
| { |
| - return __gfn_to_pfn_memslot(slot, gfn, false, false, NULL, true, |
| - NULL, NULL); |
| + struct kvm_follow_pfn kfp = { |
| + .slot = slot, |
| + .gfn = gfn, |
| + .flags = FOLL_WRITE, |
| + }; |
| + return kvm_follow_pfn(&kfp); |
| } |
| EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); |
| |
| kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn) |
| { |
| - return __gfn_to_pfn_memslot(slot, gfn, true, false, NULL, true, |
| - NULL, NULL); |
| + struct kvm_follow_pfn kfp = { |
| + .slot = slot, |
| + .gfn = gfn, |
| + .flags = FOLL_WRITE, |
| + .atomic = true, |
| + }; |
| + return kvm_follow_pfn(&kfp); |
| } |
| EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); |
| |
| diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h |
| index 715f19669d01f72912af9b7393ccd01f65c0527e..de03d91e9ea66505e0f7b0fb0ce1cd82d8623c9a 100644 |
| --- a/virt/kvm/kvm_mm.h |
| +++ b/virt/kvm/kvm_mm.h |
| @@ -20,8 +20,7 @@ |
| #define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock) |
| #endif /* KVM_HAVE_MMU_RWLOCK */ |
| |
| -kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, |
| - bool *async, bool write_fault, bool *writable); |
| +kvm_pfn_t hva_to_pfn(struct kvm_follow_pfn *foll); |
| |
| #ifdef CONFIG_HAVE_KVM_PFNCACHE |
| void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, |
| diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c |
| index e3453e869e92c8f6546b7aa76ce8b3a2b486df4f..9871797b590f8893b29fcb507c506d4d2bbe6b08 100644 |
| --- a/virt/kvm/pfncache.c |
| +++ b/virt/kvm/pfncache.c |
| @@ -159,6 +159,12 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) |
| kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; |
| void *new_khva = NULL; |
| unsigned long mmu_seq; |
| + struct kvm_follow_pfn kfp = { |
| + .slot = gpc->memslot, |
| + .gfn = gpa_to_gfn(gpc->gpa), |
| + .flags = FOLL_WRITE, |
| + .hva = gpc->uhva, |
| + }; |
| |
| lockdep_assert_held(&gpc->refresh_lock); |
| |
| @@ -197,8 +203,8 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) |
| cond_resched(); |
| } |
| |
| - /* We always request a writeable mapping */ |
| - new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL); |
| + /* We always request a writable mapping */ |
| + new_pfn = hva_to_pfn(&kfp); |
| if (is_error_noslot_pfn(new_pfn)) |
| goto out_error; |
| |
| -- |
| 2.45.1.288.g0e0cd299f1-goog |
| |