From 32f070ad274d46e6693b49c6dd7399c8d642e831 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 1 Feb 2021 05:12:11 -0500 Subject: KVM: do not assume PTE is writable after follow_pfn commit bd2fae8da794b55bf2ac02632da3a151b10e664c upstream. In order to convert an HVA to a PFN, KVM usually tries to use the get_user_pages family of functinso. This however is not possible for VM_IO vmas; in that case, KVM instead uses follow_pfn. In doing this however KVM loses the information on whether the PFN is writable. That is usually not a problem because the main use of VM_IO vmas with KVM is for BARs in PCI device assignment, however it is a bug. To fix it, use follow_pte and check pte_write while under the protection of the PTE lock. The information can be used to fail hva_to_pfn_remapped or passed back to the caller via *writable. Usage of follow_pfn was introduced in commit add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05); however, even older version have the same issue, all the way back to commit 2e2e3738af33 ("KVM: Handle vma regions with no backing page", 2008-07-20), as they also did not check whether the PFN was writable. Fixes: 2e2e3738af33 ("KVM: Handle vma regions with no backing page") Reported-by: David Stevens Cc: 3pvd@google.com Cc: Jann Horn Cc: Jason Gunthorpe Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f25b5043cbca..04535e9d9183 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1599,9 +1599,11 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, kvm_pfn_t *p_pfn) { unsigned long pfn; + pte_t *ptep; + spinlock_t *ptl; int r; - r = follow_pfn(vma, addr, &pfn); + r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl); if (r) { /* * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does @@ -1616,14 +1618,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, if (r) return r; - r = follow_pfn(vma, addr, &pfn); + r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl); if (r) return r; + } + if (write_fault && !pte_write(*ptep)) { + pfn = KVM_PFN_ERR_RO_FAULT; + goto out; } if (writable) - *writable = true; + *writable = pte_write(*ptep); + pfn = pte_pfn(*ptep); /* * Get a reference here because callers of *hva_to_pfn* and @@ -1638,6 +1645,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, */ kvm_get_pfn(pfn); +out: + pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; return 0; } -- cgit v1.2.3 From 3f9fbe70316407a6f7322e2bb6ac5fb272dbbe79 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Feb 2021 05:07:11 -0500 Subject: mm: provide a saner PTE walking API for modules commit 9fd6dad1261a541b3f5fa7dc5b152222306e6702 upstream. Currently, the follow_pfn function is exported for modules but follow_pte is not. However, follow_pfn is very easy to misuse, because it does not provide protections (so most of its callers assume the page is writable!) and because it returns after having already unlocked the page table lock. Provide instead a simplified version of follow_pte that does not have the pmdpp and range arguments. The older version survives as follow_invalidate_pte() for use by fs/dax.c. Reviewed-by: Jason Gunthorpe Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 04535e9d9183..19de91b896ff 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1603,7 +1603,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, spinlock_t *ptl; int r; - r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl); + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); if (r) { /* * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does @@ -1618,7 +1618,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, if (r) return r; - r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl); + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); if (r) return r; } -- cgit v1.2.3 From 5f2093be36273ad826c3ebe7251a233e75d74b4b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 8 Feb 2021 12:19:40 -0800 Subject: KVM: Use kvm_pfn_t for local PFN variable in hva_to_pfn_remapped() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a9545779ee9e9e103648f6f2552e73cfe808d0f4 upstream. Use kvm_pfn_t, a.k.a. u64, for the local 'pfn' variable when retrieving a so called "remapped" hva/pfn pair. In theory, the hva could resolve to a pfn in high memory on a 32-bit kernel. This bug was inadvertantly exposed by commit bd2fae8da794 ("KVM: do not assume PTE is writable after follow_pfn"), which added an error PFN value to the mix, causing gcc to comlain about overflowing the unsigned long. arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function ‘hva_to_pfn_remapped’: include/linux/kvm_host.h:89:30: error: conversion from ‘long long unsigned int’ to ‘long unsigned int’ changes value from ‘9218868437227405314’ to ‘2’ [-Werror=overflow] 89 | #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) | ^ virt/kvm/kvm_main.c:1935:9: note: in expansion of macro ‘KVM_PFN_ERR_RO_FAULT’ Cc: stable@vger.kernel.org Fixes: add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up") Signed-off-by: Sean Christopherson Message-Id: <20210208201940.1258328-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 19de91b896ff..048b555c5acc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1598,7 +1598,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, bool write_fault, bool *writable, kvm_pfn_t *p_pfn) { - unsigned long pfn; + kvm_pfn_t pfn; pte_t *ptep; spinlock_t *ptl; int r; -- cgit v1.2.3