diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/assigned-dev.c | 2 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 4 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 3 | ||||
-rw-r--r-- | virt/kvm/iommu.c | 10 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 79 | ||||
-rw-r--r-- | virt/kvm/vfio.c | 4 | ||||
-rw-r--r-- | virt/kvm/vfio.h | 13 |
7 files changed, 91 insertions, 24 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 5819a2708d7e..e05000e200d2 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -302,7 +302,7 @@ static void kvm_free_assigned_device(struct kvm *kvm, else pci_restore_state(assigned_dev->dev); - assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + pci_clear_dev_assigned(assigned_dev->dev); pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index d6a3d0993d88..5ff7f7f2689a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - down_read(&mm->mmap_sem); - get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL); - up_read(&mm->mmap_sem); + kvm_get_user_page_io(NULL, mm, addr, 1, NULL); kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 0c712a779b44..b0fb390943c6 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,6 +36,9 @@ #include <linux/seqlock.h> #include <trace/events/kvm.h> +#ifdef __KVM_HAVE_IOAPIC +#include "ioapic.h" +#endif #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQFD diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 714b94932312..e51d9f9b995f 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -191,8 +191,7 @@ int kvm_assign_device(struct kvm *kvm, return r; } - noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain, - IOMMU_CAP_CACHE_COHERENCY); + noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY); /* Check if need to update IOMMU page table for guest memory */ if (noncoherent != kvm->arch.iommu_noncoherent) { @@ -203,7 +202,7 @@ int kvm_assign_device(struct kvm *kvm, goto out_unmap; } - pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; + pci_set_dev_assigned(pdev); dev_info(&pdev->dev, "kvm assign device\n"); @@ -229,7 +228,7 @@ int kvm_deassign_device(struct kvm *kvm, 
iommu_detach_device(domain, &pdev->dev); - pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + pci_clear_dev_assigned(pdev); dev_info(&pdev->dev, "kvm deassign device\n"); @@ -254,8 +253,7 @@ int kvm_iommu_map_guest(struct kvm *kvm) } if (!allow_unsafe_assigned_interrupts && - !iommu_domain_has_cap(kvm->arch.iommu_domain, - IOMMU_CAP_INTR_REMAP)) { + !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) { printk(KERN_WARNING "%s: No interrupt remapping support," " disallowing device assignment." " Re-enble with \"allow_unsafe_assigned_interrupts=1\"" diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 278232025129..384eaa7b02fa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -52,11 +52,13 @@ #include <asm/processor.h> #include <asm/io.h> +#include <asm/ioctl.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include "coalesced_mmio.h" #include "async_pf.h" +#include "vfio.h" #define CREATE_TRACE_POINTS #include <trace/events/kvm.h> @@ -108,7 +110,7 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); return true; } @@ -151,7 +153,7 @@ static void ack_flush(void *_completed) { } -static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) { int i, cpu, me; cpumask_var_t cpus; @@ -188,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) long dirty_count = kvm->tlbs_dirty; smp_mb(); - if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) + if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } @@ -196,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); void kvm_reload_remote_mmus(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); + kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } void kvm_make_mclock_inprogress_request(struct kvm 
*kvm) { - make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); + kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); } void kvm_make_scan_ioapic_request(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); + kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -294,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); + + kvm_arch_mmu_notifier_invalidate_page(kvm, address); + srcu_read_unlock(&kvm->srcu, idx); } @@ -367,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int young, idx; @@ -375,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - young = kvm_age_hva(kvm, address); + young = kvm_age_hva(kvm, start, end); if (young) kvm_flush_remote_tlbs(kvm); @@ -1128,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); } +int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, bool write_fault, + struct page **pagep) +{ + int npages; + int locked = 1; + int flags = FOLL_TOUCH | FOLL_HWPOISON | + (pagep ? FOLL_GET : 0) | + (write_fault ? FOLL_WRITE : 0); + + /* + * If retrying the fault, we get here *not* having allowed the filemap + * to wait on the page lock. We should now allow waiting on the IO with + * the mmap semaphore released. 
+ */ + down_read(&mm->mmap_sem); + npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL, + &locked); + if (!locked) { + VM_BUG_ON(npages); + + if (!pagep) + return 0; + + /* + * The previous call has now waited on the IO. Now we can + * retry and complete. Pass TRIED to ensure we do not re + * schedule async IO (see e.g. filemap_fault). + */ + down_read(&mm->mmap_sem); + npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED, + pagep, NULL, NULL); + } + up_read(&mm->mmap_sem); + return npages; +} + static inline int check_user_page_hwpoison(unsigned long addr) { int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; @@ -1190,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, npages = get_user_page_nowait(current, current->mm, addr, write_fault, page); up_read(&current->mm->mmap_sem); - } else - npages = get_user_pages_fast(addr, 1, write_fault, - page); + } else { + /* + * By now we have tried gup_fast, and possibly async_pf, and we + * are certainly not atomic. Time to retry the gup, allowing + * mmap semaphore to be relinquished in the case of IO. 
+ */ + npages = kvm_get_user_page_io(current, current->mm, addr, + write_fault, page); + } if (npages != 1) return npages; @@ -1746,7 +1795,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target) rcu_read_lock(); pid = rcu_dereference(target->pid); if (pid) - task = get_pid_task(target->pid, PIDTYPE_PID); + task = get_pid_task(pid, PIDTYPE_PID); rcu_read_unlock(); if (!task) return ret; @@ -1995,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (vcpu->kvm->mm != current->mm) return -EIO; + if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) + return -EINVAL; + #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) /* * Special cases: vcpu ioctls that are asynchronous to vcpu execution, @@ -3233,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_undebugfs; } + r = kvm_vfio_ops_init(); + WARN_ON(r); + return 0; out_undebugfs: diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index bb11b36ee8a2..281e7cf2b8e5 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include "vfio.h" struct kvm_vfio_group { struct list_head node; @@ -278,8 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) return 0; } -static int __init kvm_vfio_ops_init(void) +int kvm_vfio_ops_init(void) { return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); } -module_init(kvm_vfio_ops_init); diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h new file mode 100644 index 000000000000..92eac75d6b62 --- /dev/null +++ b/virt/kvm/vfio.h @@ -0,0 +1,13 @@ +#ifndef __KVM_VFIO_H +#define __KVM_VFIO_H + +#ifdef CONFIG_KVM_VFIO +int kvm_vfio_ops_init(void); +#else +static inline int kvm_vfio_ops_init(void) +{ + return 0; +} +#endif + +#endif |