diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/coalesced_mmio.h | 4 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 124 | ||||
-rw-r--r-- | virt/kvm/irqchip.c | 8 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 133 | ||||
-rw-r--r-- | virt/kvm/vfio.c | 5 |
5 files changed, 199 insertions, 75 deletions
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 5cbf190d238c..6bca74ca5331 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev { int kvm_coalesced_mmio_init(struct kvm *kvm); void kvm_coalesced_mmio_free(struct kvm *kvm); int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); #else diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193dfa49..79db45336e3a 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) return KVM_MMIO_BUS; } -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, + enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; - struct _ioeventfd *p; - struct eventfd_ctx *eventfd; - int ret; - - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized, or 0 to ignore length */ - switch (args->len) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - /* check for extra flags that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; - - /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) - return -EINVAL; + struct eventfd_ctx *eventfd; + struct _ioeventfd *p; + int ret; eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) @@ -843,16 +817,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (ret < 0) goto unlock_fail; - /* When length is ignored, MMIO is also put on a separate bus, for - * faster lookups. - */ - if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { - ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, - p->addr, 0, &p->dev); - if (ret < 0) - goto register_fail; - } - kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); @@ -860,8 +824,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return 0; -register_fail: - kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); unlock_fail: mutex_unlock(&kvm->slots_lock); @@ -873,14 +835,13 @@ fail: } static int -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; - bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -901,10 +862,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length) { - kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, - &p->dev); - } kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; @@ -918,6 +875,71 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return ret; } +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + + if (!args->len && bus_idx == KVM_MMIO_BUS) + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + + return ret; +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx; + int ret; + + bus_idx = ioeventfd_bus_from_flags(args->flags); + /* must be natural-word sized, or 0 to ignore length */ + switch (args->len) { + case 0: + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + /* ioeventfd with no length can't be combined with DATAMATCH */ + if (!args->len && + args->flags & (KVM_IOEVENTFD_FLAG_PIO | + KVM_IOEVENTFD_FLAG_DATAMATCH)) + return -EINVAL; + + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + if (ret) + goto fail; + + /* When length is ignored, MMIO is also put on a separate bus, for + * faster lookups. + */ + if (!args->len && bus_idx == KVM_MMIO_BUS) { + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + if (ret < 0) + goto fast_fail; + } + + return 0; + +fast_fail: + kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +fail: + return ret; +} + int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 21c14244f4c4..d7ea8e20dae4 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -213,11 +213,15 @@ int kvm_set_irq_routing(struct kvm *kvm, goto out; r = -EINVAL; - if (ue->flags) + if (ue->flags) { + kfree(e); goto out; + } r = setup_routing_entry(new, e, ue); - if (r) + if (r) { + kfree(e); goto out; + } ++ue; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8b8a44453670..04146a2e1d81 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -66,9 +66,18 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -static unsigned int halt_poll_ns; +/* halt polling only reduces halt latency by 5-7 us, 500us is enough */ +static unsigned int halt_poll_ns = 500000; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +/* Default doubles per-vcpu halt_poll_ns. */ +static unsigned int halt_poll_ns_grow = 2; +module_param(halt_poll_ns_grow, int, S_IRUGO); + +/* Default resets per-vcpu halt_poll_ns . */ +static unsigned int halt_poll_ns_shrink; +module_param(halt_poll_ns_shrink, int, S_IRUGO); + /* * Ordering of locks: * @@ -217,6 +226,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->kvm = kvm; vcpu->vcpu_id = id; vcpu->pid = NULL; + vcpu->halt_poll_ns = 0; init_waitqueue_head(&vcpu->wq); kvm_async_pf_vcpu_init(vcpu); @@ -387,6 +397,36 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, return young; } +static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int young, idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + /* + * Even though we do not flush TLB, this will still adversely + * affect performance on pre-Haswell Intel EPT, where there is + * no EPT Access Bit to clear so that we have to tear down EPT + * tables instead. If we find this unacceptable, we can always + * add a parameter to kvm_age_hva so that it effectively doesn't + * do anything on clear_young. + * + * Also note that currently we never issue secondary TLB flushes + * from clear_young, leaving this job up to the regular system + * cadence. If we find this inaccurate, we might come up with a + * more sophisticated heuristic later. + */ + young = kvm_age_hva(kvm, start, end); + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + + return young; +} + static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address) @@ -419,6 +459,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, + .clear_young = kvm_mmu_notifier_clear_young, .test_young = kvm_mmu_notifier_test_young, .change_pte = kvm_mmu_notifier_change_pte, .release = kvm_mmu_notifier_release, @@ -1906,6 +1947,35 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + int old, val; + + old = val = vcpu->halt_poll_ns; + /* 10us base */ + if (val == 0 && halt_poll_ns_grow) + val = 10000; + else + val *= halt_poll_ns_grow; + + vcpu->halt_poll_ns = val; + trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); +} + +static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + int old, val; + + old = val = vcpu->halt_poll_ns; + if (halt_poll_ns_shrink == 0) + val = 0; + else + val /= halt_poll_ns_shrink; + + vcpu->halt_poll_ns = val; + trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); +} + static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) { if (kvm_arch_vcpu_runnable(vcpu)) { @@ -1928,11 +1998,13 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) ktime_t start, cur; DEFINE_WAIT(wait); bool waited = false; + u64 block_ns; start = cur = ktime_get(); - if (halt_poll_ns) { - ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns); + if (vcpu->halt_poll_ns) { + ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); + ++vcpu->stat.halt_attempted_poll; do { /* * This sets KVM_REQ_UNHALT if an interrupt @@ -1960,7 +2032,22 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) cur = ktime_get(); out: - trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited); + block_ns = ktime_to_ns(cur) - ktime_to_ns(start); + + if (halt_poll_ns) { + if (block_ns <= vcpu->halt_poll_ns) + ; + /* we had a long block, shrink polling */ + else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) + shrink_halt_poll_ns(vcpu); + /* we had a short halt and our poll time is too small */ + else if (vcpu->halt_poll_ns < halt_poll_ns && + block_ns < halt_poll_ns) + grow_halt_poll_ns(vcpu); + } else + vcpu->halt_poll_ns = 0; + + trace_kvm_vcpu_wakeup(block_ns, waited); } EXPORT_SYMBOL_GPL(kvm_vcpu_block); @@ -2206,6 +2293,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) } kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; + + /* + * Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus + * before kvm->online_vcpu's incremented value. + */ smp_wmb(); atomic_inc(&kvm->online_vcpus); @@ -2618,9 +2710,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif case KVM_CAP_INTERNAL_ERROR_DATA: #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: @@ -2716,17 +2805,6 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_ioeventfd(kvm, &data); break; } -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_SET_BOOT_CPU_ID: - r = 0; - mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) != 0) - r = -EBUSY; - else - kvm->bsp_vcpu_id = arg; - mutex_unlock(&kvm->lock); - break; -#endif #ifdef CONFIG_HAVE_KVM_MSI case KVM_SIGNAL_MSI: { struct kvm_msi msi; @@ -3080,10 +3158,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, const struct kvm_io_range *r2) { - if (r1->addr < r2->addr) + gpa_t addr1 = r1->addr; + gpa_t addr2 = r2->addr; + + if (addr1 < addr2) return -1; - if (r1->addr + r1->len > r2->addr + r2->len) + + /* If r2->len == 0, match the exact address. If r2->len != 0, + * accept any overlapping write. Any order is acceptable for + * overlapping ranges, because kvm_io_bus_get_first_dev ensures + * we process all of them. + */ + if (r2->len) { + addr1 += r1->len; + addr2 += r2->len; + } + + if (addr1 > addr2) return 1; + return 0; } diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 620e37f741b8..1dd087da6f31 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) list_add_tail(&kvg->node, &kv->group_list); kvg->vfio_group = vfio_group; + kvm_arch_start_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_update_coherency(dev); @@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) break; } + kvm_arch_end_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_group_put_external_user(vfio_group); @@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); kfree(kvg); + kvm_arch_end_assignment(dev->kvm); } kvm_vfio_update_coherency(dev); |