diff options
Diffstat (limited to 'virt/kvm')
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 27 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v3.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.c | 83 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.h | 4 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 124 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 27 |
6 files changed, 179 insertions, 88 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 76e38d231e99..b9d3a32cbc04 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -137,6 +137,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + bool phys_active; + int ret; /* * We're about to run this vcpu again, so there is no need to @@ -151,6 +153,23 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) */ if (kvm_timer_should_fire(vcpu)) kvm_timer_inject_irq(vcpu); + + /* + * We keep track of whether the edge-triggered interrupt has been + * signalled to the vgic/guest, and if so, we mask the interrupt and + * the physical distributor to prevent the timer from raising a + * physical interrupt whenever we run a guest, preventing forward + * VCPU progress. + */ + if (kvm_vgic_get_phys_irq_active(timer->map)) + phys_active = true; + else + phys_active = false; + + ret = irq_set_irqchip_state(timer->map->irq, + IRQCHIP_STATE_ACTIVE, + phys_active); + WARN_ON(ret); } /** @@ -200,6 +219,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, timer->irq = irq; /* + * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 + * and to 0 for ARMv7. We provide an implementation that always + * resets the timer to be disabled and unmasked and is compliant with + * the ARMv7 architecture. + */ + timer->cntv_ctl = 0; + + /* * Tell the VGIC that the virtual interrupt is tied to a * physical interrupt. We do that once per VCPU. */ diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index afbf925b00f4..7dd5d62f10a1 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -288,7 +288,7 @@ int vgic_v3_probe(struct device_node *vgic_node, vgic->vctrl_base = NULL; vgic->type = VGIC_V3; - vgic->max_gic_vcpus = KVM_MAX_VCPUS; + vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vcpu_res.start, vgic->maint_irq); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 9eb489a2c94c..66c66165e712 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -531,6 +531,34 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, return false; } +/* + * If a mapped interrupt's state has been modified by the guest such that it + * is no longer active or pending, without it have gone through the sync path, + * then the map->active field must be cleared so the interrupt can be taken + * again. + */ +static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct list_head *root; + struct irq_phys_map_entry *entry; + struct irq_phys_map *map; + + rcu_read_lock(); + + /* Check for PPIs */ + root = &vgic_cpu->irq_phys_map_list; + list_for_each_entry_rcu(entry, root, entry) { + map = &entry->map; + + if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) && + !vgic_irq_is_active(vcpu, map->virt_irq)) + map->active = false; + } + + rcu_read_unlock(); +} + bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id) @@ -561,6 +589,7 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, vcpu_id, offset); vgic_reg_access(mmio, reg, offset, mode); + vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -598,6 +627,7 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { + vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -982,6 +1012,12 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; pend_shared = vcpu->arch.vgic_cpu.pending_shared; + if (!dist->enabled) { + bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS); + bitmap_zero(pend_shared, nr_shared); + return 0; + } + pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); @@ -1009,11 +1045,6 @@ void vgic_update_state(struct kvm *kvm) struct kvm_vcpu *vcpu; int c; - if (!dist->enabled) { - set_bit(0, dist->irq_pending_on_cpu); - return; - } - kvm_for_each_vcpu(c, vcpu, kvm) { if (compute_pending_for_cpu(vcpu)) set_bit(c, dist->irq_pending_on_cpu); @@ -1092,6 +1123,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); + /* + * We must transfer the pending state back to the distributor before + * retiring the LR, otherwise we may loose edge-triggered interrupts. + */ + if (vlr.state & LR_STATE_PENDING) { + vgic_dist_irq_set_pending(vcpu, irq); + vlr.hwirq = 0; + } + vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); clear_bit(lr_nr, vgic_cpu->lr_used); @@ -1132,7 +1172,8 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); vgic_irq_clear_active(vcpu, irq); vgic_update_state(vcpu->kvm); - } else if (vgic_dist_irq_is_pending(vcpu, irq)) { + } else { + WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq)); vlr.state |= LR_STATE_PENDING; kvm_debug("Set pending: 0x%x\n", vlr.state); } @@ -1144,26 +1185,11 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, struct irq_phys_map *map; map = vgic_irq_map_search(vcpu, irq); - /* - * If we have a mapping, and the virtual interrupt is - * being injected, then we must set the state to - * active in the physical world. Otherwise the - * physical interrupt will fire and the guest will - * exit before processing the virtual interrupt. - */ if (map) { - int ret; - - BUG_ON(!map->active); vlr.hwirq = map->phys_irq; vlr.state |= LR_HW; vlr.state &= ~LR_EOI_INT; - ret = irq_set_irqchip_state(map->irq, - IRQCHIP_STATE_ACTIVE, - true); - WARN_ON(ret); - /* * Make sure we're not going to sample this * again, as a HW-backed interrupt cannot be @@ -1411,7 +1437,7 @@ static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) return 0; map = vgic_irq_map_search(vcpu, vlr.irq); - BUG_ON(!map || !map->active); + BUG_ON(!map); ret = irq_get_irqchip_state(map->irq, IRQCHIP_STATE_ACTIVE, @@ -1419,13 +1445,8 @@ static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) WARN_ON(ret); - if (map->active) { - ret = irq_set_irqchip_state(map->irq, - IRQCHIP_STATE_ACTIVE, - false); - WARN_ON(ret); + if (map->active) return 0; - } return 1; } @@ -1597,8 +1618,12 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, } else { if (level_triggered) { vgic_dist_irq_clear_level(vcpu, irq_num); - if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) + if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) { vgic_dist_irq_clear_pending(vcpu, irq_num); + vgic_cpu_irq_clear(vcpu, irq_num); + if (!compute_pending_for_cpu(vcpu)) + clear_bit(cpuid, dist->irq_pending_on_cpu); + } } ret = false; diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 5cbf190d238c..6bca74ca5331 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev { int kvm_coalesced_mmio_init(struct kvm *kvm); void kvm_coalesced_mmio_free(struct kvm *kvm); int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); #else diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193dfa49..79db45336e3a 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) return KVM_MMIO_BUS; } -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, + enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; - struct _ioeventfd *p; - struct eventfd_ctx *eventfd; - int ret; - - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized, or 0 to ignore length */ - switch (args->len) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - /* check for extra flags that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; - - /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) - return -EINVAL; + struct eventfd_ctx *eventfd; + struct _ioeventfd *p; + int ret; eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) @@ -843,16 +817,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (ret < 0) goto unlock_fail; - /* When length is ignored, MMIO is also put on a separate bus, for - * faster lookups. - */ - if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { - ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, - p->addr, 0, &p->dev); - if (ret < 0) - goto register_fail; - } - kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); @@ -860,8 +824,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return 0; -register_fail: - kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); unlock_fail: mutex_unlock(&kvm->slots_lock); @@ -873,14 +835,13 @@ fail: } static int -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; - bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -901,10 +862,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length) { - kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, - &p->dev); - } kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; @@ -918,6 +875,71 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return ret; } +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + + if (!args->len && bus_idx == KVM_MMIO_BUS) + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + + return ret; +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx; + int ret; + + bus_idx = ioeventfd_bus_from_flags(args->flags); + /* must be natural-word sized, or 0 to ignore length */ + switch (args->len) { + case 0: + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + /* ioeventfd with no length can't be combined with DATAMATCH */ + if (!args->len && + args->flags & (KVM_IOEVENTFD_FLAG_PIO | + KVM_IOEVENTFD_FLAG_DATAMATCH)) + return -EINVAL; + + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + if (ret) + goto fail; + + /* When length is ignored, MMIO is also put on a separate bus, for + * faster lookups. + */ + if (!args->len && bus_idx == KVM_MMIO_BUS) { + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + if (ret < 0) + goto fast_fail; + } + + return 0; + +fast_fail: + kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +fail: + return ret; +} + int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a25a73147f71..8db1d9361993 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -66,8 +66,8 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -/* halt polling only reduces halt latency by 5-7 us, 500us is enough */ -static unsigned int halt_poll_ns = 500000; +/* Architectures should define their poll value according to the halt latency */ +static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* Default doubles per-vcpu halt_poll_ns. */ @@ -2004,6 +2004,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) if (vcpu->halt_poll_ns) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); + ++vcpu->stat.halt_attempted_poll; do { /* * This sets KVM_REQ_UNHALT if an interrupt @@ -2043,7 +2044,8 @@ out: else if (vcpu->halt_poll_ns < halt_poll_ns && block_ns < halt_poll_ns) grow_halt_poll_ns(vcpu); - } + } else + vcpu->halt_poll_ns = 0; trace_kvm_vcpu_wakeup(block_ns, waited); } @@ -3156,10 +3158,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, const struct kvm_io_range *r2) { - if (r1->addr < r2->addr) + gpa_t addr1 = r1->addr; + gpa_t addr2 = r2->addr; + + if (addr1 < addr2) return -1; - if (r1->addr + r1->len > r2->addr + r2->len) + + /* If r2->len == 0, match the exact address. If r2->len != 0, + * accept any overlapping write. Any order is acceptable for + * overlapping ranges, because kvm_io_bus_get_first_dev ensures + * we process all of them. + */ + if (r2->len) { + addr1 += r1->len; + addr2 += r2->len; + } + + if (addr1 > addr2) return 1; + return 0; } |