summaryrefslogtreecommitdiff
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/arm/arch_timer.c27
-rw-r--r--virt/kvm/arm/vgic-v3.c2
-rw-r--r--virt/kvm/arm/vgic.c83
-rw-r--r--virt/kvm/coalesced_mmio.h4
-rw-r--r--virt/kvm/eventfd.c124
-rw-r--r--virt/kvm/kvm_main.c27
6 files changed, 179 insertions, 88 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 76e38d231e99..b9d3a32cbc04 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -137,6 +137,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ bool phys_active;
+ int ret;
/*
* We're about to run this vcpu again, so there is no need to
@@ -151,6 +153,23 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
*/
if (kvm_timer_should_fire(vcpu))
kvm_timer_inject_irq(vcpu);
+
+ /*
+ * We keep track of whether the edge-triggered interrupt has been
+ * signalled to the vgic/guest, and if so, we mask the interrupt and
+ * the physical distributor to prevent the timer from raising a
+ * physical interrupt whenever we run a guest, preventing forward
+ * VCPU progress.
+ */
+ if (kvm_vgic_get_phys_irq_active(timer->map))
+ phys_active = true;
+ else
+ phys_active = false;
+
+ ret = irq_set_irqchip_state(timer->map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ phys_active);
+ WARN_ON(ret);
}
/**
@@ -200,6 +219,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
timer->irq = irq;
/*
+ * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
+ * and to 0 for ARMv7. We provide an implementation that always
+ * resets the timer to be disabled and unmasked and is compliant with
+ * the ARMv7 architecture.
+ */
+ timer->cntv_ctl = 0;
+
+ /*
* Tell the VGIC that the virtual interrupt is tied to a
* physical interrupt. We do that once per VCPU.
*/
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index afbf925b00f4..7dd5d62f10a1 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -288,7 +288,7 @@ int vgic_v3_probe(struct device_node *vgic_node,
vgic->vctrl_base = NULL;
vgic->type = VGIC_V3;
- vgic->max_gic_vcpus = KVM_MAX_VCPUS;
+ vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
vcpu_res.start, vgic->maint_irq);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 9eb489a2c94c..66c66165e712 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -531,6 +531,34 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm,
return false;
}
+/*
+ * If a mapped interrupt's state has been modified by the guest such that it
+ * is no longer active or pending, without it have gone through the sync path,
+ * then the map->active field must be cleared so the interrupt can be taken
+ * again.
+ */
+static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct list_head *root;
+ struct irq_phys_map_entry *entry;
+ struct irq_phys_map *map;
+
+ rcu_read_lock();
+
+ /* Check for PPIs */
+ root = &vgic_cpu->irq_phys_map_list;
+ list_for_each_entry_rcu(entry, root, entry) {
+ map = &entry->map;
+
+ if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
+ !vgic_irq_is_active(vcpu, map->virt_irq))
+ map->active = false;
+ }
+
+ rcu_read_unlock();
+}
+
bool vgic_handle_clear_pending_reg(struct kvm *kvm,
struct kvm_exit_mmio *mmio,
phys_addr_t offset, int vcpu_id)
@@ -561,6 +589,7 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
vcpu_id, offset);
vgic_reg_access(mmio, reg, offset, mode);
+ vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
vgic_update_state(kvm);
return true;
}
@@ -598,6 +627,7 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm,
ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
if (mmio->is_write) {
+ vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
vgic_update_state(kvm);
return true;
}
@@ -982,6 +1012,12 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
pend_shared = vcpu->arch.vgic_cpu.pending_shared;
+ if (!dist->enabled) {
+ bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS);
+ bitmap_zero(pend_shared, nr_shared);
+ return 0;
+ }
+
pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
@@ -1009,11 +1045,6 @@ void vgic_update_state(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int c;
- if (!dist->enabled) {
- set_bit(0, dist->irq_pending_on_cpu);
- return;
- }
-
kvm_for_each_vcpu(c, vcpu, kvm) {
if (compute_pending_for_cpu(vcpu))
set_bit(c, dist->irq_pending_on_cpu);
@@ -1092,6 +1123,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+ /*
+ * We must transfer the pending state back to the distributor before
+ * retiring the LR, otherwise we may loose edge-triggered interrupts.
+ */
+ if (vlr.state & LR_STATE_PENDING) {
+ vgic_dist_irq_set_pending(vcpu, irq);
+ vlr.hwirq = 0;
+ }
+
vlr.state = 0;
vgic_set_lr(vcpu, lr_nr, vlr);
clear_bit(lr_nr, vgic_cpu->lr_used);
@@ -1132,7 +1172,8 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
vgic_irq_clear_active(vcpu, irq);
vgic_update_state(vcpu->kvm);
- } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
+ } else {
+ WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq));
vlr.state |= LR_STATE_PENDING;
kvm_debug("Set pending: 0x%x\n", vlr.state);
}
@@ -1144,26 +1185,11 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
struct irq_phys_map *map;
map = vgic_irq_map_search(vcpu, irq);
- /*
- * If we have a mapping, and the virtual interrupt is
- * being injected, then we must set the state to
- * active in the physical world. Otherwise the
- * physical interrupt will fire and the guest will
- * exit before processing the virtual interrupt.
- */
if (map) {
- int ret;
-
- BUG_ON(!map->active);
vlr.hwirq = map->phys_irq;
vlr.state |= LR_HW;
vlr.state &= ~LR_EOI_INT;
- ret = irq_set_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- true);
- WARN_ON(ret);
-
/*
* Make sure we're not going to sample this
* again, as a HW-backed interrupt cannot be
@@ -1411,7 +1437,7 @@ static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
return 0;
map = vgic_irq_map_search(vcpu, vlr.irq);
- BUG_ON(!map || !map->active);
+ BUG_ON(!map);
ret = irq_get_irqchip_state(map->irq,
IRQCHIP_STATE_ACTIVE,
@@ -1419,13 +1445,8 @@ static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
WARN_ON(ret);
- if (map->active) {
- ret = irq_set_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- false);
- WARN_ON(ret);
+ if (map->active)
return 0;
- }
return 1;
}
@@ -1597,8 +1618,12 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
} else {
if (level_triggered) {
vgic_dist_irq_clear_level(vcpu, irq_num);
- if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) {
vgic_dist_irq_clear_pending(vcpu, irq_num);
+ vgic_cpu_irq_clear(vcpu, irq_num);
+ if (!compute_pending_for_cpu(vcpu))
+ clear_bit(cpuid, dist->irq_pending_on_cpu);
+ }
}
ret = false;
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 5cbf190d238c..6bca74ca5331 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev {
int kvm_coalesced_mmio_init(struct kvm *kvm);
void kvm_coalesced_mmio_free(struct kvm *kvm);
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
- struct kvm_coalesced_mmio_zone *zone);
+ struct kvm_coalesced_mmio_zone *zone);
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
- struct kvm_coalesced_mmio_zone *zone);
+ struct kvm_coalesced_mmio_zone *zone);
#else
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9ff4193dfa49..79db45336e3a 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
return KVM_MMIO_BUS;
}
-static int
-kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
+ enum kvm_bus bus_idx,
+ struct kvm_ioeventfd *args)
{
- enum kvm_bus bus_idx;
- struct _ioeventfd *p;
- struct eventfd_ctx *eventfd;
- int ret;
-
- bus_idx = ioeventfd_bus_from_flags(args->flags);
- /* must be natural-word sized, or 0 to ignore length */
- switch (args->len) {
- case 0:
- case 1:
- case 2:
- case 4:
- case 8:
- break;
- default:
- return -EINVAL;
- }
-
- /* check for range overflow */
- if (args->addr + args->len < args->addr)
- return -EINVAL;
- /* check for extra flags that we don't understand */
- if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
- return -EINVAL;
-
- /* ioeventfd with no length can't be combined with DATAMATCH */
- if (!args->len &&
- args->flags & (KVM_IOEVENTFD_FLAG_PIO |
- KVM_IOEVENTFD_FLAG_DATAMATCH))
- return -EINVAL;
+ struct eventfd_ctx *eventfd;
+ struct _ioeventfd *p;
+ int ret;
eventfd = eventfd_ctx_fdget(args->fd);
if (IS_ERR(eventfd))
@@ -843,16 +817,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (ret < 0)
goto unlock_fail;
- /* When length is ignored, MMIO is also put on a separate bus, for
- * faster lookups.
- */
- if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
- ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
- p->addr, 0, &p->dev);
- if (ret < 0)
- goto register_fail;
- }
-
kvm->buses[bus_idx]->ioeventfd_count++;
list_add_tail(&p->list, &kvm->ioeventfds);
@@ -860,8 +824,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
return 0;
-register_fail:
- kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
unlock_fail:
mutex_unlock(&kvm->slots_lock);
@@ -873,14 +835,13 @@ fail:
}
static int
-kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_ioeventfd *args)
{
- enum kvm_bus bus_idx;
struct _ioeventfd *p, *tmp;
struct eventfd_ctx *eventfd;
int ret = -ENOENT;
- bus_idx = ioeventfd_bus_from_flags(args->flags);
eventfd = eventfd_ctx_fdget(args->fd);
if (IS_ERR(eventfd))
return PTR_ERR(eventfd);
@@ -901,10 +862,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- if (!p->length) {
- kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
- &p->dev);
- }
kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
@@ -918,6 +875,71 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
return ret;
}
+static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+ enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
+ int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+
+ if (!args->len && bus_idx == KVM_MMIO_BUS)
+ kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
+
+ return ret;
+}
+
+static int
+kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+ enum kvm_bus bus_idx;
+ int ret;
+
+ bus_idx = ioeventfd_bus_from_flags(args->flags);
+ /* must be natural-word sized, or 0 to ignore length */
+ switch (args->len) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* check for range overflow */
+ if (args->addr + args->len < args->addr)
+ return -EINVAL;
+
+ /* check for extra flags that we don't understand */
+ if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
+ return -EINVAL;
+
+ /* ioeventfd with no length can't be combined with DATAMATCH */
+ if (!args->len &&
+ args->flags & (KVM_IOEVENTFD_FLAG_PIO |
+ KVM_IOEVENTFD_FLAG_DATAMATCH))
+ return -EINVAL;
+
+ ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
+ if (ret)
+ goto fail;
+
+ /* When length is ignored, MMIO is also put on a separate bus, for
+ * faster lookups.
+ */
+ if (!args->len && bus_idx == KVM_MMIO_BUS) {
+ ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
+ if (ret < 0)
+ goto fast_fail;
+ }
+
+ return 0;
+
+fast_fail:
+ kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+fail:
+ return ret;
+}
+
int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a25a73147f71..8db1d9361993 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,8 +66,8 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-/* halt polling only reduces halt latency by 5-7 us, 500us is enough */
-static unsigned int halt_poll_ns = 500000;
+/* Architectures should define their poll value according to the halt latency */
+static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
/* Default doubles per-vcpu halt_poll_ns. */
@@ -2004,6 +2004,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
if (vcpu->halt_poll_ns) {
ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
+ ++vcpu->stat.halt_attempted_poll;
do {
/*
* This sets KVM_REQ_UNHALT if an interrupt
@@ -2043,7 +2044,8 @@ out:
else if (vcpu->halt_poll_ns < halt_poll_ns &&
block_ns < halt_poll_ns)
grow_halt_poll_ns(vcpu);
- }
+ } else
+ vcpu->halt_poll_ns = 0;
trace_kvm_vcpu_wakeup(block_ns, waited);
}
@@ -3156,10 +3158,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
const struct kvm_io_range *r2)
{
- if (r1->addr < r2->addr)
+ gpa_t addr1 = r1->addr;
+ gpa_t addr2 = r2->addr;
+
+ if (addr1 < addr2)
return -1;
- if (r1->addr + r1->len > r2->addr + r2->len)
+
+ /* If r2->len == 0, match the exact address. If r2->len != 0,
+ * accept any overlapping write. Any order is acceptable for
+ * overlapping ranges, because kvm_io_bus_get_first_dev ensures
+ * we process all of them.
+ */
+ if (r2->len) {
+ addr1 += r1->len;
+ addr2 += r2->len;
+ }
+
+ if (addr1 > addr2)
return 1;
+
return 0;
}