diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
-rw-r--r-- | virt/kvm/assigned-dev.c | 43 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 53 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 163 | ||||
-rw-r--r-- | virt/kvm/ioapic.h | 27 | ||||
-rw-r--r-- | virt/kvm/irq_comm.c | 215 | ||||
-rw-r--r-- | virt/kvm/irqchip.c | 237 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 258 |
8 files changed, 659 insertions, 340 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index d01b24b72c61..779262f59e25 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQ_ROUTING + bool + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 3642239252b0..8db43701016f 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -80,11 +80,12 @@ kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, spin_lock(&assigned_dev->intx_mask_lock); if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, vector, 1); + assigned_dev->irq_source_id, vector, 1, + false); spin_unlock(&assigned_dev->intx_mask_lock); } else kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - vector, 1); + vector, 1, false); } static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) @@ -165,7 +166,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); spin_lock(&dev->intx_mask_lock); @@ -188,7 +189,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) if (reassert) kvm_set_irq(dev->kvm, dev->irq_source_id, - dev->guest_irq, 1); + dev->guest_irq, 1, false); } spin_unlock(&dev->intx_mask_lock); @@ -202,7 +203,7 @@ static void deassign_guest_irq(struct kvm *kvm, &assigned_dev->ack_notifier); kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 0); + assigned_dev->guest_irq, 0, false); if (assigned_dev->irq_source_id != -1) kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); @@ -901,7 +902,7 @@ static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { kvm_set_irq(match->kvm, match->irq_source_id, - match->guest_irq, 0); + match->guest_irq, 0, false); /* * Masking at hardware-level is performed on demand, * i.e. when an IRQ actually arrives at the host. @@ -982,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#ifdef KVM_CAP_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { - struct kvm_irq_routing routing; - struct kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries; - - r = -EFAULT; - if (copy_from_user(&routing, argp, sizeof(routing))) - goto out; - r = -EINVAL; - if (routing.nr >= KVM_MAX_IRQ_ROUTES) - goto out; - if (routing.flags) - goto out; - r = -ENOMEM; - entries = vmalloc(routing.nr * sizeof(*entries)); - if (!entries) - goto out; - r = -EFAULT; - urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; - r = kvm_set_irq_routing(kvm, entries, routing.nr, - routing.flags); - out_free_irq_routing: - vfree(entries); - break; - } -#endif /* KVM_CAP_IRQ_ROUTING */ #ifdef __KVM_HAVE_MSIX case KVM_ASSIGN_SET_MSIX_NR: { struct kvm_assigned_msix_nr entry_nr; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index adb17f266b28..64ee720b75c7 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,7 +35,7 @@ #include "iodev.h" -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -100,11 +100,13 @@ irqfd_inject(struct work_struct *work) struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1, + false); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0, + false); } else kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - irqfd->gsi, 1); + irqfd->gsi, 1, false); } /* @@ -121,7 +123,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) resampler = container_of(kian, struct _irqfd_resampler, notifier); kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0); + resampler->notifier.gsi, 0, false); rcu_read_lock(); @@ -146,7 +148,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) list_del(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0); + resampler->notifier.gsi, 0, false); kfree(resampler); } @@ -225,7 +227,8 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = rcu_dereference(irqfd->irq_entry); /* An event has been signaled, inject an interrupt */ if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); + kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false); else schedule_work(&irqfd->inject); rcu_read_unlock(); @@ -430,7 +433,7 @@ fail: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -439,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * shutdown any irqfd's that match fd+gsi */ @@ -543,7 +546,7 @@ void kvm_irq_routing_update(struct kvm *kvm, * aggregated from all vm* instances. We need our own isolated single-thread * queue to prevent deadlock against flushing the normal work-queue. */ -static int __init irqfd_module_init(void) +int kvm_irqfd_init(void) { irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); if (!irqfd_cleanup_wq) @@ -552,13 +555,10 @@ static int __init irqfd_module_init(void) return 0; } -static void __exit irqfd_module_exit(void) +void kvm_irqfd_exit(void) { destroy_workqueue(irqfd_cleanup_wq); } - -module_init(irqfd_module_init); -module_exit(irqfd_module_exit); #endif /* @@ -577,6 +577,7 @@ struct _ioeventfd { struct eventfd_ctx *eventfd; u64 datamatch; struct kvm_io_device dev; + u8 bus_idx; bool wildcard; }; @@ -669,7 +670,8 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) struct _ioeventfd *_p; list_for_each_entry(_p, &kvm->ioeventfds, list) - if (_p->addr == p->addr && _p->length == p->length && + if (_p->bus_idx == p->bus_idx && + _p->addr == p->addr && _p->length == p->length && (_p->wildcard || p->wildcard || _p->datamatch == p->datamatch)) return true; @@ -677,15 +679,24 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) return false; } +static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) +{ + if (flags & KVM_IOEVENTFD_FLAG_PIO) + return KVM_PIO_BUS; + if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) + return KVM_VIRTIO_CCW_NOTIFY_BUS; + return KVM_MMIO_BUS; +} + static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p; struct eventfd_ctx *eventfd; int ret; + bus_idx = ioeventfd_bus_from_flags(args->flags); /* must be natural-word sized */ switch (args->len) { case 1: @@ -717,6 +728,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) INIT_LIST_HEAD(&p->list); p->addr = args->addr; + p->bus_idx = bus_idx; p->length = args->len; p->eventfd = eventfd; @@ -760,12 +772,12 @@ fail: static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; + bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -775,7 +787,8 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); - if (p->eventfd != eventfd || + if (p->bus_idx != bus_idx || + p->eventfd != eventfd || p->addr != args->addr || p->length != args->len || p->wildcard != wildcard) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 5ba005c00e2f..2d682977ce82 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -50,7 +50,8 @@ #else #define ioapic_debug(fmt, arg...) #endif -static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq); +static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, + bool line_status); static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, unsigned long addr, @@ -90,7 +91,80 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, return result; } -static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) +static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) +{ + ioapic->rtc_status.pending_eoi = 0; + bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); +} + +static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + bool new_val, old_val; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + + e = &ioapic->redirtbl[RTC_GSI]; + if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, + e->fields.dest_mode)) + return; + + new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); + old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + + if (new_val == old_val) + return; + + if (new_val) { + __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi++; + } else { + __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi--; + } + + WARN_ON(ioapic->rtc_status.pending_eoi < 0); +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + + spin_lock(&ioapic->lock); + __rtc_irq_eoi_tracking_restore_one(vcpu); + spin_unlock(&ioapic->lock); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) +{ + struct kvm_vcpu *vcpu; + int i; + + if (RTC_GSI >= IOAPIC_NUM_PINS) + return; + + rtc_irq_eoi_tracking_reset(ioapic); + kvm_for_each_vcpu(i, vcpu, ioapic->kvm) + __rtc_irq_eoi_tracking_restore_one(vcpu); +} + +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) + --ioapic->rtc_status.pending_eoi; + + WARN_ON(ioapic->rtc_status.pending_eoi < 0); +} + +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) +{ + if (ioapic->rtc_status.pending_eoi > 0) + return true; /* coalesced */ + + return false; +} + +static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, + bool line_status) { union kvm_ioapic_redirect_entry *pent; int injected = -1; @@ -98,7 +172,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) pent = &ioapic->redirtbl[idx]; if (!pent->fields.mask) { - injected = ioapic_deliver(ioapic, idx); + injected = ioapic_deliver(ioapic, idx, line_status); if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) pent->fields.remote_irr = 1; } @@ -119,41 +193,48 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic) smp_wmb(); } -void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - u64 *eoi_exit_bitmap) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; - struct kvm_lapic_irq irqe; int index; spin_lock(&ioapic->lock); - /* traverse ioapic entry to set eoi exit bitmap*/ for (index = 0; index < IOAPIC_NUM_PINS; index++) { e = &ioapic->redirtbl[index]; if (!e->fields.mask && (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, - index))) { - irqe.dest_id = e->fields.dest_id; - irqe.vector = e->fields.vector; - irqe.dest_mode = e->fields.dest_mode; - irqe.delivery_mode = e->fields.delivery_mode << 8; - kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap); + index) || index == RTC_GSI)) { + if (kvm_apic_match_dest(vcpu, NULL, 0, + e->fields.dest_id, e->fields.dest_mode)) { + __set_bit(e->fields.vector, + (unsigned long *)eoi_exit_bitmap); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) + __set_bit(e->fields.vector, + (unsigned long *)tmr); + } } } spin_unlock(&ioapic->lock); } -EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap); -void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm) +#ifdef CONFIG_X86 +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - if (!kvm_apic_vid_enabled(kvm) || !ioapic) + if (!ioapic) return; - kvm_make_update_eoibitmap_request(kvm); + kvm_make_scan_ioapic_request(kvm); } +#else +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ + return; +} +#endif static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { @@ -195,16 +276,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index); - kvm_ioapic_make_eoibitmap_request(ioapic->kvm); + ioapic_service(ioapic, index, false); + kvm_vcpu_request_scan_ioapic(ioapic->kvm); break; } } -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) +static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) { union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; struct kvm_lapic_irq irqe; + int ret; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", @@ -220,11 +302,19 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) irqe.level = 1; irqe.shorthand = 0; - return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); + if (irq == RTC_GSI && line_status) { + BUG_ON(ioapic->rtc_status.pending_eoi != 0); + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, + ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi = ret; + } else + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + + return ret; } int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level) + int level, bool line_status) { u32 old_irr; u32 mask = 1 << irq; @@ -244,13 +334,20 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, ret = 1; } else { int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + + if (irq == RTC_GSI && line_status && + rtc_irq_check_coalesced(ioapic)) { + ret = 0; /* coalesced */ + goto out; + } ioapic->irr |= mask; if ((edge && old_irr != ioapic->irr) || (!edge && !entry.fields.remote_irr)) - ret = ioapic_service(ioapic, irq); + ret = ioapic_service(ioapic, irq, line_status); else ret = 0; /* report coalesced interrupt */ } +out: trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); spin_unlock(&ioapic->lock); @@ -267,8 +364,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) spin_unlock(&ioapic->lock); } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, - int trigger_mode) +static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, + struct kvm_ioapic *ioapic, int vector, int trigger_mode) { int i; @@ -278,6 +375,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, if (ent->fields.vector != vector) continue; + if (i == RTC_GSI) + rtc_irq_eoi(ioapic, vcpu); /* * We are dropping lock while calling ack notifiers because ack * notifier callbacks for assigned devices call into IOAPIC @@ -296,7 +395,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; if (!ent->fields.mask && (ioapic->irr & (1 << i))) - ioapic_service(ioapic, i); + ioapic_service(ioapic, i, false); } } @@ -307,12 +406,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) return test_bit(vector, ioapic->handled_vectors); } -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { - struct kvm_ioapic *ioapic = kvm->arch.vioapic; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; spin_lock(&ioapic->lock); - __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); + __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); spin_unlock(&ioapic->lock); } @@ -410,7 +509,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, break; #ifdef CONFIG_IA64 case IOAPIC_REG_EOI: - __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); + __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG); break; #endif @@ -431,6 +530,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->ioregsel = 0; ioapic->irr = 0; ioapic->id = 0; + rtc_irq_eoi_tracking_reset(ioapic); update_handled_vectors(ioapic); } @@ -496,7 +596,8 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); - kvm_ioapic_make_eoibitmap_request(kvm); + kvm_vcpu_request_scan_ioapic(kvm); + kvm_rtc_eoi_tracking_restore_all(ioapic); spin_unlock(&ioapic->lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 0400a466c50c..615d8c995c3c 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -34,6 +34,17 @@ struct kvm_vcpu; #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 +#ifdef CONFIG_X86 +#define RTC_GSI 8 +#else +#define RTC_GSI -1U +#endif + +struct rtc_status { + int pending_eoi; + DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); +}; + struct kvm_ioapic { u64 base_address; u32 ioregsel; @@ -47,6 +58,7 @@ struct kvm_ioapic { void (*ack_notifier)(void *opaque, int irq); spinlock_t lock; DECLARE_BITMAP(handled_vectors, 256); + struct rtc_status rtc_status; }; #ifdef DEBUG @@ -67,24 +79,25 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, + int trigger_mode); bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level); + int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq); + struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm); -void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - u64 *eoi_exit_bitmap); - +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr); #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e9073cf4d040..e2e6b4473a96 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -35,7 +35,8 @@ #include "ioapic.h" static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { #ifdef CONFIG_X86 struct kvm_pic *pic = pic_irqchip(kvm); @@ -46,10 +47,12 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level); + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, + line_status); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -63,7 +66,7 @@ inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) } int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq) + struct kvm_lapic_irq *irq, unsigned long *dest_map) { int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; @@ -74,7 +77,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, irq->delivery_mode = APIC_DM_FIXED; } - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) return r; kvm_for_each_vcpu(i, vcpu, kvm) { @@ -88,7 +91,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (!kvm_is_dm_lowest_prio(irq)) { if (r < 0) r = 0; - r += kvm_apic_set_irq(vcpu, irq); + r += kvm_apic_set_irq(vcpu, irq, dest_map); } else if (kvm_lapic_enabled(vcpu)) { if (!lowest) lowest = vcpu; @@ -98,7 +101,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } if (lowest) - r = kvm_apic_set_irq(lowest, irq); + r = kvm_apic_set_irq(lowest, irq, dest_map); return r; } @@ -121,7 +124,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, } int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, bool line_status) { struct kvm_lapic_irq irq; @@ -130,7 +133,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, kvm_set_msi_irq(e, &irq); - return kvm_irq_delivery_to_apic(kvm, NULL, &irq); + return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); } @@ -142,63 +145,12 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, kvm_set_msi_irq(e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) return r; else return -EWOULDBLOCK; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - struct kvm_kernel_irq_routing_entry route; - - if (!irqchip_in_kernel(kvm) || msi->flags != 0) - return -EINVAL; - - route.msi.address_lo = msi->address_lo; - route.msi.address_hi = msi->address_hi; - route.msi.data = msi->data; - - return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); -} - -/* - * Return value: - * < 0 Interrupt was ignored (masked or not delivered for other reasons) - * = 0 Interrupt was coalesced (previous irq is still pending) - * > 0 Number of CPUs interrupt was delivered to - */ -int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) -{ - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0; - struct kvm_irq_routing_table *irq_rt; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* Not possible to detect if the guest uses the PIC or the - * IOAPIC. So set the bit in both. The guest will ignore - * writes to the unused one. - */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; - rcu_read_unlock(); - - while(i--) { - int r; - r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); - if (r < 0) - continue; - - ret = r + ((ret < 0) ? 0 : ret); - } - - return ret; -} - /* * Deliver an IRQ in an atomic context if we can, or return a failure, * user can retry in a process context. @@ -236,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) return ret; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi; - - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - rcu_read_unlock(); - return true; - } - - rcu_read_unlock(); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi; - - trace_kvm_ack_irq(irqchip, pin); - - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - rcu_read_unlock(); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); - kvm_ioapic_make_eoibitmap_request(kvm); -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); - kvm_ioapic_make_eoibitmap_request(kvm); -} - int kvm_request_irq_source_id(struct kvm *kvm) { unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; @@ -376,34 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, rcu_read_unlock(); } -void kvm_free_irq_routing(struct kvm *kvm) -{ - /* Called only during vm destruction. Nobody can use the pointer - at this stage */ - kfree(kvm->irq_routing); -} - -static int setup_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; unsigned max_pin; - struct kvm_kernel_irq_routing_entry *ei; - /* - * Do not allow GSI to be mapped to the same irqchip more than once. - * Allow only one to one mapping between GSI and MSI. - */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) - if (ei->type == KVM_IRQ_ROUTING_MSI || - ue->type == KVM_IRQ_ROUTING_MSI || - ue->u.irqchip.irqchip == ei->irqchip.irqchip) - return r; - - e->gsi = ue->gsi; - e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; @@ -440,69 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, goto out; } - hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; out: return r; } - -int kvm_set_irq_routing(struct kvm *kvm, - const struct kvm_irq_routing_entry *ue, - unsigned nr, - unsigned flags) -{ - struct kvm_irq_routing_table *new, *old; - u32 i, j, nr_rt_entries = 0; - int r; - - for (i = 0; i < nr; ++i) { - if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) - return -EINVAL; - nr_rt_entries = max(nr_rt_entries, ue[i].gsi); - } - - nr_rt_entries += 1; - - new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) - + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), - GFP_KERNEL); - - if (!new) - return -ENOMEM; - - new->rt_entries = (void *)&new->map[nr_rt_entries]; - - new->nr_rt_entries = nr_rt_entries; - for (i = 0; i < 3; i++) - for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) - new->chip[i][j] = -1; - - for (i = 0; i < nr; ++i) { - r = -EINVAL; - if (ue->flags) - goto out; - r = setup_routing_entry(new, &new->rt_entries[i], ue); - if (r) - goto out; - ++ue; - } - - mutex_lock(&kvm->irq_lock); - old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); - mutex_unlock(&kvm->irq_lock); - - synchronize_rcu(); - - new = old; - r = 0; - -out: - kfree(new); - return r; -} - #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c new file mode 100644 index 000000000000..20dc9e4a8f6c --- /dev/null +++ b/virt/kvm/irqchip.c @@ -0,0 +1,237 @@ +/* + * irqchip.c: Common API for in kernel interrupt controllers + * Copyright (c) 2007, Intel Corporation. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (c) 2013, Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * This file is derived from virt/kvm/irq_comm.c. + * + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * Alexander Graf <agraf@suse.de> + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <trace/events/kvm.h> +#include "irq.h" + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi; + + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + rcu_read_unlock(); + return true; + } + + rcu_read_unlock(); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi; + + trace_kvm_ack_irq(irqchip, pin); + + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + rcu_read_unlock(); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_rcu(); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct kvm_kernel_irq_routing_entry route; + + if (!irqchip_in_kernel(kvm) || msi->flags != 0) + return -EINVAL; + + route.msi.address_lo = msi->address_lo; + route.msi.address_hi = msi->address_hi; + route.msi.data = msi->data; + + return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); +} + +/* + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status) +{ + struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i = 0; + struct kvm_irq_routing_table *irq_rt; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* Not possible to detect if the guest uses the PIC or the + * IOAPIC. So set the bit in both. The guest will ignore + * writes to the unused one. + */ + rcu_read_lock(); + irq_rt = rcu_dereference(kvm->irq_routing); + if (irq < irq_rt->nr_rt_entries) + hlist_for_each_entry(e, &irq_rt->map[irq], link) + irq_set[i++] = *e; + rcu_read_unlock(); + + while(i--) { + int r; + r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, + line_status); + if (r < 0) + continue; + + ret = r + ((ret < 0) ? 0 : ret); + } + + return ret; +} + +void kvm_free_irq_routing(struct kvm *kvm) +{ + /* Called only during vm destruction. Nobody can use the pointer + at this stage */ + kfree(kvm->irq_routing); +} + +static int setup_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + struct kvm_kernel_irq_routing_entry *ei; + + /* + * Do not allow GSI to be mapped to the same irqchip more than once. + * Allow only one to one mapping between GSI and MSI. + */ + hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->type == KVM_IRQ_ROUTING_MSI || + ue->u.irqchip.irqchip == ei->irqchip.irqchip) + return r; + + e->gsi = ue->gsi; + e->type = ue->type; + r = kvm_set_routing_entry(rt, e, ue); + if (r) + goto out; + + hlist_add_head(&e->link, &rt->map[e->gsi]); + r = 0; +out: + return r; +} + +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *ue, + unsigned nr, + unsigned flags) +{ + struct kvm_irq_routing_table *new, *old; + u32 i, j, nr_rt_entries = 0; + int r; + + for (i = 0; i < nr; ++i) { + if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) + return -EINVAL; + nr_rt_entries = max(nr_rt_entries, ue[i].gsi); + } + + nr_rt_entries += 1; + + new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) + + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), + GFP_KERNEL); + + if (!new) + return -ENOMEM; + + new->rt_entries = (void *)&new->map[nr_rt_entries]; + + new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < KVM_NR_IRQCHIPS; i++) + for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) + new->chip[i][j] = -1; + + for (i = 0; i < nr; ++i) { + r = -EINVAL; + if (ue->flags) + goto out; + r = setup_routing_entry(new, &new->rt_entries[i], ue); + if (r) + goto out; + ++ue; + } + + mutex_lock(&kvm->irq_lock); + old = kvm->irq_routing; + kvm_irq_routing_update(kvm, new); + mutex_unlock(&kvm->irq_lock); + + synchronize_rcu(); + + new = old; + r = 0; + +out: + kfree(new); + return r; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f18013f09e68..45f09362ee7b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -217,9 +217,9 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm) make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); } -void kvm_make_update_eoibitmap_request(struct kvm *kvm) +void kvm_make_scan_ioapic_request(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP); + make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); + vcpu->preempted = false; r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -503,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); + INIT_LIST_HEAD(&kvm->devices); r = kvm_init_mmu_notifier(kvm); if (r) @@ -580,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm) kfree(kvm->memslots); } +static void kvm_destroy_devices(struct kvm *kvm) +{ + struct list_head *node, *tmp; + + list_for_each_safe(node, tmp, &kvm->devices) { + struct kvm_device *dev = + list_entry(node, struct kvm_device, vm_node); + + list_del(node); + dev->ops->destroy(dev); + } +} + static void kvm_destroy_vm(struct kvm *kvm) { int i; @@ -599,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow_all(kvm); #endif kvm_arch_destroy_vm(kvm); + kvm_destroy_devices(kvm); kvm_free_physmem(kvm); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -719,24 +735,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, } /* - * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: - * - create a new memory slot - * - delete an existing memory slot - * - modify an existing memory slot - * -- move it in the guest physical memory space - * -- just change its flags - * - * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): - */ -enum kvm_mr_change { - KVM_MR_CREATE, - KVM_MR_DELETE, - KVM_MR_MOVE, - KVM_MR_FLAGS_ONLY, -}; - -/* * Allocate some memory and give it an address in the guest physical address * space. * @@ -745,8 +743,7 @@ enum kvm_mr_change { * Must be called holding mmap_sem for write. */ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -767,7 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. */ - if (user_alloc && + if ((mem->slot < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, @@ -875,7 +872,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); + r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); if (r) goto out_slots; @@ -915,7 +912,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); + kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); @@ -932,26 +929,23 @@ out: EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; mutex_lock(&kvm->slots_lock); - r = __kvm_set_memory_region(kvm, mem, user_alloc); + r = __kvm_set_memory_region(kvm, mem); mutex_unlock(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; - return kvm_set_memory_region(kvm, mem, user_alloc); + return kvm_set_memory_region(kvm, mem); } int kvm_get_dirty_log(struct kvm *kvm, @@ -1099,7 +1093,7 @@ static int kvm_read_hva_atomic(void *data, void __user *hva, int len) return __copy_from_user_inatomic(data, hva, len); } -int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, +static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; @@ -1719,6 +1713,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_send_reschedule(cpu); put_cpu(); } +EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ void kvm_resched(struct kvm_vcpu *vcpu) @@ -1816,6 +1811,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; } else if (pass && i > last_boosted_vcpu) break; + if (!ACCESS_ONCE(vcpu->preempted)) + continue; if (vcpu == me) continue; if (waitqueue_active(&vcpu->wq)) @@ -2204,6 +2201,119 @@ out: } #endif +static int kvm_device_ioctl_attr(struct kvm_device *dev, + int (*accessor)(struct kvm_device *dev, + struct kvm_device_attr *attr), + unsigned long arg) +{ + struct kvm_device_attr attr; + + if (!accessor) + return -EPERM; + + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + return -EFAULT; + + return accessor(dev, &attr); +} + +static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_device *dev = filp->private_data; + + switch (ioctl) { + case KVM_SET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); + case KVM_GET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); + case KVM_HAS_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); + default: + if (dev->ops->ioctl) + return dev->ops->ioctl(dev, ioctl, arg); + + return -ENOTTY; + } +} + +static int kvm_device_release(struct inode *inode, struct file *filp) +{ + struct kvm_device *dev = filp->private_data; + struct kvm *kvm = dev->kvm; + + kvm_put_kvm(kvm); + return 0; +} + +static const struct file_operations kvm_device_fops = { + .unlocked_ioctl = kvm_device_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = kvm_device_ioctl, +#endif + .release = kvm_device_release, +}; + +struct kvm_device *kvm_device_from_filp(struct file *filp) +{ + if (filp->f_op != &kvm_device_fops) + return NULL; + + return filp->private_data; +} + +static int kvm_ioctl_create_device(struct kvm *kvm, + struct kvm_create_device *cd) +{ + struct kvm_device_ops *ops = NULL; + struct kvm_device *dev; + bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int ret; + + switch (cd->type) { +#ifdef CONFIG_KVM_MPIC + case KVM_DEV_TYPE_FSL_MPIC_20: + case KVM_DEV_TYPE_FSL_MPIC_42: + ops = &kvm_mpic_ops; + break; +#endif +#ifdef CONFIG_KVM_XICS + case KVM_DEV_TYPE_XICS: + ops = &kvm_xics_ops; + break; +#endif + default: + return -ENODEV; + } + + if (test) + return 0; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->ops = ops; + dev->kvm = kvm; + + ret = ops->create(dev, cd->type); + if (ret < 0) { + kfree(dev); + return ret; + } + + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + if (ret < 0) { + ops->destroy(dev); + return ret; + } + + list_add(&dev->vm_node, &kvm->devices); + kvm_get_kvm(kvm); + cd->fd = ret; + return 0; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2225,7 +2335,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); break; } case KVM_GET_DIRTY_LOG: { @@ -2304,7 +2414,8 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; - r = kvm_vm_ioctl_irq_line(kvm, &irq_event); + r = kvm_vm_ioctl_irq_line(kvm, &irq_event, + ioctl == KVM_IRQ_LINE_STATUS); if (r) goto out; @@ -2318,6 +2429,54 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ + case KVM_CREATE_DEVICE: { + struct kvm_create_device cd; + + r = -EFAULT; + if (copy_from_user(&cd, argp, sizeof(cd))) + goto out; + + r = kvm_ioctl_create_device(kvm, &cd); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(argp, &cd, sizeof(cd))) + goto out; + + r = 0; + break; + } default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2447,8 +2606,11 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif return 1; -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; #endif @@ -2618,14 +2780,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } - -asmlinkage void kvm_spurious_fault(void) -{ - /* Fault while not rebooting. We want the trace. */ - BUG(); -} -EXPORT_SYMBOL_GPL(kvm_spurious_fault); - static int kvm_reboot(struct notifier_block *notifier, unsigned long val, void *v) { @@ -2658,7 +2812,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) kfree(bus); } -int kvm_io_bus_sort_cmp(const void *p1, const void *p2) +static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) { const struct kvm_io_range *r1 = p1; const struct kvm_io_range *r2 = p2; @@ -2670,7 +2824,7 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2) return 0; } -int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, +static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, gpa_t addr, int len) { bus->range[bus->dev_count++] = (struct kvm_io_range) { @@ -2685,7 +2839,7 @@ int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, return 0; } -int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, +static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, gpa_t addr, int len) { struct kvm_io_range *range, key; @@ -2929,6 +3083,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) + vcpu->preempted = false; kvm_arch_vcpu_load(vcpu, cpu); } @@ -2938,6 +3094,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (current->state == TASK_RUNNING) + vcpu->preempted = true; kvm_arch_vcpu_put(vcpu); } @@ -2947,6 +3105,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, int r; int cpu; + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; r = kvm_arch_init(opaque); if (r) goto out_fail; @@ -3027,6 +3188,8 @@ out_free_0a: out_free_0: kvm_arch_exit(); out_fail: + kvm_irqfd_exit(); +out_irqfd: return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -3043,6 +3206,7 @@ void kvm_exit(void) on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); + kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled); } EXPORT_SYMBOL_GPL(kvm_exit); |