Diffstat (limited to 'virt')
-rw-r--r--   virt/kvm/Kconfig         |   3
-rw-r--r--   virt/kvm/assigned-dev.c  | 213
-rw-r--r--   virt/kvm/ioapic.c        |  10
-rw-r--r--   virt/kvm/ioapic.h        |   1
-rw-r--r--   virt/kvm/iommu.c         |  30
-rw-r--r--   virt/kvm/irq_comm.c      |  14
-rw-r--r--   virt/kvm/kvm_main.c      | 281
7 files changed, 356 insertions(+), 196 deletions(-)
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index f63ccb0a5982..28694f4a9139 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -18,3 +18,6 @@ config KVM_MMIO config KVM_ASYNC_PF bool + +config HAVE_KVM_MSI + bool diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 758e3b36d4cf..01f572c10c71 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -49,31 +49,73 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel index = i; break; } - if (index < 0) { + if (index < 0) printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); - return 0; - } return index; } -static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) +static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int ret; + + spin_lock(&assigned_dev->intx_lock); + if (pci_check_and_mask_intx(assigned_dev->dev)) { + assigned_dev->host_irq_disabled = true; + ret = IRQ_WAKE_THREAD; + } else + ret = IRQ_NONE; + spin_unlock(&assigned_dev->intx_lock); + + return ret; +} - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { - spin_lock(&assigned_dev->intx_lock); +static void +kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, + int vector) +{ + if (unlikely(assigned_dev->irq_requested_type & + KVM_DEV_IRQ_GUEST_INTX)) { + spin_lock(&assigned_dev->intx_mask_lock); + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, vector, 1); + spin_unlock(&assigned_dev->intx_mask_lock); + } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + vector, 1); +} + +static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + spin_lock_irq(&assigned_dev->intx_lock); disable_irq_nosync(irq); assigned_dev->host_irq_disabled = true; - spin_unlock(&assigned_dev->intx_lock); + spin_unlock_irq(&assigned_dev->intx_lock); } - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); + kvm_assigned_dev_raise_guest_irq(assigned_dev, + assigned_dev->guest_irq); return IRQ_HANDLED; } +#ifdef __KVM_HAVE_MSI +static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + + kvm_assigned_dev_raise_guest_irq(assigned_dev, + assigned_dev->guest_irq); + + return IRQ_HANDLED; +} +#endif + #ifdef __KVM_HAVE_MSIX static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) { @@ -83,8 +125,7 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) if (index >= 0) { vector = assigned_dev->guest_msix_entries[index].vector; - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - vector, 1); + kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); } return IRQ_HANDLED; @@ -100,15 +141,31 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - /* The guest irq may be shared so this ack may be - * from another device. 
- */ - spin_lock(&dev->intx_lock); - if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); - dev->host_irq_disabled = false; + spin_lock(&dev->intx_mask_lock); + + if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { + bool reassert = false; + + spin_lock_irq(&dev->intx_lock); + /* + * The guest IRQ may be shared so this ack can come from an + * IRQ for another guest device. + */ + if (dev->host_irq_disabled) { + if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) + enable_irq(dev->host_irq); + else if (!pci_check_and_unmask_intx(dev->dev)) + reassert = true; + dev->host_irq_disabled = reassert; + } + spin_unlock_irq(&dev->intx_lock); + + if (reassert) + kvm_set_irq(dev->kvm, dev->irq_source_id, + dev->guest_irq, 1); } - spin_unlock(&dev->intx_lock); + + spin_unlock(&dev->intx_mask_lock); } static void deassign_guest_irq(struct kvm *kvm, @@ -156,7 +213,15 @@ static void deassign_host_irq(struct kvm *kvm, pci_disable_msix(assigned_dev->dev); } else { /* Deal with MSI and INTx */ - disable_irq(assigned_dev->host_irq); + if ((assigned_dev->irq_requested_type & + KVM_DEV_IRQ_HOST_INTX) && + (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + spin_lock_irq(&assigned_dev->intx_lock); + pci_intx(assigned_dev->dev, false); + spin_unlock_irq(&assigned_dev->intx_lock); + synchronize_irq(assigned_dev->host_irq); + } else + disable_irq(assigned_dev->host_irq); free_irq(assigned_dev->host_irq, assigned_dev); @@ -237,15 +302,34 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) static int assigned_device_enable_host_intx(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { + irq_handler_t irq_handler; + unsigned long flags; + dev->host_irq = dev->dev->irq; - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. + + /* + * We can only share the IRQ line with other host devices if we are + * able to disable the IRQ source at device-level - independently of + * the guest driver. Otherwise host devices may suffer from unbounded + * IRQ latencies when the guest keeps the line asserted. 
*/ - if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - IRQF_ONESHOT, dev->irq_name, dev)) + if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { + irq_handler = kvm_assigned_dev_intx; + flags = IRQF_SHARED; + } else { + irq_handler = NULL; + flags = IRQF_ONESHOT; + } + if (request_threaded_irq(dev->host_irq, irq_handler, + kvm_assigned_dev_thread_intx, flags, + dev->irq_name, dev)) return -EIO; + + if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { + spin_lock_irq(&dev->intx_lock); + pci_intx(dev->dev, true); + spin_unlock_irq(&dev->intx_lock); + } return 0; } @@ -262,8 +346,9 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, } dev->host_irq = dev->dev->irq; - if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - 0, dev->irq_name, dev)) { + if (request_threaded_irq(dev->host_irq, NULL, + kvm_assigned_dev_thread_msi, 0, + dev->irq_name, dev)) { pci_disable_msi(dev->dev); return -EIO; } @@ -321,7 +406,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; return 0; } #endif @@ -333,7 +417,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; return 0; } #endif @@ -367,6 +450,7 @@ static int assign_host_irq(struct kvm *kvm, default: r = -EINVAL; } + dev->host_irq_disabled = false; if (!r) dev->irq_requested_type |= host_irq_type; @@ -468,6 +552,7 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, { int r = -ENODEV; struct kvm_assigned_dev_kernel *match; + unsigned long irq_type; mutex_lock(&kvm->lock); @@ -476,7 +561,9 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, if (!match) goto out; - r = kvm_deassign_irq(kvm, match, assigned_irq->flags); + irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | + KVM_DEV_IRQ_GUEST_MASK); + r = kvm_deassign_irq(kvm, match, irq_type); out: mutex_unlock(&kvm->lock); return r; @@ -609,6 +696,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, if (!match->pci_saved_state) printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", __func__, dev_name(&dev->dev)); + + if (!pci_intx_mask_supported(dev)) + assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; + match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; @@ -616,6 +707,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, match->flags = assigned_dev->flags; match->dev = dev; spin_lock_init(&match->intx_lock); + spin_lock_init(&match->intx_mask_lock); match->irq_source_id = -1; match->kvm = kvm; match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; @@ -761,6 +853,55 @@ msix_entry_out: } #endif +static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (!match) { + r = -ENODEV; + goto out; + } + + spin_lock(&match->intx_mask_lock); + + match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; + match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; + + if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { + if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { + kvm_set_irq(match->kvm, match->irq_source_id, + match->guest_irq, 0); + /* + * Masking at hardware-level is performed on demand, + * i.e. 
when an IRQ actually arrives at the host. + */ + } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + /* + * Unmask the IRQ line if required. Unmasking at + * device level will be performed by user space. + */ + spin_lock_irq(&match->intx_lock); + if (match->host_irq_disabled) { + enable_irq(match->host_irq); + match->host_irq_disabled = false; + } + spin_unlock_irq(&match->intx_lock); + } + } + + spin_unlock(&match->intx_mask_lock); + +out: + mutex_unlock(&kvm->lock); + return r; +} + long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, unsigned long arg) { @@ -868,6 +1009,15 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, break; } #endif + case KVM_ASSIGN_SET_INTX_MASK: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); + break; + } default: r = -ENOTTY; break; @@ -875,4 +1025,3 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, out: return r; } - diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index dcaf272c26c0..26fd54dc459e 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -254,13 +254,17 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, } } +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + smp_rmb(); + return test_bit(vector, ioapic->handled_vectors); +} + void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - smp_rmb(); - if (!test_bit(vector, ioapic->handled_vectors)) - return; spin_lock(&ioapic->lock); __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); spin_unlock(&ioapic->lock); diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 0b190c34ccc3..32872a09b63f 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -71,6 +71,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index a457d2138f49..e9fff9830bf0 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -240,9 +240,13 @@ int kvm_iommu_map_guest(struct kvm *kvm) return -ENODEV; } + mutex_lock(&kvm->slots_lock); + kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); - if (!kvm->arch.iommu_domain) - return -ENOMEM; + if (!kvm->arch.iommu_domain) { + r = -ENOMEM; + goto out_unlock; + } if (!allow_unsafe_assigned_interrupts && !iommu_domain_has_cap(kvm->arch.iommu_domain, @@ -253,17 +257,16 @@ int kvm_iommu_map_guest(struct kvm *kvm) " module option.\n", __func__); iommu_domain_free(kvm->arch.iommu_domain); kvm->arch.iommu_domain = NULL; - return -EPERM; + r = -EPERM; + goto out_unlock; } r = kvm_iommu_map_memslots(kvm); if (r) - goto out_unmap; - - return 0; + kvm_iommu_unmap_memslots(kvm); -out_unmap: - kvm_iommu_unmap_memslots(kvm); +out_unlock: + mutex_unlock(&kvm->slots_lock); return r; } @@ -310,6 +313,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm, } } +void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) +{ + 
kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages); +} + static int kvm_iommu_unmap_memslots(struct kvm *kvm) { int idx; @@ -320,7 +328,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) - kvm_iommu_put_pages(kvm, memslot->base_gfn, memslot->npages); + kvm_iommu_unmap_pages(kvm, memslot); srcu_read_unlock(&kvm->srcu, idx); @@ -335,7 +343,11 @@ int kvm_iommu_unmap_guest(struct kvm *kvm) if (!domain) return 0; + mutex_lock(&kvm->slots_lock); kvm_iommu_unmap_memslots(kvm); + kvm->arch.iommu_domain = NULL; + mutex_unlock(&kvm->slots_lock); + iommu_domain_free(domain); return 0; } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 9f614b4e365f..a6a0365475ed 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -138,6 +138,20 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct kvm_kernel_irq_routing_entry route; + + if (!irqchip_in_kernel(kvm) || msi->flags != 0) + return -EINVAL; + + route.msi.address_lo = msi->address_lo; + route.msi.address_hi = msi->address_hi; + route.msi.data = msi->data; + + return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); +} + /* * Return value: * < 0 Interrupt was ignored (masked or not delivered for other reasons) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a91f980077d8..7e140683ff14 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -203,7 +203,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) void kvm_flush_remote_tlbs(struct kvm *kvm) { - int dirty_count = kvm->tlbs_dirty; + long dirty_count = kvm->tlbs_dirty; smp_mb(); if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) @@ -289,15 +289,15 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, */ idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); + kvm->mmu_notifier_seq++; need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); - /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, @@ -335,12 +335,12 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, for (; start < end; start += PAGE_SIZE) need_tlb_flush |= kvm_unmap_hva(kvm, start); need_tlb_flush |= kvm->tlbs_dirty; - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); - /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) kvm_flush_remote_tlbs(kvm); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, * been freed. */ kvm->mmu_notifier_seq++; + smp_wmb(); /* * The above sequence increase must be visible before the - * below count decrease but both values are read by the kvm - * page fault under mmu_lock spinlock so we don't need to add - * a smb_wmb() here in between the two. + * below count decrease, which is ensured by the smp_wmb above + * in conjunction with the smp_rmb in mmu_notifier_retry(). 
*/ kvm->mmu_notifier_count--; spin_unlock(&kvm->mmu_lock); @@ -378,13 +378,14 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - young = kvm_age_hva(kvm, address); - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); + young = kvm_age_hva(kvm, address); if (young) kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + return young; } @@ -449,7 +450,7 @@ static void kvm_init_memslots_id(struct kvm *kvm) slots->id_to_index[i] = slots->memslots[i].id = i; } -static struct kvm *kvm_create_vm(void) +static struct kvm *kvm_create_vm(unsigned long type) { int r, i; struct kvm *kvm = kvm_arch_alloc_vm(); @@ -457,7 +458,7 @@ static struct kvm *kvm_create_vm(void) if (!kvm) return ERR_PTR(-ENOMEM); - r = kvm_arch_init_vm(kvm); + r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_nodisable; @@ -521,12 +522,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) return; if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE) - vfree(memslot->dirty_bitmap_head); + vfree(memslot->dirty_bitmap); else - kfree(memslot->dirty_bitmap_head); + kfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; - memslot->dirty_bitmap_head = NULL; } /* @@ -535,21 +535,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) static void kvm_free_physmem_slot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - int i; - if (!dont || free->rmap != dont->rmap) vfree(free->rmap); if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { - vfree(free->lpage_info[i]); - free->lpage_info[i] = NULL; - } - } + kvm_arch_free_memslot(free, dont); free->npages = 0; free->rmap = NULL; @@ -616,14 +608,13 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) return 0; } -#ifndef CONFIG_S390 /* * Allocation size is twice as large as the actual dirty bitmap size. - * This makes it possible to do double buffering: see x86's - * kvm_vm_ioctl_get_dirty_log(). + * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed. 
*/ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { +#ifndef CONFIG_S390 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); if (dirty_bytes > PAGE_SIZE) @@ -634,23 +625,8 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) if (!memslot->dirty_bitmap) return -ENOMEM; - memslot->dirty_bitmap_head = memslot->dirty_bitmap; - memslot->nr_dirty_pages = 0; - return 0; -} #endif /* !CONFIG_S390 */ - -static struct kvm_memory_slot * -search_memslots(struct kvm_memslots *slots, gfn_t gfn) -{ - struct kvm_memory_slot *memslot; - - kvm_for_each_memslot(memslot, slots) - if (gfn >= memslot->base_gfn && - gfn < memslot->base_gfn + memslot->npages) - return memslot; - - return NULL; + return 0; } static int cmp_memslot(const void *slot1, const void *slot2) @@ -778,69 +754,24 @@ int __kvm_set_memory_region(struct kvm *kvm, r = -ENOMEM; /* Allocate if a slot is being created */ + if (npages && !old.npages) { + new.user_alloc = user_alloc; + new.userspace_addr = mem->userspace_addr; #ifndef CONFIG_S390 - if (npages && !new.rmap) { new.rmap = vzalloc(npages * sizeof(*new.rmap)); - if (!new.rmap) goto out_free; - - new.user_alloc = user_alloc; - new.userspace_addr = mem->userspace_addr; - } - if (!npages) - goto skip_lpage; - - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - unsigned long ugfn; - unsigned long j; - int lpages; - int level = i + 2; - - /* Avoid unused variable warning if no large pages */ - (void)level; - - if (new.lpage_info[i]) - continue; - - lpages = 1 + ((base_gfn + npages - 1) - >> KVM_HPAGE_GFN_SHIFT(level)); - lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); - - new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i])); - - if (!new.lpage_info[i]) +#endif /* not defined CONFIG_S390 */ + if (kvm_arch_create_memslot(&new, npages)) goto out_free; - - if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) - new.lpage_info[i][0].write_count = 1; - if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) - new.lpage_info[i][lpages - 1].write_count = 1; - ugfn = new.userspace_addr >> PAGE_SHIFT; - /* - * If the gfn and userspace address are not aligned wrt each - * other, or if explicitly asked to, disable large page - * support for this slot - */ - if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || - !largepages_enabled) - for (j = 0; j < lpages; ++j) - new.lpage_info[i][j].write_count = 1; } -skip_lpage: - /* Allocate page dirty bitmap if needed */ if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { if (kvm_create_dirty_bitmap(&new) < 0) goto out_free; /* destroy any largepage mappings for dirty tracking */ } -#else /* not defined CONFIG_S390 */ - new.user_alloc = user_alloc; - if (user_alloc) - new.userspace_addr = mem->userspace_addr; -#endif /* not defined CONFIG_S390 */ if (!npages) { struct kvm_memory_slot *slot; @@ -873,12 +804,13 @@ skip_lpage: if (r) goto out_free; - /* map the pages in iommu page table */ + /* map/unmap the pages in iommu page table */ if (npages) { r = kvm_iommu_map_pages(kvm, &new); if (r) goto out_free; - } + } else + kvm_iommu_unmap_pages(kvm, &old); r = -ENOMEM; slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), @@ -890,8 +822,7 @@ skip_lpage: if (!npages) { new.rmap = NULL; new.dirty_bitmap = NULL; - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) - new.lpage_info[i] = NULL; + memset(&new.arch, 0, sizeof(new.arch)); } update_memslots(slots, &new); @@ -978,6 +909,11 @@ out: return r; } +bool kvm_largepages_enabled(void) +{ + return largepages_enabled; +} + void 
kvm_disable_largepages(void) { largepages_enabled = false; @@ -1031,12 +967,6 @@ int kvm_is_error_hva(unsigned long addr) } EXPORT_SYMBOL_GPL(kvm_is_error_hva); -static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, - gfn_t gfn) -{ - return search_memslots(slots, gfn); -} - struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { return __gfn_to_memslot(kvm_memslots(kvm), gfn); @@ -1459,7 +1389,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, ghc->gpa = gpa; ghc->generation = slots->generation; - ghc->memslot = __gfn_to_memslot(slots, gfn); + ghc->memslot = gfn_to_memslot(kvm, gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); if (!kvm_is_error_hva(ghc->hva)) ghc->hva += offset; @@ -1543,8 +1473,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; - if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap)) - memslot->nr_dirty_pages++; + /* TODO: introduce set_bit_le() and use it */ + test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap); } } @@ -1581,6 +1511,30 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); } +#ifndef CONFIG_S390 +/* + * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. + */ +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int me; + int cpu = vcpu->cpu; + wait_queue_head_t *wqp; + + wqp = kvm_arch_vcpu_wq(vcpu); + if (waitqueue_active(wqp)) { + wake_up_interruptible(wqp); + ++vcpu->stat.halt_wakeup; + } + + me = get_cpu(); + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) + if (kvm_arch_vcpu_should_kick(vcpu)) + smp_send_reschedule(cpu); + put_cpu(); +} +#endif /* !CONFIG_S390 */ + void kvm_resched(struct kvm_vcpu *vcpu) { if (!need_resched()) @@ -1589,6 +1543,31 @@ void kvm_resched(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_resched); +bool kvm_vcpu_yield_to(struct kvm_vcpu *target) +{ + struct pid *pid; + struct task_struct *task = NULL; + + rcu_read_lock(); + pid = rcu_dereference(target->pid); + if (pid) + task = get_pid_task(target->pid, PIDTYPE_PID); + rcu_read_unlock(); + if (!task) + return false; + if (task->flags & PF_VCPU) { + put_task_struct(task); + return false; + } + if (yield_to(task, 1)) { + put_task_struct(task); + return true; + } + put_task_struct(task); + return false; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); + void kvm_vcpu_on_spin(struct kvm_vcpu *me) { struct kvm *kvm = me->kvm; @@ -1607,8 +1586,6 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) */ for (pass = 0; pass < 2 && !yielded; pass++) { kvm_for_each_vcpu(i, vcpu, kvm) { - struct task_struct *task = NULL; - struct pid *pid; if (!pass && i < last_boosted_vcpu) { i = last_boosted_vcpu; continue; @@ -1618,24 +1595,11 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (waitqueue_active(&vcpu->wq)) continue; - rcu_read_lock(); - pid = rcu_dereference(vcpu->pid); - if (pid) - task = get_pid_task(vcpu->pid, PIDTYPE_PID); - rcu_read_unlock(); - if (!task) - continue; - if (task->flags & PF_VCPU) { - put_task_struct(task); - continue; - } - if (yield_to(task, 1)) { - put_task_struct(task); + if (kvm_vcpu_yield_to(vcpu)) { kvm->last_boosted_vcpu = i; yielded = 1; break; } - put_task_struct(task); } } } @@ -1657,7 +1621,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif else - return VM_FAULT_SIGBUS; + return kvm_arch_vcpu_fault(vcpu, vmf); 
get_page(page); vmf->page = page; return 0; @@ -1718,6 +1682,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) goto vcpu_destroy; mutex_lock(&kvm->lock); + if (!kvm_vcpu_compatible(vcpu)) { + r = -EINVAL; + goto unlock_vcpu_destroy; + } if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { r = -EINVAL; goto unlock_vcpu_destroy; @@ -2102,6 +2070,17 @@ static long kvm_vm_ioctl(struct file *filp, mutex_unlock(&kvm->lock); break; #endif +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_SIGNAL_MSI: { + struct kvm_msi msi; + + r = -EFAULT; + if (copy_from_user(&msi, argp, sizeof msi)) + goto out; + r = kvm_send_userspace_msi(kvm, &msi); + break; + } +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2198,12 +2177,12 @@ static struct file_operations kvm_vm_fops = { .llseek = noop_llseek, }; -static int kvm_dev_ioctl_create_vm(void) +static int kvm_dev_ioctl_create_vm(unsigned long type) { int r; struct kvm *kvm; - kvm = kvm_create_vm(); + kvm = kvm_create_vm(type); if (IS_ERR(kvm)) return PTR_ERR(kvm); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET @@ -2230,6 +2209,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_SET_BOOT_CPU_ID: #endif case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: @@ -2254,10 +2236,7 @@ static long kvm_dev_ioctl(struct file *filp, r = KVM_API_VERSION; break; case KVM_CREATE_VM: - r = -EINVAL; - if (arg) - goto out; - r = kvm_dev_ioctl_create_vm(); + r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: r = kvm_dev_ioctl_check_extension_generic(arg); @@ -2459,9 +2438,6 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2) int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, gpa_t addr, int len) { - if (bus->dev_count == NR_IOBUS_DEVS) - return -ENOSPC; - bus->range[bus->dev_count++] = (struct kvm_io_range) { .addr = addr, .len = len, @@ -2561,12 +2537,15 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - if (bus->dev_count > NR_IOBUS_DEVS-1) + if (bus->dev_count > NR_IOBUS_DEVS - 1) return -ENOSPC; - new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL); + new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * + sizeof(struct kvm_io_range)), GFP_KERNEL); if (!new_bus) return -ENOMEM; + memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count * + sizeof(struct kvm_io_range))); kvm_io_bus_insert_dev(new_bus, dev, addr, len); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); @@ -2583,27 +2562,25 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - - new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; - r = -ENOENT; - for (i = 0; i < new_bus->dev_count; i++) - if (new_bus->range[i].dev == dev) { + for (i = 0; i < bus->dev_count; i++) + if (bus->range[i].dev == dev) { r = 0; - new_bus->dev_count--; - new_bus->range[i] = new_bus->range[new_bus->dev_count]; - sort(new_bus->range, new_bus->dev_count, - sizeof(struct kvm_io_range), - kvm_io_bus_sort_cmp, NULL); break; } - if (r) { - kfree(new_bus); + if (r) return r; - } + + new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * + sizeof(struct kvm_io_range)), GFP_KERNEL); + if (!new_bus) + return -ENOMEM; + + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct 
kvm_io_range)); + new_bus->dev_count--; + memcpy(new_bus->range + i, bus->range + i + 1, + (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); |
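
One userspace-visible addition in this diff is the KVM_SIGNAL_MSI VM ioctl (kvm_send_userspace_msi() in irq_comm.c, wired up in kvm_main.c behind KVM_CAP_SIGNAL_MSI), which lets a VMM inject an MSI without setting up an irq routing entry. The following is a minimal sketch of how a VMM might call it, not part of the patch itself; kvm_fd (the /dev/kvm descriptor) and vm_fd (an existing VM with an in-kernel irqchip) are assumed to come from the surrounding VMM code.

	#include <linux/kvm.h>     /* struct kvm_msi, KVM_SIGNAL_MSI, KVM_CAP_SIGNAL_MSI */
	#include <sys/ioctl.h>
	#include <stdint.h>

	/* Sketch only: inject one MSI into the guest via KVM_SIGNAL_MSI.
	 * kvm_fd is the /dev/kvm fd (used for the capability check), vm_fd is
	 * the VM fd; both are assumed to exist in the caller. */
	static int inject_msi(int kvm_fd, int vm_fd, uint64_t address, uint32_t data)
	{
		struct kvm_msi msi = {
			.address_lo = (uint32_t)address,
			.address_hi = (uint32_t)(address >> 32),
			.data       = data,
			.flags      = 0,   /* kvm_send_userspace_msi() rejects non-zero flags */
		};

		if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SIGNAL_MSI) <= 0)
			return -1;         /* kernel built without HAVE_KVM_MSI support */

		/* Returns kvm_set_msi()'s delivery result, or -EINVAL if the VM
		 * has no in-kernel irqchip. */
		return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
	}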
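The other new interface is KVM_ASSIGN_SET_INTX_MASK (kvm_vm_ioctl_set_pci_irq_mask() in assigned-dev.c), with which userspace mirrors the virtual INTx disable state of an assigned device into the kernel so injection can be suppressed or the host line re-enabled. A hedged sketch follows; vm_fd and dev_id (the assigned_dev_id previously registered via KVM_ASSIGN_PCI_DEVICE) are assumed to be provided by the surrounding VMM.

	#include <linux/kvm.h>     /* struct kvm_assigned_pci_dev, KVM_ASSIGN_SET_INTX_MASK */
	#include <sys/ioctl.h>
	#include <stdint.h>
	#include <stdbool.h>

	/* Sketch only: tell KVM whether the guest currently masks the device's
	 * INTx line, so kvm_vm_ioctl_set_pci_irq_mask() can act accordingly. */
	static int set_intx_mask(int vm_fd, uint32_t dev_id, bool masked)
	{
		struct kvm_assigned_pci_dev dev = {
			.assigned_dev_id = dev_id,
			.flags           = masked ? KVM_DEV_ASSIGN_MASK_INTX : 0,
		};

		return ioctl(vm_fd, KVM_ASSIGN_SET_INTX_MASK, &dev);
	}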