From 49df6397edfc5a8ba8ca813b51fb9729d8e94b40 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:40 -0700 Subject: KVM: x86: Split the APIC from the rest of IRQCHIP. First patch in a series which enables the relocation of the PIC/IOAPIC to userspace. Adds capability KVM_CAP_SPLIT_IRQCHIP; KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the rest of the irqchip. Compile tested for x86. Signed-off-by: Steve Rutherford Suggested-by: Andrew Honig Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..354f147647ab 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1002,6 +1002,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #endif int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a9256f0331ae..ed00f8fc9ea2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -824,6 +824,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MULTI_ADDRESS_SPACE 118 #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 +#define KVM_CAP_SPLIT_IRQCHIP 121 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 7543a635aa09eb138b2cbf60ac3ff19503ae6954 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:41 -0700 Subject: KVM: x86: Add KVM exit for IOAPIC EOIs Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI level-triggered IOAPIC interrupts. Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs to be informed (which is identical to the EOI_EXIT_BITMAP field used by modern x86 processors, but can also be used to elide kvm IOAPIC EOI exits on older processors). [Note: A prototype using ResampleFDs found that decoupling the EOI from the VCPU's thread made it possible for the VCPU to not see a recent EOI after reentering the guest. This does not match real hardware.] Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- include/uapi/linux/kvm.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 354f147647ab..3b33215ed447 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,6 +140,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 +#define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -1146,4 +1147,3 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif - diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ed00f8fc9ea2..12e3afbf0f47 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -183,6 +183,7 @@ struct kvm_s390_skeys { #define KVM_EXIT_EPR 23 #define KVM_EXIT_SYSTEM_EVENT 24 #define KVM_EXIT_S390_STSI 25 +#define KVM_EXIT_IOAPIC_EOI 26 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -333,6 +334,10 @@ struct kvm_run { __u8 sel1; __u16 sel2; } s390_stsi; + /* KVM_EXIT_IOAPIC_EOI */ + struct { + __u8 vector; + } eoi; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From b053b2aef25d00773fa6762dcd4b7f5c9c42d171 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:32:35 -0700 Subject: KVM: x86: Add EOI exit bitmap inference In order to support a userspace IOAPIC interacting with an in kernel APIC, the EOI exit bitmaps need to be configurable. If the IOAPIC is in userspace (i.e. the irqchip has been split), the EOI exit bitmaps will be set whenever the GSI Routes are configured. In particular, for the low MSI routes are reservable for userspace IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the destination vector of the route will be set for the destination VCPU. The intention is for the userspace IOAPICs to use the reservable MSI routes to inject interrupts into the guest. This is a slight abuse of the notion of an MSI Route, given that MSIs classically bypass the IOAPIC. It might be worthwhile to add an additional route type to improve clarity. Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b33215ed447..d754f2d826e9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -330,6 +330,18 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; +#endif + #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -456,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_arch_irq_routing_update(struct kvm *kvm); #else static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { } +static inline void kvm_arch_irq_routing_update(struct kvm *kvm) +{ +} #endif #ifdef CONFIG_HAVE_KVM_IRQFD -- cgit v1.2.3 From e9ea5069d9e569c32ab913c39467df32e056b3a7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:59 +0800 Subject: kvm: add capability for any-length ioeventfds Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 12e3afbf0f47..03f3618612aa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -830,6 +830,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #define KVM_CAP_SPLIT_IRQCHIP 121 +#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From e516cebb4f2b2057a5a421fea589079502acfff6 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 16 Sep 2015 12:29:48 +0300 Subject: kvm/x86: Hyper-V HV_X64_MSR_RESET msr HV_X64_MSR_RESET msr is used by Hyper-V based Windows guest to reset guest VM by hypervisor. Necessary to support loading of winhv.sys in guest, which in turn is required to support Windows VMBus. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d754f2d826e9..cd0ba2e931e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -141,6 +141,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 #define KVM_REQ_IOAPIC_EOI_EXIT 28 +#define KVM_REQ_HV_RESET 29 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From f73f8173126ba68eb1c42bd9a234a51d78576ca6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 18 Sep 2015 22:29:39 +0800 Subject: virt: IRQ bypass manager When a physical I/O device is assigned to a virtual machine through facilities like VFIO and KVM, the interrupt for the device generally bounces through the host system before being injected into the VM. However, hardware technologies exist that often allow the host to be bypassed for some of these scenarios. Intel Posted Interrupts allow the specified physical edge interrupts to be directly injected into a guest when delivered to a physical processor while the vCPU is running. ARM IRQ Forwarding allows forwarded physical interrupts to be directly deactivated by the guest. The IRQ bypass manager here is meant to provide the shim to connect interrupt producers, generally the host physical device driver, with interrupt consumers, generally the hypervisor, in order to configure these bypass mechanism. To do this, we base the connection on a shared, opaque token. For KVM-VFIO this is expected to be an eventfd_ctx since this is the connection we already use to connect an eventfd to an irqfd on the in-kernel path. When a producer and consumer with matching tokens is found, callbacks via both registered participants allow the bypass facilities to be automatically enabled. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Feng Wu Signed-off-by: Feng Wu Signed-off-by: Paolo Bonzini --- include/linux/irqbypass.h | 90 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 include/linux/irqbypass.h (limited to 'include') diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h new file mode 100644 index 000000000000..1551b5b2f4c2 --- /dev/null +++ b/include/linux/irqbypass.h @@ -0,0 +1,90 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef IRQBYPASS_H +#define IRQBYPASS_H + +#include + +struct irq_bypass_consumer; + +/* + * Theory of operation + * + * The IRQ bypass manager is a simple set of lists and callbacks that allows + * IRQ producers (ex. physical interrupt sources) to be matched to IRQ + * consumers (ex. virtualization hardware that allows IRQ bypass or offload) + * via a shared token (ex. eventfd_ctx). Producers and consumers register + * independently. When a token match is found, the optional @stop callback + * will be called for each participant. The pair will then be connected via + * the @add_* callbacks, and finally the optional @start callback will allow + * any final coordination. When either participant is unregistered, the + * process is repeated using the @del_* callbacks in place of the @add_* + * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings + * are not supported. + */ + +/** + * struct irq_bypass_producer - IRQ bypass producer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @irq: Linux IRQ number for the producer device + * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) + * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass producer structure represents an interrupt source for + * participation in possible host bypass, for instance an interrupt vector + * for a physical device assigned to a VM. + */ +struct irq_bypass_producer { + struct list_head node; + void *token; + int irq; + int (*add_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*del_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*stop)(struct irq_bypass_producer *); + void (*start)(struct irq_bypass_producer *); +}; + +/** + * struct irq_bypass_consumer - IRQ bypass consumer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @add_producer: Connect the IRQ consumer to an IRQ producer + * @del_producer: Disconnect the IRQ consumer from an IRQ producer + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass consumer structure represents an interrupt sink for + * participation in possible host bypass, for instance a hypervisor may + * support offloads to allow bypassing the host entirely or offload + * portions of the interrupt handling to the VM. + */ +struct irq_bypass_consumer { + struct list_head node; + void *token; + int (*add_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*del_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*stop)(struct irq_bypass_consumer *); + void (*start)(struct irq_bypass_consumer *); +}; + +int irq_bypass_register_producer(struct irq_bypass_producer *); +void irq_bypass_unregister_producer(struct irq_bypass_producer *); +int irq_bypass_register_consumer(struct irq_bypass_consumer *); +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); + +#endif /* IRQBYPASS_H */ -- cgit v1.2.3 From 166c9775f1f8b8f00ad1db0fa5c8fc74059d965d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:42 +0800 Subject: KVM: create kvm_irqfd.h Move _irqfd_resampler and _irqfd struct declarations in a new public header: kvm_irqfd.h. They are respectively renamed into kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes will be used by architecture specific code, in the context of IRQ bypass manager integration. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 include/linux/kvm_irqfd.h (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h new file mode 100644 index 000000000000..f926b39a26b6 --- /dev/null +++ b/include/linux/kvm_irqfd.h @@ -0,0 +1,69 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * irqfd: Allows an fd to be used to inject an interrupt to the guest + * Credit goes to Avi Kivity for the original idea. + */ + +#ifndef __LINUX_KVM_IRQFD_H +#define __LINUX_KVM_IRQFD_H + +#include +#include + +/* + * Resampling irqfds are a special variety of irqfds used to emulate + * level triggered interrupts. The interrupt is asserted on eventfd + * trigger. On acknowledgment through the irq ack notifier, the + * interrupt is de-asserted and userspace is notified through the + * resamplefd. All resamplers on the same gsi are de-asserted + * together, so we don't need to track the state of each individual + * user. We can also therefore share the same irq source ID. + */ +struct kvm_kernel_irqfd_resampler { + struct kvm *kvm; + /* + * List of resampling struct _irqfd objects sharing this gsi. + * RCU list modified under kvm->irqfds.resampler_lock + */ + struct list_head list; + struct kvm_irq_ack_notifier notifier; + /* + * Entry in list of kvm->irqfd.resampler_list. Use for sharing + * resamplers among irqfds on the same gsi. + * Accessed and modified under kvm->irqfds.resampler_lock + */ + struct list_head link; +}; + +struct kvm_kernel_irqfd { + /* Used for MSI fast-path */ + struct kvm *kvm; + wait_queue_t wait; + /* Update side is protected by irqfds.lock */ + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; + /* Used for level IRQ fast-path */ + int gsi; + struct work_struct inject; + /* The resampler used by this irqfd (resampler-only) */ + struct kvm_kernel_irqfd_resampler *resampler; + /* Eventfd notified on resample (resampler-only) */ + struct eventfd_ctx *resamplefd; + /* Entry in list of irqfds for a resampler (resampler-only) */ + struct list_head resampler_link; + /* Used for setup/shutdown */ + struct eventfd_ctx *eventfd; + struct list_head list; + poll_table pt; + struct work_struct shutdown; +}; + +#endif /* __LINUX_KVM_IRQFD_H */ -- cgit v1.2.3 From 1a02b27035f82091d51ecafcb9ccaac1f31d4eb2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:43 +0800 Subject: KVM: introduce kvm_arch functions for IRQ bypass This patch introduces - kvm_arch_irq_bypass_add_producer - kvm_arch_irq_bypass_del_producer - kvm_arch_irq_bypass_stop - kvm_arch_irq_bypass_start They make possible to specialize the KVM IRQ bypass consumer in case CONFIG_KVM_HAVE_IRQ_BYPASS is set. Signed-off-by: Eric Auger [Add weak implementations of the callbacks. - Feng] Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cd0ba2e931e1..b8543b02b7bc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1163,4 +1164,13 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ + +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif -- cgit v1.2.3 From 9016cfb577a15abd6a7990890ccf6bf1edf04d31 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:44 +0800 Subject: KVM: eventfd: add irq bypass consumer management This patch adds the registration/unregistration of an irq_bypass_consumer on irqfd assignment/deassignment. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index f926b39a26b6..0c1de05098c8 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -64,6 +64,8 @@ struct kvm_kernel_irqfd { struct list_head list; poll_table pt; struct work_struct shutdown; + struct irq_bypass_consumer consumer; + struct irq_bypass_producer *producer; }; #endif /* __LINUX_KVM_IRQFD_H */ -- cgit v1.2.3 From f70c20aaf141adb715a2d750c55154073b02a9c3 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:53 +0800 Subject: KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd' This patch adds an arch specific hooks 'arch_update' in 'struct kvm_kernel_irqfd'. On Intel side, it is used to update the IRTE when VT-d posted-interrupts is used. Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8543b02b7bc..5c3f4538807f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1172,5 +1172,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif -- cgit v1.2.3 From bf9f6ac8d74969690df1485b33b7c238ca9f2269 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:55 +0800 Subject: KVM: Update Posted-Interrupts Descriptor when vCPU is blocked This patch updates the Posted-Interrupts Descriptor when vCPU is blocked. pre-block: - Add the vCPU to the blocked per-CPU list - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR post-block: - Remove the vCPU from the per-CPU list Signed-off-by: Feng Wu [Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo] Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c3f4538807f..9596a2f0977b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,6 +234,9 @@ struct kvm_vcpu { unsigned long requests; unsigned long guest_debug; + int pre_pcpu; + struct list_head blocked_vcpu_list; + struct mutex mutex; struct kvm_run *run; -- cgit v1.2.3 From ba1aefcd6db5536d3eb3ca3ce7bd6786960140ea Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:46 +0300 Subject: kvm/eventfd: factor out kvm_notify_acked_gsi() Factor out kvm_notify_acked_gsi() helper to iterate over EOI listeners and notify those matching the given gsi. It will be reused in the upcoming Hyper-V SynIC implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9596a2f0977b..b66861c297c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -829,6 +829,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -- cgit v1.2.3 From c9a5eccac1abf50649949f15754a7635f263a1ff Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:47 +0300 Subject: kvm/eventfd: add arch-specific set_irq Allow for arch-specific interrupt types to be set. For that, add kvm_arch_set_irq() which takes interrupt type-specific action if it recognizes the interrupt type given, and -EWOULDBLOCK otherwise. The default implementation always returns -EWOULDBLOCK. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b66861c297c4..eba9caebc9c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -828,6 +828,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status); + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); -- cgit v1.2.3 From 3217f7c25bca66eed9b07f0b8bfd1937169b0736 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 27 Aug 2015 16:41:15 +0200 Subject: KVM: Add kvm_arch_vcpu_{un}blocking callbacks Some times it is useful for architecture implementations of KVM to know when the VCPU thread is about to block or when it comes back from blocking (arm/arm64 needs to know this to properly implement timers, for example). Therefore provide a generic architecture callback function in line with what we do elsewhere for KVM generic-arch interactions. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..4a86f5f072c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -625,6 +625,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From d35268da66870d733ae763fd7f9b06a1f63f395e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 25 Aug 2015 19:48:21 +0200 Subject: arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block We currently schedule a soft timer every time we exit the guest if the timer did not expire while running the guest. This is really not necessary, because the only work we do in the timer work function is to kick the vcpu. Kicking the vcpu does two things: (1) If the vpcu thread is on a waitqueue, make it runnable and remove it from the waitqueue. (2) If the vcpu is running on a different physical CPU from the one doing the kick, it sends a reschedule IPI. The second case cannot happen, because the soft timer is only ever scheduled when the vcpu is not running. The first case is only relevant when the vcpu thread is on a waitqueue, which is only the case when the vcpu thread has called kvm_vcpu_block(). Therefore, we only need to make sure a timer is scheduled for kvm_vcpu_block(), which we do by encapsulating all calls to kvm_vcpu_block() with kvm_timer_{un}schedule calls. Additionally, we only schedule a soft timer if the timer is enabled and unmasked, since it is useless otherwise. Note that theoretically userspace can use the SET_ONE_REG interface to change registers that should cause the timer to fire, even if the vcpu is blocked without a scheduled timer, but this case was not supported before this patch and we leave it for future work for now. Signed-off-by: Christoffer Dall --- include/kvm/arm_arch_timer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index e1e4d7c38dda..ef14cc1f1f26 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); +void kvm_timer_schedule(struct kvm_vcpu *vcpu); +void kvm_timer_unschedule(struct kvm_vcpu *vcpu); #endif -- cgit v1.2.3 From 4b4b4512da2a844b8da2585609b67fae1ce4f4db Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 15:01:27 +0200 Subject: arm/arm64: KVM: Rework the arch timer to use level-triggered semantics The arch timer currently uses edge-triggered semantics in the sense that the line is never sampled by the vgic and lowering the line from the timer to the vgic doesn't have any effect on the pending state of virtual interrupts in the vgic. This means that we do not support a guest with the otherwise valid behavior of (1) disable interrupts (2) enable the timer (3) disable the timer (4) enable interrupts. Such a guest would validly not expect to see any interrupts on real hardware, but will see interrupts on KVM. This patch fixes this shortcoming through the following series of changes. First, we change the flow of the timer/vgic sync/flush operations. Now the timer is always flushed/synced before the vgic, because the vgic samples the state of the timer output. This has the implication that we move the timer operations in to non-preempible sections, but that is fine after the previous commit getting rid of hrtimer schedules on every entry/exit. Second, we change the internal behavior of the timer, letting the timer keep track of its previous output state, and only lower/raise the line to the vgic when the state changes. Note that in theory this could have been accomplished more simply by signalling the vgic every time the state *potentially* changed, but we don't want to be hitting the vgic more often than necessary. Third, we get rid of the use of the map->active field in the vgic and instead simply set the interrupt as active on the physical distributor whenever the input to the GIC is asserted and conversely clear the physical active state when the input to the GIC is deasserted. Fourth, and finally, we now initialize the timer PPIs (and all the other unused PPIs for now), to be level-triggered, and modify the sync code to sample the line state on HW sync and re-inject a new interrupt if it is still pending at that time. Signed-off-by: Christoffer Dall --- include/kvm/arm_arch_timer.h | 2 +- include/kvm/arm_vgic.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ef14cc1f1f26..1800227af9d6 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,7 +51,7 @@ struct arch_timer_cpu { bool armed; /* Timer IRQ */ - const struct kvm_irq_level *irq; + struct kvm_irq_level irq; /* VGIC mapping */ struct irq_phys_map *map; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4e14dac282bb..7bc5d0224ab0 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -159,7 +159,6 @@ struct irq_phys_map { u32 virt_irq; u32 phys_irq; u32 irq; - bool active; }; struct irq_phys_map_entry { @@ -354,8 +353,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) -- cgit v1.2.3 From 5fdf876d30ce7c9b63045b3db8374912ab9b262c Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 15 Oct 2015 22:16:28 +0200 Subject: KVM: arm: Do not indent the arguments of DECLARE_BITMAP Besides being a coding style issue, it confuses make tags: ctags: Warning: include/kvm/arm_vgic.h:307: null expansion of name pattern "\1" ctags: Warning: include/kvm/arm_vgic.h:308: null expansion of name pattern "\1" ctags: Warning: include/kvm/arm_vgic.h:309: null expansion of name pattern "\1" ctags: Warning: include/kvm/arm_vgic.h:317: null expansion of name pattern "\1" Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Michal Marek Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7bc5d0224ab0..8065801a1847 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -299,9 +299,9 @@ struct vgic_cpu { u8 *vgic_irq_lr_map; /* Pending/active/both interrupts on this VCPU */ - DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS); /* Pending/active/both shared interrupts, dynamically sized */ unsigned long *pending_shared; @@ -309,7 +309,7 @@ struct vgic_cpu { unsigned long *pend_act_shared; /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); + DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS); /* Number of list registers on this CPU */ int nr_lr; -- cgit v1.2.3 From c4cd4c168b81dad53e659d18cdae653bc0ec2384 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:29 +0300 Subject: KVM: arm/arm64: Optimize away redundant LR tracking Currently we use vgic_irq_lr_map in order to track which LRs hold which IRQs, and lr_used bitmap in order to track which LRs are used or free. vgic_irq_lr_map is actually used only for piggy-back optimization, and can be easily replaced by iteration over lr_used. This is good because in future, when LPI support is introduced, number of IRQs will grow up to at least 16384, while numbers from 1024 to 8192 are never going to be used. This would be a huge memory waste. In its turn, lr_used is also completely redundant since ae705930fca6322600690df9dc1c7d0516145a93 ("arm/arm64: KVM: Keep elrsr/aisr in sync with software model"), because together with lr_used we also update elrsr. This allows to easily replace lr_used with elrsr, inverting all conditions (because in elrsr '1' means 'free'). Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 8065801a1847..3936bf802e1d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -295,9 +295,6 @@ struct vgic_v3_cpu_if { }; struct vgic_cpu { - /* per IRQ to LR mapping */ - u8 *vgic_irq_lr_map; - /* Pending/active/both interrupts on this VCPU */ DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS); DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS); @@ -308,9 +305,6 @@ struct vgic_cpu { unsigned long *active_shared; unsigned long *pend_act_shared; - /* Bitmap of used/free list registers */ - DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS); - /* Number of list registers on this CPU */ int nr_lr; -- cgit v1.2.3 From 26caea7693cb99833fe4ecc544c842289d6b3f69 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:31 +0300 Subject: KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr() Now we see that vgic_set_lr() and vgic_sync_lr_elrsr() are always used together. Merge them into one function, saving from second vgic_ops dereferencing every time. Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3936bf802e1d..f62addc17dcf 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -112,7 +112,6 @@ struct vgic_vmcr { struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); - void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); void (*clear_eisr)(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From c75efa974e013640496620f26f0b532cb5cb17f9 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:50 +0300 Subject: drivers/hv: share Hyper-V SynIC constants with userspace Moved Hyper-V synic contants from guest Hyper-V drivers private header into x86 arch uapi Hyper-V header. Added Hyper-V synic msr's flags into x86 arch uapi Hyper-V header. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 54733d5b503e..8fdc17b84739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -26,6 +26,7 @@ #define _HYPERV_H #include +#include #include #include -- cgit v1.2.3 From b97e6de9c96cefaa02a6a7464731ea504b45e150 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 19:16:47 +0100 Subject: KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not want to do too much work in atomic context, in particular not walking all the VCPUs of the virtual machine. So we want to distinguish the architecture-specific injection function for irqfd from kvm_set_msi. Since it's still empty, reuse the newly added kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87189a41d904..15c78f320678 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -830,10 +830,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); - -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status); - +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); -- cgit v1.2.3 From 8a22f234a81ab4d1de5d948c3478608f08a9b844 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 18:52:02 +0100 Subject: KVM: x86: move kvm_set_irq_inatomic to legacy device assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function is not used outside device assignment, and kvm_arch_set_irq_inatomic has a different prototype. Move it here and make it static to avoid confusion. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15c78f320678..242a6d2b53ff 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -827,7 +827,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, -- cgit v1.2.3