diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2025-07-28 11:03:04 -0400 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2025-07-29 08:35:46 -0400 |
| commit | f02b1bcc73a17602903480562571069f0dff9f24 (patch) | |
| tree | 325828b3a747b77a069cde7e0df2f949152ac081 /include | |
| parent | 65164fd0f6b50781fe27736be54e55535c9ad82d (diff) | |
| parent | 81bf24f1ac77029bf858c0da081088eb62b1b230 (diff) | |
Merge tag 'kvm-x86-irqs-6.17' of https://github.com/kvm-x86/linux into HEAD
KVM IRQ changes for 6.17
- Rework irqbypass to track/match producers and consumers via an xarray
instead of a linked list. Using a linked list leads to O(n^2) insertion
times, which is hugely problematic for use cases that create large numbers
of VMs. Such use cases typically don't actually use irqbypass, but
eliminating the pointless registration is a future problem to solve as it
likely requires new uAPI.
- Track irqbypass's "token" as "struct eventfd_ctx *" instead of a "void *",
to avoid making a simple concept unnecessarily difficult to understand.
- Add CONFIG_KVM_IOAPIC for x86 to allow disabling support for I/O APIC, PIC,
and PIT emulation at compile time.
- Drop x86's irq_comm.c, and move a pile of IRQ related code into irq.c.
- Fix a variety of flaws and bugs in the AVIC device posted IRQ code.
- Inhibited AVIC if a vCPU's ID is too big (relative to what hardware
supports) instead of rejecting vCPU creation.
- Extend enable_ipiv module param support to SVM, by simply leaving IsRunning
clear in the vCPU's physical ID table entry.
- Disable IPI virtualization, via enable_ipiv, if the CPU is affected by
erratum #1235, to allow (safely) enabling AVIC on such CPUs.
- Dedup x86's device posted IRQ code, as the vast majority of functionality
can be shared verbatime between SVM and VMX.
- Harden the device posted IRQ code against bugs and runtime errors.
- Use vcpu_idx, not vcpu_id, for GA log tag/metadata, to make lookups O(1)
instead of O(n).
- Generate GA Log interrupts if and only if the target vCPU is blocking, i.e.
only if KVM needs a notification in order to wake the vCPU.
- Decouple device posted IRQs from VFIO device assignment, as binding a VM to
a VFIO group is not a requirement for enabling device posted IRQs.
- Clean up and document/comment the irqfd assignment code.
- Disallow binding multiple irqfds to an eventfd with a priority waiter, i.e.
ensure an eventfd is bound to at most one irqfd through the entire host,
and add a selftest to verify eventfd:irqfd bindings are globally unique.
Diffstat (limited to 'include')
| -rw-r--r-- | include/kvm/arm_vgic.h | 2 | ||||
| -rw-r--r-- | include/linux/amd-iommu.h | 25 | ||||
| -rw-r--r-- | include/linux/irqbypass.h | 46 | ||||
| -rw-r--r-- | include/linux/irqchip/arm-gic-v4.h | 2 | ||||
| -rw-r--r-- | include/linux/kvm_host.h | 18 | ||||
| -rw-r--r-- | include/linux/kvm_irqfd.h | 5 | ||||
| -rw-r--r-- | include/linux/wait.h | 2 | ||||
| -rw-r--r-- | include/trace/events/kvm.h | 84 |
8 files changed, 47 insertions, 137 deletions
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4a34f7f0a864..b2a04481de1a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -434,7 +434,7 @@ struct kvm_kernel_irq_routing_entry; int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); -int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq); +void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq); int vgic_v4_load(struct kvm_vcpu *vcpu); void vgic_v4_commit(struct kvm_vcpu *vcpu); diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 062fbd4c9b77..8cced632ecd0 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -12,20 +12,6 @@ struct amd_iommu; -/* - * This is mainly used to communicate information back-and-forth - * between SVM and IOMMU for setting up and tearing down posted - * interrupt - */ -struct amd_iommu_pi_data { - u32 ga_tag; - u32 prev_ga_tag; - u64 base; - bool is_guest_mode; - struct vcpu_data *vcpu_data; - void *ir_data; -}; - #ifdef CONFIG_AMD_IOMMU struct task_struct; @@ -44,10 +30,8 @@ static inline void amd_iommu_detect(void) { } /* IOMMU AVIC Function */ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); -extern int -amd_iommu_update_ga(int cpu, bool is_run, void *data); - -extern int amd_iommu_activate_guest_mode(void *data); +extern int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr); +extern int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr); extern int amd_iommu_deactivate_guest_mode(void *data); #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ @@ -58,13 +42,12 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) return 0; } -static inline int -amd_iommu_update_ga(int cpu, bool is_run, void *data) +static inline int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr) { return 0; } -static inline int amd_iommu_activate_guest_mode(void *data) +static inline int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr) { return 0; } diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h index 9bdb2a781841..ede1fa938152 100644 --- a/include/linux/irqbypass.h +++ b/include/linux/irqbypass.h @@ -10,6 +10,7 @@ #include <linux/list.h> +struct eventfd_ctx; struct irq_bypass_consumer; /* @@ -18,20 +19,22 @@ struct irq_bypass_consumer; * The IRQ bypass manager is a simple set of lists and callbacks that allows * IRQ producers (ex. physical interrupt sources) to be matched to IRQ * consumers (ex. virtualization hardware that allows IRQ bypass or offload) - * via a shared token (ex. eventfd_ctx). Producers and consumers register - * independently. When a token match is found, the optional @stop callback - * will be called for each participant. The pair will then be connected via - * the @add_* callbacks, and finally the optional @start callback will allow - * any final coordination. When either participant is unregistered, the - * process is repeated using the @del_* callbacks in place of the @add_* - * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings - * are not supported. + * via a shared eventfd_ctx. Producers and consumers register independently. + * When a producer and consumer are paired, i.e. an eventfd match is found, the + * optional @stop callback will be called for each participant. The pair will + * then be connected via the @add_* callbacks, and finally the optional @start + * callback will allow any final coordination. When either participant is + * unregistered, the process is repeated using the @del_* callbacks in place of + * the @add_* callbacks. eventfds must be unique per producer/consumer, 1:N + * pairings are not supported. */ +struct irq_bypass_consumer; + /** * struct irq_bypass_producer - IRQ bypass producer definition - * @node: IRQ bypass manager private list management - * @token: opaque token to match between producer and consumer (non-NULL) + * @eventfd: eventfd context used to match producers and consumers + * @consumer: The connected consumer (NULL if no connection) * @irq: Linux IRQ number for the producer device * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) @@ -43,8 +46,8 @@ struct irq_bypass_consumer; * for a physical device assigned to a VM. */ struct irq_bypass_producer { - struct list_head node; - void *token; + struct eventfd_ctx *eventfd; + struct irq_bypass_consumer *consumer; int irq; int (*add_consumer)(struct irq_bypass_producer *, struct irq_bypass_consumer *); @@ -56,8 +59,8 @@ struct irq_bypass_producer { /** * struct irq_bypass_consumer - IRQ bypass consumer definition - * @node: IRQ bypass manager private list management - * @token: opaque token to match between producer and consumer (non-NULL) + * @eventfd: eventfd context used to match producers and consumers + * @producer: The connected producer (NULL if no connection) * @add_producer: Connect the IRQ consumer to an IRQ producer * @del_producer: Disconnect the IRQ consumer from an IRQ producer * @stop: Perform any quiesce operations necessary prior to add/del (optional) @@ -69,8 +72,9 @@ struct irq_bypass_producer { * portions of the interrupt handling to the VM. */ struct irq_bypass_consumer { - struct list_head node; - void *token; + struct eventfd_ctx *eventfd; + struct irq_bypass_producer *producer; + int (*add_producer)(struct irq_bypass_consumer *, struct irq_bypass_producer *); void (*del_producer)(struct irq_bypass_consumer *, @@ -79,9 +83,11 @@ struct irq_bypass_consumer { void (*start)(struct irq_bypass_consumer *); }; -int irq_bypass_register_producer(struct irq_bypass_producer *); -void irq_bypass_unregister_producer(struct irq_bypass_producer *); -int irq_bypass_register_consumer(struct irq_bypass_consumer *); -void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); +int irq_bypass_register_producer(struct irq_bypass_producer *producer, + struct eventfd_ctx *eventfd, int irq); +void irq_bypass_unregister_producer(struct irq_bypass_producer *producer); +int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer, + struct eventfd_ctx *eventfd); +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer); #endif /* IRQBYPASS_H */ diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 7f1f11a5e4e4..0b0887099fd7 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -146,7 +146,7 @@ int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); -int its_unmap_vlpi(int irq); +void its_unmap_vlpi(int irq); int its_prop_update_vlpi(int irq, u8 config, bool inv); int its_prop_update_vsgi(int irq, u8 priority, bool group); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3bde4fb5c6aa..fb9ec06aa807 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -190,6 +190,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 +#define KVM_PIT_IRQ_SOURCE_ID 2 extern struct mutex kvm_lock; extern struct list_head vm_list; @@ -1022,16 +1023,12 @@ void kvm_unlock_all_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_KVM_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); -void kvm_arch_post_irq_routing_update(struct kvm *kvm); #else static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { } -static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) -{ -} #endif #ifdef CONFIG_HAVE_KVM_IRQCHIP @@ -1788,8 +1785,6 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -int kvm_request_irq_source_id(struct kvm *kvm); -void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); /* @@ -2406,6 +2401,8 @@ struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) +struct kvm_kernel_irqfd; + bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); @@ -2413,10 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); -bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, - struct kvm_kernel_irq_routing_entry *); +void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index 8ad43692e3bb..ef8c134ded8a 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -55,10 +55,13 @@ struct kvm_kernel_irqfd { /* Used for setup/shutdown */ struct eventfd_ctx *eventfd; struct list_head list; - poll_table pt; struct work_struct shutdown; struct irq_bypass_consumer consumer; struct irq_bypass_producer *producer; + + struct kvm_vcpu *irq_bypass_vcpu; + struct list_head vcpu_list; + void *irq_bypass_data; }; #endif /* __LINUX_KVM_IRQFD_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 965a19809c7e..09855d819418 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -164,6 +164,8 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head, + struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index fc7d0f8ff078..0b6b79b1a1bc 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -82,95 +82,15 @@ TRACE_EVENT(kvm_set_irq, TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); -#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ - -#if defined(__KVM_HAVE_IOAPIC) -#define kvm_deliver_mode \ - {0x0, "Fixed"}, \ - {0x1, "LowPrio"}, \ - {0x2, "SMI"}, \ - {0x3, "Res3"}, \ - {0x4, "NMI"}, \ - {0x5, "INIT"}, \ - {0x6, "SIPI"}, \ - {0x7, "ExtINT"} - -TRACE_EVENT(kvm_ioapic_set_irq, - TP_PROTO(__u64 e, int pin, bool coalesced), - TP_ARGS(e, pin, coalesced), - - TP_STRUCT__entry( - __field( __u64, e ) - __field( int, pin ) - __field( bool, coalesced ) - ), - - TP_fast_assign( - __entry->e = e; - __entry->pin = pin; - __entry->coalesced = coalesced; - ), - - TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s", - __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, - __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), - (__entry->e & (1<<11)) ? "logical" : "physical", - (__entry->e & (1<<15)) ? "level" : "edge", - (__entry->e & (1<<16)) ? "|masked" : "", - __entry->coalesced ? " (coalesced)" : "") -); - -TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, - TP_PROTO(__u64 e), - TP_ARGS(e), - - TP_STRUCT__entry( - __field( __u64, e ) - ), - TP_fast_assign( - __entry->e = e; - ), - - TP_printk("dst %x vec %u (%s|%s|%s%s)", - (u8)(__entry->e >> 56), (u8)__entry->e, - __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), - (__entry->e & (1<<11)) ? "logical" : "physical", - (__entry->e & (1<<15)) ? "level" : "edge", - (__entry->e & (1<<16)) ? "|masked" : "") -); - -TRACE_EVENT(kvm_msi_set_irq, - TP_PROTO(__u64 address, __u64 data), - TP_ARGS(address, data), - - TP_STRUCT__entry( - __field( __u64, address ) - __field( __u64, data ) - ), - - TP_fast_assign( - __entry->address = address; - __entry->data = data; - ), - - TP_printk("dst %llx vec %u (%s|%s|%s%s)", - (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00), - (u8)__entry->data, - __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), - (__entry->address & (1<<2)) ? "logical" : "physical", - (__entry->data & (1<<15)) ? "level" : "edge", - (__entry->address & (1<<3)) ? "|rh" : "") -); +#ifdef CONFIG_KVM_IOAPIC #define kvm_irqchips \ {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ {KVM_IRQCHIP_IOAPIC, "IOAPIC"} -#endif /* defined(__KVM_HAVE_IOAPIC) */ - -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#endif /* CONFIG_KVM_IOAPIC */ #ifdef kvm_irqchips #define kvm_ack_irq_string "irqchip %s pin %u" |
