diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-04 10:38:56 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-04 10:38:56 -0800 |
| commit | 27db1ae6ecdf23f4176276da6037eaafbd23bf94 (patch) | |
| tree | 440f75bac0051c9217ef78328da0a7d70e3b9978 | |
| parent | a14980444f418de53a7cc315eb4fbd8a89c72991 (diff) | |
| parent | 0de4a0eec25b9171f2a2abb1a820e125e6797770 (diff) | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
- Fix a bug where AVIC is incorrectly inhibited when running with
x2AVIC disabled via module param (or on a system without x2AVIC)
- Fix a dangling device posted IRQs bug by explicitly checking if the
irqfd is still active (on the list) when handling an eventfd signal,
instead of zeroing the irqfd's routing information when the irqfd is
deassigned.
Zeroing the irqfd's routing info causes arm64 and x86's to not
disable posting for the IRQ (kvm_arch_irq_bypass_del_producer() looks
for an MSI), incorrectly leaving the IRQ in posted mode (and leading
to use-after-free and memory leaks on AMD in particular).
This is both the most pressing and scariest, but it's been in -next
for a while.
- Disable FORTIFY_SOURCE for KVM selftests to prevent the compiler from
generating calls to the checked versions of memset() and friends,
which leads to unexpected page faults in guest code due e.g.
__memset_chk@plt not being resolved.
- Explicitly configure the supported XSS capabilities from within
{svm,vmx}_set_cpu_caps() to fix a bug where VMX will compute the
reference VMCS configuration with SHSTK and IBT enabled, but then
compute each CPUs local config with SHSTK and IBT disabled if not all
CET xfeatures are enabled, e.g. if the kernel is built with
X86_KERNEL_IBT=n.
The mismatch in features results in differing nVMX setting, and
ultimately causes kvm-intel.ko to refuse to load with nested=1.
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: Explicitly configure supported XSS from {svm,vmx}_set_cpu_caps()
KVM: selftests: Add -U_FORTIFY_SOURCE to avoid some unpredictable test failures
KVM: x86: Assert that non-MSI doesn't have bypass vCPU when deleting producer
KVM: Don't clobber irqfd routing type when deassigning irqfd
KVM: SVM: Check vCPU ID against max x2AVIC ID if and only if x2AVIC is enabled
| -rw-r--r-- | arch/x86/kvm/irq.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/avic.c | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 30 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.h | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/Makefile.kvm | 1 | ||||
| -rw-r--r-- | virt/kvm/eventfd.c | 44 |
8 files changed, 52 insertions, 36 deletions
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 7cc8950005b6..4c7688670c2d 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -514,7 +514,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, */ spin_lock_irq(&kvm->irqfds.lock); - if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) { + if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI || + WARN_ON_ONCE(irqfd->irq_bypass_vcpu)) { ret = kvm_pi_update_irte(irqfd, NULL); if (ret) pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n", diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 6b77b2033208..0f6c8596719b 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -376,6 +376,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb) static int avic_init_backing_page(struct kvm_vcpu *vcpu) { + u32 max_id = x2avic_enabled ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID; struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); struct vcpu_svm *svm = to_svm(vcpu); u32 id = vcpu->vcpu_id; @@ -388,8 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) * avic_vcpu_load() expects to be called if and only if the vCPU has * fully initialized AVIC. */ - if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) || - (id > x2avic_max_physical_id)) { + if (id > max_id) { kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG); vcpu->arch.apic->apicv_active = false; return 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 24d59ccfa40d..4394be40fe78 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5284,6 +5284,8 @@ static __init void svm_set_cpu_caps(void) */ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT); kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM); + + kvm_setup_xss_caps(); } static __init int svm_hardware_setup(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6b96f7aea20b..8c94241fbcca 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8051,6 +8051,8 @@ static __init void vmx_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_SHSTK); kvm_cpu_cap_clear(X86_FEATURE_IBT); } + + kvm_setup_xss_caps(); } static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63afdb6bb078..72d37c8930ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9953,6 +9953,23 @@ static struct notifier_block pvclock_gtod_notifier = { }; #endif +void kvm_setup_xss_caps(void) +{ + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) + kvm_caps.supported_xss = 0; + + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + + if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) { + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_clear(X86_FEATURE_IBT); + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + } +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps); + static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) { memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); @@ -10125,19 +10142,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (!tdp_enabled) kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT; - if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) - kvm_caps.supported_xss = 0; - - if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && - !kvm_cpu_cap_has(X86_FEATURE_IBT)) - kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; - - if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) { - kvm_cpu_cap_clear(X86_FEATURE_SHSTK); - kvm_cpu_cap_clear(X86_FEATURE_IBT); - kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; - } - if (kvm_caps.has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index fdab0ad49098..00de24f55b1f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -471,6 +471,8 @@ extern struct kvm_host_values kvm_host; extern bool enable_pmu; +void kvm_setup_xss_caps(void); + /* * Get a filtered version of KVM's supported XCR0 that strips out dynamic * features for which the current process doesn't (yet) have permission to use. diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index ba5c2b643efa..d45bf4ccb3bf 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -251,6 +251,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -U_FORTIFY_SOURCE \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -fno-strict-aliasing \ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 0e8b5277be3b..a369b20d47f0 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -157,21 +157,28 @@ irqfd_shutdown(struct work_struct *work) } -/* assumes kvm->irqfds.lock is held */ -static bool -irqfd_is_active(struct kvm_kernel_irqfd *irqfd) +static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { + /* + * Assert that either irqfds.lock or SRCU is held, as irqfds.lock must + * be held to prevent false positives (on the irqfd being active), and + * while false negatives are impossible as irqfds are never added back + * to the list once they're deactivated, the caller must at least hold + * SRCU to guard against routing changes if the irqfd is deactivated. + */ + lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) || + srcu_read_lock_held(&irqfd->kvm->irq_srcu)); + return list_empty(&irqfd->list) ? false : true; } /* * Mark the irqfd as inactive and schedule it for removal - * - * assumes kvm->irqfds.lock is held */ -static void -irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) +static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { + lockdep_assert_held(&irqfd->kvm->irqfds.lock); + BUG_ON(!irqfd_is_active(irqfd)); list_del_init(&irqfd->list); @@ -217,8 +224,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) seq = read_seqcount_begin(&irqfd->irq_entry_sc); irq = irqfd->irq_entry; } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); - /* An event has been signaled, inject an interrupt */ - if (kvm_arch_set_irq_inatomic(&irq, kvm, + + /* + * An event has been signaled, inject an interrupt unless the + * irqfd is being deassigned (isn't active), in which case the + * routing information may be stale (once the irqfd is removed + * from the list, it will stop receiving routing updates). + */ + if (unlikely(!irqfd_is_active(irqfd)) || + kvm_arch_set_irq_inatomic(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); @@ -585,18 +599,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) spin_lock_irq(&kvm->irqfds.lock); list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { - if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { - /* - * This clearing of irq_entry.type is needed for when - * another thread calls kvm_irq_routing_update before - * we flush workqueue below (we synchronize with - * kvm_irq_routing_update using irqfds.lock). - */ - write_seqcount_begin(&irqfd->irq_entry_sc); - irqfd->irq_entry.type = 0; - write_seqcount_end(&irqfd->irq_entry_sc); + if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) irqfd_deactivate(irqfd); - } } spin_unlock_irq(&kvm->irqfds.lock); |
