summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-04 10:38:56 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-04 10:38:56 -0800
commit27db1ae6ecdf23f4176276da6037eaafbd23bf94 (patch)
tree440f75bac0051c9217ef78328da0a7d70e3b9978
parenta14980444f418de53a7cc315eb4fbd8a89c72991 (diff)
parent0de4a0eec25b9171f2a2abb1a820e125e6797770 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini: - Fix a bug where AVIC is incorrectly inhibited when running with x2AVIC disabled via module param (or on a system without x2AVIC) - Fix a dangling device posted IRQs bug by explicitly checking if the irqfd is still active (on the list) when handling an eventfd signal, instead of zeroing the irqfd's routing information when the irqfd is deassigned. Zeroing the irqfd's routing info causes arm64 and x86's to not disable posting for the IRQ (kvm_arch_irq_bypass_del_producer() looks for an MSI), incorrectly leaving the IRQ in posted mode (and leading to use-after-free and memory leaks on AMD in particular). This is both the most pressing and scariest, but it's been in -next for a while. - Disable FORTIFY_SOURCE for KVM selftests to prevent the compiler from generating calls to the checked versions of memset() and friends, which leads to unexpected page faults in guest code due e.g. __memset_chk@plt not being resolved. - Explicitly configure the supported XSS capabilities from within {svm,vmx}_set_cpu_caps() to fix a bug where VMX will compute the reference VMCS configuration with SHSTK and IBT enabled, but then compute each CPUs local config with SHSTK and IBT disabled if not all CET xfeatures are enabled, e.g. if the kernel is built with X86_KERNEL_IBT=n. The mismatch in features results in differing nVMX setting, and ultimately causes kvm-intel.ko to refuse to load with nested=1. * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: Explicitly configure supported XSS from {svm,vmx}_set_cpu_caps() KVM: selftests: Add -U_FORTIFY_SOURCE to avoid some unpredictable test failures KVM: x86: Assert that non-MSI doesn't have bypass vCPU when deleting producer KVM: Don't clobber irqfd routing type when deassigning irqfd KVM: SVM: Check vCPU ID against max x2AVIC ID if and only if x2AVIC is enabled
-rw-r--r--arch/x86/kvm/irq.c3
-rw-r--r--arch/x86/kvm/svm/avic.c4
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c30
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm1
-rw-r--r--virt/kvm/eventfd.c44
8 files changed, 52 insertions, 36 deletions
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7cc8950005b6..4c7688670c2d 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -514,7 +514,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
*/
spin_lock_irq(&kvm->irqfds.lock);
- if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
+ if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI ||
+ WARN_ON_ONCE(irqfd->irq_bypass_vcpu)) {
ret = kvm_pi_update_irte(irqfd, NULL);
if (ret)
pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n",
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 6b77b2033208..0f6c8596719b 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -376,6 +376,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
+ u32 max_id = x2avic_enabled ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
struct vcpu_svm *svm = to_svm(vcpu);
u32 id = vcpu->vcpu_id;
@@ -388,8 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
* avic_vcpu_load() expects to be called if and only if the vCPU has
* fully initialized AVIC.
*/
- if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
- (id > x2avic_max_physical_id)) {
+ if (id > max_id) {
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
vcpu->arch.apic->apicv_active = false;
return 0;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 24d59ccfa40d..4394be40fe78 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5284,6 +5284,8 @@ static __init void svm_set_cpu_caps(void)
*/
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
+
+ kvm_setup_xss_caps();
}
static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6b96f7aea20b..8c94241fbcca 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8051,6 +8051,8 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
}
+
+ kvm_setup_xss_caps();
}
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63afdb6bb078..72d37c8930ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9953,6 +9953,23 @@ static struct notifier_block pvclock_gtod_notifier = {
};
#endif
+void kvm_setup_xss_caps(void)
+{
+ if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
+ kvm_caps.supported_xss = 0;
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+
+ if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+ }
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps);
+
static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
{
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
@@ -10125,19 +10142,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!tdp_enabled)
kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
- if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
- kvm_caps.supported_xss = 0;
-
- if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
- !kvm_cpu_cap_has(X86_FEATURE_IBT))
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
-
- if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
- kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
- kvm_cpu_cap_clear(X86_FEATURE_IBT);
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
- }
-
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index fdab0ad49098..00de24f55b1f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -471,6 +471,8 @@ extern struct kvm_host_values kvm_host;
extern bool enable_pmu;
+void kvm_setup_xss_caps(void);
+
/*
* Get a filtered version of KVM's supported XCR0 that strips out dynamic
* features for which the current process doesn't (yet) have permission to use.
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index ba5c2b643efa..d45bf4ccb3bf 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -251,6 +251,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -U_FORTIFY_SOURCE \
-fno-builtin-memcmp -fno-builtin-memcpy \
-fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -fno-strict-aliasing \
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0e8b5277be3b..a369b20d47f0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -157,21 +157,28 @@ irqfd_shutdown(struct work_struct *work)
}
-/* assumes kvm->irqfds.lock is held */
-static bool
-irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
+static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
+ /*
+ * Assert that either irqfds.lock or SRCU is held, as irqfds.lock must
+ * be held to prevent false positives (on the irqfd being active), and
+ * while false negatives are impossible as irqfds are never added back
+ * to the list once they're deactivated, the caller must at least hold
+ * SRCU to guard against routing changes if the irqfd is deactivated.
+ */
+ lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) ||
+ srcu_read_lock_held(&irqfd->kvm->irq_srcu));
+
return list_empty(&irqfd->list) ? false : true;
}
/*
* Mark the irqfd as inactive and schedule it for removal
- *
- * assumes kvm->irqfds.lock is held
*/
-static void
-irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
+static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
+ lockdep_assert_held(&irqfd->kvm->irqfds.lock);
+
BUG_ON(!irqfd_is_active(irqfd));
list_del_init(&irqfd->list);
@@ -217,8 +224,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
irq = irqfd->irq_entry;
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
- /* An event has been signaled, inject an interrupt */
- if (kvm_arch_set_irq_inatomic(&irq, kvm,
+
+ /*
+ * An event has been signaled, inject an interrupt unless the
+ * irqfd is being deassigned (isn't active), in which case the
+ * routing information may be stale (once the irqfd is removed
+ * from the list, it will stop receiving routing updates).
+ */
+ if (unlikely(!irqfd_is_active(irqfd)) ||
+ kvm_arch_set_irq_inatomic(&irq, kvm,
KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false) == -EWOULDBLOCK)
schedule_work(&irqfd->inject);
@@ -585,18 +599,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
- if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
- /*
- * This clearing of irq_entry.type is needed for when
- * another thread calls kvm_irq_routing_update before
- * we flush workqueue below (we synchronize with
- * kvm_irq_routing_update using irqfds.lock).
- */
- write_seqcount_begin(&irqfd->irq_entry_sc);
- irqfd->irq_entry.type = 0;
- write_seqcount_end(&irqfd->irq_entry_sc);
+ if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi)
irqfd_deactivate(irqfd);
- }
}
spin_unlock_irq(&kvm->irqfds.lock);