Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/boot/dts/mediatek/Makefile |  55
-rw-r--r--  arch/arm64/kernel/fpsimd.c            | 130
-rw-r--r--  arch/arm64/kernel/process.c           |   1
-rw-r--r--  arch/arm64/mm/mmu.c                   |  33
-rw-r--r--  arch/mips/alchemy/common/setup.c      |   3
-rw-r--r--  arch/mips/sgi-ip22/ip22-gio.c         |   3
-rw-r--r--  arch/x86/include/asm/bug.h            |   2
-rw-r--r--  arch/x86/include/asm/irq_remapping.h  |   7
-rw-r--r--  arch/x86/include/asm/irqflags.h       |   2
-rw-r--r--  arch/x86/include/asm/uv/bios.h        |   2
-rw-r--r--  arch/x86/kernel/cpu/sgx/ioctl.c       |   2
-rw-r--r--  arch/x86/kernel/fpu/xstate.c          |   4
-rw-r--r--  arch/x86/kernel/irq.c                 |  23
-rw-r--r--  arch/x86/kvm/cpuid.c                  |  11
-rw-r--r--  arch/x86/kvm/svm/nested.c             |   4
-rw-r--r--  arch/x86/kvm/svm/svm.c                |   2
-rw-r--r--  arch/x86/kvm/svm/svm.h                |   7
-rw-r--r--  arch/x86/kvm/vmx/nested.c             |   3
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                |   9
-rw-r--r--  arch/x86/kvm/x86.c                    |   7
-rw-r--r--  arch/x86/xen/enlighten_pv.c           |   2
21 files changed, 158 insertions(+), 154 deletions(-)
diff --git a/arch/arm64/boot/dts/mediatek/Makefile b/arch/arm64/boot/dts/mediatek/Makefile
index c5fd6191a925..3f76d9ce9879 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -19,6 +19,27 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nand.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-nor.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sata.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd.dtbo
+mt7986a-bananapi-bpi-r3-emmc-nand-dtbs := \
+ mt7986a-bananapi-bpi-r3.dtb \
+ mt7986a-bananapi-bpi-r3-emmc.dtbo \
+ mt7986a-bananapi-bpi-r3-nand.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nand.dtb
+mt7986a-bananapi-bpi-r3-emmc-nor-dtbs := \
+ mt7986a-bananapi-bpi-r3.dtb \
+ mt7986a-bananapi-bpi-r3-emmc.dtbo \
+ mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-emmc-nor.dtb
+mt7986a-bananapi-bpi-r3-sd-nand-dtbs := \
+ mt7986a-bananapi-bpi-r3.dtb \
+ mt7986a-bananapi-bpi-r3-sd.dtbo \
+ mt7986a-bananapi-bpi-r3-nand.dtbo \
+ mt7986a-bananapi-bpi-r3-sata.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nand.dtb
+mt7986a-bananapi-bpi-r3-sd-nor-dtbs := \
+ mt7986a-bananapi-bpi-r3.dtb \
+ mt7986a-bananapi-bpi-r3-sd.dtbo \
+ mt7986a-bananapi-bpi-r3-nor.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-bananapi-bpi-r3-sd-nor.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986a-rfb.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7986b-rfb.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4.dtb
@@ -31,6 +52,38 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-sd.dtbo
dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtbo
+mt7988a-bananapi-bpi-r4-emmc-dtbs := \
+ mt7988a-bananapi-bpi-r4.dtb \
+ mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-emmc.dtb
+mt7988a-bananapi-bpi-r4-sd-dtbs := \
+ mt7988a-bananapi-bpi-r4.dtb \
+ mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-sd.dtb
+mt7988a-bananapi-bpi-r4-2g5-emmc-dtbs := \
+ mt7988a-bananapi-bpi-r4-2g5.dtb \
+ mt7988a-bananapi-bpi-r4-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-emmc.dtb
+mt7988a-bananapi-bpi-r4-2g5-sd-dtbs := \
+ mt7988a-bananapi-bpi-r4-2g5.dtb \
+ mt7988a-bananapi-bpi-r4-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-2g5-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-emmc-dtbs := \
+ mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+ mt7988a-bananapi-bpi-r4-pro-emmc.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-emmc.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-dtbs := \
+ mt7988a-bananapi-bpi-r4-pro-8x.dtb \
+ mt7988a-bananapi-bpi-r4-pro-sd.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15-dtbs := \
+ mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+ mt7988a-bananapi-bpi-r4-pro-cn15.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn15.dtb
+mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18-dtbs := \
+ mt7988a-bananapi-bpi-r4-pro-8x-sd.dtb \
+ mt7988a-bananapi-bpi-r4-pro-cn18.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt7988a-bananapi-bpi-r4-pro-8x-sd-cn18.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8167-pumpkin.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8173-elm-hana.dtb
@@ -113,6 +166,8 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += mt8390-grinn-genio-700-sbc.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-kontron-3-5-sbc-i1200.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtbo
+mt8395-radxa-nio-12l-8-hd-panel-dtbs := mt8395-radxa-nio-12l.dtb mt8395-radxa-nio-12l-8-hd-panel.dtbo
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt8395-radxa-nio-12l-8-hd-panel.dtb
dtb-$(CONFIG_ARCH_MEDIATEK) += mt8516-pumpkin.dtb
# Device tree overlays support
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c154f72634e0..9de1d8a604cb 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -180,13 +180,6 @@ static inline void set_sve_default_vl(int val)
set_default_vl(ARM64_VEC_SVE, val);
}
-static u8 *efi_sve_state;
-
-#else /* ! CONFIG_ARM64_SVE */
-
-/* Dummy declaration for code that will be optimised out: */
-extern u8 *efi_sve_state;
-
#endif /* ! CONFIG_ARM64_SVE */
#ifdef CONFIG_ARM64_SME
@@ -1095,36 +1088,6 @@ int vec_verify_vq_map(enum vec_type type)
return 0;
}
-static void __init sve_efi_setup(void)
-{
- int max_vl = 0;
- int i;
-
- if (!IS_ENABLED(CONFIG_EFI))
- return;
-
- for (i = 0; i < ARRAY_SIZE(vl_info); i++)
- max_vl = max(vl_info[i].max_vl, max_vl);
-
- /*
- * alloc_percpu() warns and prints a backtrace if this goes wrong.
- * This is evidence of a crippled system and we are returning void,
- * so no attempt is made to handle this situation here.
- */
- if (!sve_vl_valid(max_vl))
- goto fail;
-
- efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
- GFP_KERNEL);
- if (!efi_sve_state)
- goto fail;
-
- return;
-
-fail:
- panic("Cannot allocate memory for EFI SVE save/restore");
-}
-
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1185,8 +1148,6 @@ void __init sve_setup(void)
if (sve_max_virtualisable_vl() < sve_max_vl())
pr_warn("%s: unvirtualisable vector lengths present\n",
info->name);
-
- sve_efi_setup();
}
/*
@@ -1947,9 +1908,6 @@ EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
static struct user_fpsimd_state efi_fpsimd_state;
-static bool efi_fpsimd_state_used;
-static bool efi_sve_state_used;
-static bool efi_sm_state;
/*
* EFI runtime services support functions
@@ -1976,43 +1934,26 @@ void __efi_fpsimd_begin(void)
if (may_use_simd()) {
kernel_neon_begin(&efi_fpsimd_state);
} else {
- WARN_ON(preemptible());
-
/*
- * If !efi_sve_state, SVE can't be in use yet and doesn't need
- * preserving:
+ * We are running in hardirq or NMI context, and the only
+ * legitimate case where this might happen is when EFI pstore
+ * is attempting to record the system's dying gasps into EFI
+ * variables. This could be due to an oops, a panic or a call
+ * to emergency_restart(), and in none of those cases can we
+ * expect the current task to ever return to user space again,
+ * or for the kernel to resume any normal execution, for that
+ * matter (an oops in hardirq context triggers a panic too).
+ *
+ * Therefore, there is no point in attempting to preserve any
+ * SVE/SME state here. On the off chance that we might have
+ * ended up here for a different reason inadvertently, kill the
+ * task and preserve/restore the base FP/SIMD state, which
+ * might belong to kernel mode FP/SIMD.
*/
- if (system_supports_sve() && efi_sve_state != NULL) {
- bool ffr = true;
- u64 svcr;
-
- efi_sve_state_used = true;
-
- if (system_supports_sme()) {
- svcr = read_sysreg_s(SYS_SVCR);
-
- efi_sm_state = svcr & SVCR_SM_MASK;
-
- /*
- * Unless we have FA64 FFR does not
- * exist in streaming mode.
- */
- if (!system_supports_fa64())
- ffr = !(svcr & SVCR_SM_MASK);
- }
-
- sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
- &efi_fpsimd_state.fpsr, ffr);
-
- if (system_supports_sme())
- sysreg_clear_set_s(SYS_SVCR,
- SVCR_SM_MASK, 0);
-
- } else {
- fpsimd_save_state(&efi_fpsimd_state);
- }
-
- efi_fpsimd_state_used = true;
+ pr_warn_ratelimited("Calling EFI runtime from %s context\n",
+ in_nmi() ? "NMI" : "hardirq");
+ force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+ fpsimd_save_state(&efi_fpsimd_state);
}
}
@@ -2024,41 +1965,10 @@ void __efi_fpsimd_end(void)
if (!system_supports_fpsimd())
return;
- if (!efi_fpsimd_state_used) {
+ if (may_use_simd()) {
kernel_neon_end(&efi_fpsimd_state);
} else {
- if (system_supports_sve() && efi_sve_state_used) {
- bool ffr = true;
-
- /*
- * Restore streaming mode; EFI calls are
- * normal function calls so should not return in
- * streaming mode.
- */
- if (system_supports_sme()) {
- if (efi_sm_state) {
- sysreg_clear_set_s(SYS_SVCR,
- 0,
- SVCR_SM_MASK);
-
- /*
- * Unless we have FA64 FFR does not
- * exist in streaming mode.
- */
- if (!system_supports_fa64())
- ffr = false;
- }
- }
-
- sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
- &efi_fpsimd_state.fpsr, ffr);
-
- efi_sve_state_used = false;
- } else {
- fpsimd_load_state(&efi_fpsimd_state);
- }
-
- efi_fpsimd_state_used = false;
+ fpsimd_load_state(&efi_fpsimd_state);
}
}
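
For context, the two helpers above bracket every EFI runtime service
call, so the save and restore paths must stay symmetric on the
may_use_simd() decision. A minimal sketch of such a wrapper, assuming a
hypothetical thunk rather than the real arch/arm64 efi.h machinery:

	/*
	 * Hedged sketch, not the actual arch/arm64 EFI glue:
	 * efi_call_one() is a hypothetical runtime-service thunk. The
	 * point is the pairing: whatever __efi_fpsimd_begin() saved,
	 * __efi_fpsimd_end() restores, keyed off the same
	 * may_use_simd() result in both helpers.
	 */
	static efi_status_t efi_call_one(efi_status_t (*fn)(void))
	{
		efi_status_t status;

		__efi_fpsimd_begin();	/* save task or kernel-mode FP/SIMD state */
		status = fn();		/* firmware is free to clobber SIMD regs */
		__efi_fpsimd_end();	/* restore exactly what begin saved */

		return status;
	}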
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fba7ca102a8c..489554931231 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -292,6 +292,7 @@ static void flush_gcs(void)
current->thread.gcs_base = 0;
current->thread.gcs_size = 0;
current->thread.gcs_el0_mode = 0;
+ current->thread.gcs_el0_locked = 0;
write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
write_sysreg_s(0, SYS_GCSPR_EL0);
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9ae7ce00a7ef..8e1d80a7033e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -767,18 +767,6 @@ static inline bool force_pte_mapping(void)
return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world();
}
-static inline bool split_leaf_mapping_possible(void)
-{
- /*
- * !BBML2_NOABORT systems should never run into scenarios where we would
- * have to split. So exit early and let calling code detect it and raise
- * a warning.
- */
- if (!system_supports_bbml2_noabort())
- return false;
- return !force_pte_mapping();
-}
-
static DEFINE_MUTEX(pgtable_split_lock);
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@@ -786,11 +774,22 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
int ret;
/*
- * Exit early if the region is within a pte-mapped area or if we can't
- * split. For the latter case, the permission change code will raise a
- * warning if not already pte-mapped.
+ * !BBML2_NOABORT systems should not be trying to change permissions on
+ * anything that is not pte-mapped in the first place. Just return early
+ * and let the permission change code raise a warning if not already
+ * pte-mapped.
*/
- if (!split_leaf_mapping_possible() || is_kfence_address((void *)start))
+ if (!system_supports_bbml2_noabort())
+ return 0;
+
+ /*
+ * If the region is within a pte-mapped area, there is no need to try to
+ * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
+ * change permissions from atomic context so for those cases (which are
+ * always pte-mapped), we must not go any further because taking the
+ * mutex below may sleep.
+ */
+ if (force_pte_mapping() || is_kfence_address((void *)start))
return 0;
/*
@@ -1089,7 +1088,7 @@ bool arch_kfence_init_pool(void)
int ret;
/* Exit early if we know the linear map is already pte-mapped. */
- if (!split_leaf_mapping_possible())
+ if (force_pte_mapping())
return true;
/* Kfence pool is already pte-mapped for the early init case. */
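
A hedged sketch of the caller side, modeled on the arm64 permission
change flow (hypothetical function name, simplified): covering block
mappings are split first, then the new attributes are applied at pte
granularity. This is why the early-return cases above matter: atomic
callers must bail out before the sleeping mutex.

	/*
	 * Hedged sketch, not the real pageattr code: split any block
	 * mappings over the range, then update the ptes.
	 */
	static int change_protections(unsigned long start, unsigned long end,
				      pgprot_t set, pgprot_t clear)
	{
		int ret = split_kernel_leaf_mapping(start, end);

		if (ret)
			return ret;

		/* ... walk [start, end) and update each pte ... */
		return 0;
	}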
diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index c35b4f809d51..992134a8c23a 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -94,8 +94,7 @@ phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
return phys_addr;
}
-static inline unsigned long io_remap_pfn_range_pfn(unsigned long pfn,
- unsigned long size)
+unsigned long io_remap_pfn_range_pfn(unsigned long pfn, unsigned long size)
{
phys_addr_t phys_addr = fixup_bigphys_addr(pfn << PAGE_SHIFT, size);
diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c
index 5893ea4e382c..19b70928d6dc 100644
--- a/arch/mips/sgi-ip22/ip22-gio.c
+++ b/arch/mips/sgi-ip22/ip22-gio.c
@@ -372,7 +372,8 @@ static void ip22_check_gio(int slotno, unsigned long addr, int irq)
gio_dev->resource.flags = IORESOURCE_MEM;
gio_dev->irq = irq;
dev_set_name(&gio_dev->dev, "%d", slotno);
- gio_device_register(gio_dev);
+ if (gio_device_register(gio_dev))
+ put_device(&gio_dev->dev);
} else
printk(KERN_INFO "GIO: slot %d : Empty\n", slotno);
}
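
For background on that error path: device_register() takes a kobject
reference even when it fails, so a failed registration must be answered
with put_device(), never a direct free. A hedged sketch of the general
driver-core pattern (allocation shown for completeness):

	/*
	 * Hedged sketch of the driver-core rule: once device_register()
	 * has run, the embedded kobject owns the object, and
	 * put_device() is the only correct way to dispose of it (the
	 * release() callback does the freeing).
	 */
	struct gio_device *gio_dev = kzalloc(sizeof(*gio_dev), GFP_KERNEL);

	if (!gio_dev)
		return;

	/* ... fill in resource, irq, name ... */
	if (gio_device_register(gio_dev))
		put_device(&gio_dev->dev);	/* drop the ref; release() frees */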
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index d561a8443c13..9b4e04690e1a 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...);
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*/
-#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b)
+#define ASM_UD2 __ASM_FORM(ud2)
#define INSN_UD2 0x0b0f
#define LEN_UD2 2
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5a0d42464d44..4e55d1755846 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
}
#endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq NULL
+#endif
+
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index b30e5474c18e..a1193e9d65f2 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -25,7 +25,7 @@ extern __always_inline unsigned long native_save_fl(void)
*/
asm volatile("# __raw_save_flags\n\t"
"pushf ; pop %0"
- : "=rm" (flags)
+ : ASM_OUTPUT_RM (flags)
: /* no input */
: "memory");
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6989b824fd32..d0b62e255290 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -122,7 +122,7 @@ struct uv_systab {
struct {
u32 type:8; /* type of entry */
u32 offset:24; /* byte offset from struct start to entry */
- } entry[1]; /* additional entries follow */
+ } entry[]; /* additional entries follow */
};
extern struct uv_systab *uv_systab;
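
With entry[] converted to a C99 flexible array member, the table walk
is unchanged, but FORTIFY_SOURCE-style bounds checking no longer trips
over a fake one-element array. A hedged sketch of iterating the table
(hypothetical helper; assumes, as in the real decoder, that a type of 0
terminates the list):

	/*
	 * Hedged sketch: walk the variable-length entry table. Type 0
	 * (UV_SYSTAB_TYPE_UNUSED) terminating the list is an assumption
	 * carried over from the real setup code.
	 */
	static void __init uv_systab_walk(struct uv_systab *st)
	{
		int i;

		for (i = 0; st->entry[i].type != 0; i++)
			pr_info("UV: systab entry %d: type %u, offset %u\n",
				i, st->entry[i].type, st->entry[i].offset);
	}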
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 66f1efa16fbb..9322a9287dc7 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
/*
* If the caller requires measurement of the page as a proof for the content,
* use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
- * operation until the entire page is measured."
+ * operation until the entire page is measured.
*/
static int __sgx_encl_extend(struct sgx_encl *encl,
struct sgx_epc_page *epc_page)
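
The corrected comment describes measuring the page in 256-byte
increments; a hedged sketch of that loop, close to the real
__sgx_encl_extend() body but with fault reporting trimmed:

	/*
	 * Hedged sketch: sixteen EEXTEND operations cover one 4 KiB EPC
	 * page, 256 bytes at a time. __eextend() wraps ENCLS[EEXTEND];
	 * the real code also distinguishes ENCLS faults.
	 */
	for (offset = 0; offset < PAGE_SIZE; offset += 256) {
		ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
				sgx_get_epc_virt_addr(epc_page) + offset);
		if (ret)
			return -EFAULT;
	}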
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 48113c5193aa..76153dfb58c9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
};
if (!dump_emit(cprm, &xc, sizeof(xc)))
- return 0;
+ return -1;
num_records++;
}
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
return 1;
num_records = dump_xsave_layout_desc(cprm);
- if (!num_records)
+ if (num_records < 0)
return 1;
/* Total size should be equal to the number of records */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 86f4e574de02..b2fe6181960c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
void intel_posted_msi_init(void)
{
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
this_cpu_write(posted_msi_pi_desc.ndst, destination);
}
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+
+ /*
+ * Handle the rare case that irq_retrigger() raised the actual
+ * assigned vector on the target CPU, which means that it was not
+ * invoked via the posted MSI handler below. In that case APIC EOI
+ * is required as otherwise the ISR entry becomes stale and lower
+ * priority interrupts are never going to be delivered after that.
+ *
+ * If the posted handler invoked the device interrupt handler then
+ * the EOI would be premature because it would acknowledge the
+ * posted vector.
+ */
+ if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+ apic_eoi();
+}
+
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
pid = this_cpu_ptr(&posted_msi_pi_desc);
+ /* Mark the handler active for intel_ack_posted_msi_irq() */
+ __this_cpu_write(posted_msi_handler_active, true);
inc_irq_stat(posted_msi_notification_count);
irq_enter();
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
apic_eoi();
irq_exit();
+ __this_cpu_write(posted_msi_handler_active, false);
set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */
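
The companion change in irq_remapping.h above defines
intel_ack_posted_msi_irq as NULL when X86_POSTED_MSI is off, which lets
an irq_chip initializer reference it unconditionally. An illustrative
sketch only, not the actual drivers/iommu chip definition:

	/*
	 * Hedged sketch: with the NULL fallback this compiles in both
	 * configurations; the assumption is that the flow which invokes
	 * .irq_ack is only wired up when posted MSIs are enabled.
	 */
	static struct irq_chip posted_msi_chip_sketch = {
		.name		= "PMSI-SKETCH",
		.irq_ack	= intel_ack_posted_msi_irq, /* NULL if !X86_POSTED_MSI */
		.irq_mask	= irq_chip_mask_parent,
		.irq_unmask	= irq_chip_unmask_parent,
	};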
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d563a948318b..88a5426674a1 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -510,10 +510,17 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int r;
/*
+ * Apply pending runtime CPUID updates to the current CPUID entries to
+ * avoid false positives due to mismatches on KVM-owned feature flags.
+ */
+ if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ kvm_update_cpuid_runtime(vcpu);
+
+ /*
* Swap the existing (old) entries with the incoming (new) entries in
* order to massage the new entries, e.g. to account for dynamic bits
- * that KVM controls, without clobbering the current guest CPUID, which
- * KVM needs to preserve in order to unwind on failure.
+ * that KVM controls, without losing the current guest CPUID, which KVM
+ * needs to preserve in order to unwind on failure.
*
* Similarly, save the vCPU's current cpu_caps so that the capabilities
* can be updated alongside the CPUID entries when performing runtime
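
A condensed sketch of the swap-and-unwind pattern that comment refers
to, simplified from the real kvm_set_cpuid() (the actual code also
swaps the saved cpu_caps):

	/*
	 * Hedged sketch: operate on the incoming entries in place of
	 * the current ones, then swap back on failure so the guest
	 * CPUID is never lost.
	 */
	swap(vcpu->arch.cpuid_entries, e2);
	swap(vcpu->arch.cpuid_nent, nent);

	r = kvm_check_cpuid(vcpu);
	if (r) {
		swap(vcpu->arch.cpuid_entries, e2);	/* unwind to old CPUID */
		swap(vcpu->arch.cpuid_nent, nent);
		return r;
	}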
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c81005b24522..ba0f11c68372 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -985,7 +985,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
if (!nested_vmcb_check_save(vcpu) ||
!nested_vmcb_check_controls(vcpu)) {
vmcb12->control.exit_code = SVM_EXIT_ERR;
- vmcb12->control.exit_code_hi = 0;
+ vmcb12->control.exit_code_hi = -1u;
vmcb12->control.exit_info_1 = 0;
vmcb12->control.exit_info_2 = 0;
goto out;
@@ -1018,7 +1018,7 @@ out_exit_err:
svm->soft_int_injected = false;
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
- svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_code_hi = -1u;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f56c2d895011..24d59ccfa40d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2443,6 +2443,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
if (cr0 ^ val) {
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+ svm->vmcb->control.exit_code_hi = 0;
ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
}
@@ -4617,6 +4618,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
if (static_cpu_has(X86_FEATURE_NRIPS))
vmcb->control.next_rip = info->next_rip;
vmcb->control.exit_code = icpt_info.exit_code;
+ vmcb->control.exit_code_hi = 0;
vmexit = nested_svm_exit_handled(svm);
ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9e151dbdef25..01be93a53d07 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -761,9 +761,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
{
- svm->vmcb->control.exit_code = exit_code;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
+ svm->vmcb->control.exit_code = exit_code;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
return nested_svm_vmexit(svm);
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 40777278eabb..6137e5307d0f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -19,6 +19,7 @@
#include "trace.h"
#include "vmx.h"
#include "smm.h"
+#include "x86_ops.h"
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5165,7 +5166,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (vmx->nested.update_vmcs01_apicv_status) {
vmx->nested.update_vmcs01_apicv_status = false;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ vmx_refresh_apicv_exec_ctrl(vcpu);
}
if (vmx->nested.update_vmcs01_hwapic_isr) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4cbe8c84b636..6b96f7aea20b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6937,15 +6937,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
* VM-Exit, otherwise L1 will run with a stale SVI.
*/
if (is_guest_mode(vcpu)) {
- /*
- * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
- * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
- * Note, userspace can stuff state while L2 is active; assert
- * that VID is disabled if and only if the vCPU is in KVM_RUN
- * to avoid false positives if userspace is setting APIC state.
- */
- WARN_ON_ONCE(vcpu->wants_to_run &&
- nested_cpu_has_vid(get_vmcs12(vcpu)));
to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
return;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c6d899d53dd..ff8812f3a129 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10886,9 +10886,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
* pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
* still active when the interrupt got accepted. Make sure
* kvm_check_and_inject_events() is called to check for that.
+ *
+ * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+ * highest bit in vISR and the next accelerated EOI in the guest won't
+ * be virtualized correctly (the CPU uses SVI to determine which vISR
+ * vector to clear).
*/
if (!apic->apicv_active)
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ else
+ kvm_apic_update_hwapic_isr(vcpu);
out:
preempt_enable();
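
The new comment explains why SVI must be refreshed when APICv is
re-enabled; for reference, a hedged sketch of how the highest
in-service vector is derived from the vISR (hypothetical helper,
simplified from the lapic code):

	/*
	 * Hedged sketch: the ISR is 256 bits spread over eight 32-bit
	 * APIC registers spaced 0x10 apart; SVI must name the highest
	 * set vector so the next virtualized EOI clears the right bit.
	 */
	static int highest_in_service_vector(struct kvm_lapic *apic)
	{
		int word;

		for (word = 7; word >= 0; word--) {
			u32 isr = kvm_lapic_get_reg(apic, APIC_ISR + word * 0x10);

			if (isr)
				return word * 32 + fls(isr) - 1;
		}
		return -1;	/* no vector currently in service */
	}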
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4806cc28d7ca..b74ff8bc7f2a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -108,7 +108,7 @@ static int xen_cpu_dead_pv(unsigned int cpu);
* calls.
*/
DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+EXPORT_PER_CPU_SYMBOL_GPL(xen_in_preemptible_hcall);
/*
* In case of scheduling the flag must be cleared and restored after