diff options
-rw-r--r-- | arch/x86/include/asm/kexec.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 24 | ||||
-rw-r--r-- | arch/x86/kernel/relocate_kernel_64.S | 13 |
6 files changed, 52 insertions, 22 deletions
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 12cebbcdb6c8..5cfb27f26583 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -17,8 +17,8 @@ #include <linux/bits.h> -#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) -#define RELOC_KERNEL_HOST_MEM_ENC_ACTIVE BIT(1) +#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) +#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1) #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bde58f6510ac..a24c7805acdb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -731,6 +731,8 @@ void __noreturn stop_this_cpu(void *dummy); void microcode_check(struct cpuinfo_x86 *prev_info); void store_cpu_caps(struct cpuinfo_x86 *info); +DECLARE_PER_CPU(bool, cache_state_incoherent); + enum l1tf_mitigations { L1TF_MITIGATION_OFF, L1TF_MITIGATION_AUTO, diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a5ece6ebe8a7..66a682be4a1a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -546,6 +546,23 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) u64 msr; /* + * Mark using WBINVD is needed during kexec on processors that + * support SME. This provides support for performing a successful + * kexec when going from SME inactive to SME active (or vice-versa). + * + * The cache must be cleared so that if there are entries with the + * same physical address, both with and without the encryption bit, + * they don't race each other when flushed and potentially end up + * with the wrong entry being committed to memory. + * + * Test the CPUID bit directly because with mem_encrypt=off the + * BSP will clear the X86_FEATURE_SME bit and the APs will not + * see it set after that. + */ + if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + __this_cpu_write(cache_state_incoherent, true); + + /* * BIOS support is required for SME and SEV. * For SME: If BIOS has enabled SME then adjust x86_phys_bits by * the SME physical address space reduction value. diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 5cda8d8d8b13..dfb91091f451 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -29,6 +29,7 @@ #include <asm/set_memory.h> #include <asm/cpu.h> #include <asm/efi.h> +#include <asm/processor.h> #ifdef CONFIG_ACPI /* @@ -426,11 +427,11 @@ void __nocfi machine_kexec(struct kimage *image) relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT; /* - * This must be done before load_segments() since if call depth tracking - * is used then GS must be valid to make any function calls. + * This must be done before load_segments() since it resets + * GS to 0 and percpu data needs the correct GS to work. */ - if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) - relocate_kernel_flags |= RELOC_KERNEL_HOST_MEM_ENC_ACTIVE; + if (this_cpu_read(cache_state_incoherent)) + relocate_kernel_flags |= RELOC_KERNEL_CACHE_INCOHERENT; /* * The segment registers are funny things, they have both a @@ -441,6 +442,11 @@ void __nocfi machine_kexec(struct kimage *image) * * Take advantage of this here by force loading the segments, * before the GDT is zapped with an invalid value. + * + * load_segments() resets GS to 0. Don't make any function call + * after here since call depth tracking uses percpu variables to + * operate (relocate_kernel() is explicitly ignored by call depth + * tracking). */ load_segments(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1b7960cf6eb0..f2bbbeef5477 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -89,6 +89,16 @@ DEFINE_PER_CPU(bool, __tss_limit_invalid); EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); /* + * The cache may be in an incoherent state and needs flushing during kexec. + * E.g., on SME/TDX platforms, dirty cacheline aliases with and without + * encryption bit(s) can coexist and the cache needs to be flushed before + * booting to the new kernel to avoid the silent memory corruption due to + * dirty cachelines with different encryption property being written back + * to the memory. + */ +DEFINE_PER_CPU(bool, cache_state_incoherent); + +/* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. */ @@ -827,19 +837,7 @@ void __noreturn stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(c); - /* - * Use wbinvd on processors that support SME. This provides support - * for performing a successful kexec when going from SME inactive - * to SME active (or vice-versa). The cache must be cleared so that - * if there are entries with the same physical address, both with and - * without the encryption bit, they don't race each other when flushed - * and potentially end up with the wrong entry being committed to - * memory. - * - * Test the CPUID bit directly because the machine might've cleared - * X86_FEATURE_SME due to cmdline options. - */ - if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + if (this_cpu_read(cache_state_incoherent)) wbinvd(); /* diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 26e945f85d19..11e20bb13aca 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -198,14 +198,21 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movq %r9, %cr3 /* + * If the memory cache is in incoherent state, e.g., due to + * memory encryption, do WBINVD to flush cache. + * * If SME is active, there could be old encrypted cache line * entries that will conflict with the now unencrypted memory * used by kexec. Flush the caches before copying the kernel. + * + * Note SME sets this flag to true when the platform supports + * SME, so the WBINVD is performed even SME is not activated + * by the kernel. But this has no harm. */ - testb $RELOC_KERNEL_HOST_MEM_ENC_ACTIVE, %r11b - jz .Lsme_off + testb $RELOC_KERNEL_CACHE_INCOHERENT, %r11b + jz .Lnowbinvd wbinvd -.Lsme_off: +.Lnowbinvd: call swap_pages |