summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/sev-handle-vc.c3
-rw-r--r--arch/x86/boot/compressed/sev.c132
-rw-r--r--arch/x86/boot/cpuflags.c13
-rw-r--r--arch/x86/boot/startup/Makefile22
-rw-r--r--arch/x86/boot/startup/exports.h14
-rw-r--r--arch/x86/boot/startup/gdt_idt.c4
-rw-r--r--arch/x86/boot/startup/map_kernel.c4
-rw-r--r--arch/x86/boot/startup/sev-shared.c327
-rw-r--r--arch/x86/boot/startup/sev-startup.c210
-rw-r--r--arch/x86/boot/startup/sme.c30
-rw-r--r--arch/x86/coco/core.c3
-rw-r--r--arch/x86/coco/sev/Makefile8
-rw-r--r--arch/x86/coco/sev/core.c276
-rw-r--r--arch/x86/coco/sev/noinstr.c (renamed from arch/x86/coco/sev/sev-nmi.c)74
-rw-r--r--arch/x86/coco/sev/vc-handle.c22
-rw-r--r--arch/x86/coco/sev/vc-shared.c143
-rw-r--r--arch/x86/include/asm/apic.h11
-rw-r--r--arch/x86/include/asm/apicdef.h2
-rw-r--r--arch/x86/include/asm/boot.h2
-rw-r--r--arch/x86/include/asm/init.h6
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/sev-common.h1
-rw-r--r--arch/x86/include/asm/sev-internal.h28
-rw-r--r--arch/x86/include/asm/sev.h41
-rw-r--r--arch/x86/include/uapi/asm/svm.h4
-rw-r--r--arch/x86/kernel/apic/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c85
-rw-r--r--arch/x86/kernel/apic/vector.c28
-rw-r--r--arch/x86/kernel/apic/x2apic_savic.c428
-rw-r--r--arch/x86/kernel/head64.c5
-rw-r--r--arch/x86/kernel/head_32.S5
-rw-r--r--arch/x86/kernel/head_64.S10
-rw-r--r--arch/x86/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c6
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S6
-rw-r--r--arch/x86/platform/pvh/head.S2
-rw-r--r--arch/x86/tools/relocs.c8
-rw-r--r--arch/x86/virt/svm/sev.c7
-rw-r--r--drivers/crypto/ccp/Makefile3
-rw-r--r--drivers/crypto/ccp/psp-dev.c20
-rw-r--r--drivers/crypto/ccp/psp-dev.h8
-rw-r--r--drivers/crypto/ccp/sev-dev.c182
-rw-r--r--drivers/crypto/ccp/sev-dev.h3
-rw-r--r--drivers/crypto/ccp/sfs.c311
-rw-r--r--drivers/crypto/ccp/sfs.h47
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c4
-rw-r--r--include/linux/cc_platform.h8
-rw-r--r--include/linux/psp-platform-access.h2
-rw-r--r--include/uapi/linux/psp-sfs.h87
-rw-r--r--tools/objtool/arch/x86/decode.c12
-rw-r--r--tools/objtool/builtin-check.c2
-rw-r--r--tools/objtool/check.c48
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h1
-rw-r--r--tools/objtool/noreturns.h1
61 files changed, 2042 insertions, 708 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index f7fb3d88c57b..36b985d0e7bf 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -3,6 +3,8 @@
# Branch profiling isn't noinstr-safe. Disable it for arch/x86/*
subdir-ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING
+obj-y += boot/startup/
+
obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/
obj-y += entry/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 679c7f980aa3..72a27bc30caf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -487,6 +487,19 @@ config X86_X2APIC
If in doubt, say Y.
+config AMD_SECURE_AVIC
+ bool "AMD Secure AVIC"
+ depends on AMD_MEM_ENCRYPT && X86_X2APIC
+ help
+ Enable this to get AMD Secure AVIC support on guests that have this feature.
+
+ AMD Secure AVIC provides hardware acceleration for performance sensitive
+ APIC accesses and support for managing guest owned APIC state for SEV-SNP
+ guests. Secure AVIC does not support xAPIC mode. It has a functional
+ dependency on x2apic being enabled in the guest.
+
+ If you don't know what to do here, say N.
+
config X86_POSTED_MSI
bool "Enable MSI and MSI-x delivery by posted interrupts"
depends on X86_64 && IRQ_REMAP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4b4e2a3ac6df..4db7e4bf69f5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -275,7 +275,6 @@ archprepare: $(cpufeaturemasks.hdr)
###
# Kernel objects
-core-y += arch/x86/boot/startup/
libs-y += arch/x86/lib/
# drivers-y are linked after core-y
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 3a38fdcdb9bd..74657589264d 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -73,7 +73,7 @@ LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|_sinittext\|__inittext_end\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 94b5991da001..0f41ca0e52c0 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -332,6 +332,8 @@ static size_t parse_elf(void *output)
}
const unsigned long kernel_text_size = VO___start_rodata - VO__text;
+const unsigned long kernel_inittext_offset = VO__sinittext - VO__text;
+const unsigned long kernel_inittext_size = VO___inittext_end - VO__sinittext;
const unsigned long kernel_total_size = VO__end - VO__text;
static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
diff --git a/arch/x86/boot/compressed/sev-handle-vc.c b/arch/x86/boot/compressed/sev-handle-vc.c
index 89dd02de2a0f..7530ad8b768b 100644
--- a/arch/x86/boot/compressed/sev-handle-vc.c
+++ b/arch/x86/boot/compressed/sev-handle-vc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
+#include "error.h"
#include "sev.h"
#include <linux/kernel.h>
@@ -14,6 +15,8 @@
#include <asm/fpu/xcr.h>
#define __BOOT_COMPRESSED
+#undef __init
+#define __init
/* Basic instruction decoding support needed */
#include "../../lib/inat.c"
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index fd1b67dfea22..6e5c32a53d03 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -32,102 +32,47 @@ struct ghcb *boot_ghcb;
#undef __init
#define __init
-#undef __head
-#define __head
-
#define __BOOT_COMPRESSED
-extern struct svsm_ca *boot_svsm_caa;
-extern u64 boot_svsm_caa_pa;
-
-struct svsm_ca *svsm_get_caa(void)
-{
- return boot_svsm_caa;
-}
-
-u64 svsm_get_caa_pa(void)
-{
- return boot_svsm_caa_pa;
-}
-
-int svsm_perform_call_protocol(struct svsm_call *call);
-
u8 snp_vmpl;
+u16 ghcb_version;
+
+u64 boot_svsm_caa_pa;
/* Include code for early handlers */
#include "../../boot/startup/sev-shared.c"
-int svsm_perform_call_protocol(struct svsm_call *call)
-{
- struct ghcb *ghcb;
- int ret;
-
- if (boot_ghcb)
- ghcb = boot_ghcb;
- else
- ghcb = NULL;
-
- do {
- ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
- : svsm_perform_msr_protocol(call);
- } while (ret == -EAGAIN);
-
- return ret;
-}
-
static bool sev_snp_enabled(void)
{
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
}
-static void __page_state_change(unsigned long paddr, enum psc_op op)
-{
- u64 val, msr;
-
- /*
- * If private -> shared then invalidate the page before requesting the
- * state change in the RMP table.
- */
- if (op == SNP_PAGE_STATE_SHARED)
- pvalidate_4k_page(paddr, paddr, false);
-
- /* Save the current GHCB MSR value */
- msr = sev_es_rd_ghcb_msr();
-
- /* Issue VMGEXIT to change the page state in RMP table. */
- sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
- VMGEXIT();
-
- /* Read the response of the VMGEXIT. */
- val = sev_es_rd_ghcb_msr();
- if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
-
- /* Restore the GHCB MSR value */
- sev_es_wr_ghcb_msr(msr);
-
- /*
- * Now that page state is changed in the RMP table, validate it so that it is
- * consistent with the RMP entry.
- */
- if (op == SNP_PAGE_STATE_PRIVATE)
- pvalidate_4k_page(paddr, paddr, true);
-}
-
void snp_set_page_private(unsigned long paddr)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_PRIVATE,
+ (struct svsm_ca *)boot_svsm_caa_pa,
+ boot_svsm_caa_pa
+ };
+
if (!sev_snp_enabled())
return;
- __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE);
+ __page_state_change(paddr, paddr, &d);
}
void snp_set_page_shared(unsigned long paddr)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_SHARED,
+ (struct svsm_ca *)boot_svsm_caa_pa,
+ boot_svsm_caa_pa
+ };
+
if (!sev_snp_enabled())
return;
- __page_state_change(paddr, SNP_PAGE_STATE_SHARED);
+ __page_state_change(paddr, paddr, &d);
}
bool early_setup_ghcb(void)
@@ -152,8 +97,14 @@ bool early_setup_ghcb(void)
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_PRIVATE,
+ (struct svsm_ca *)boot_svsm_caa_pa,
+ boot_svsm_caa_pa
+ };
+
for (phys_addr_t pa = start; pa < end; pa += PAGE_SIZE)
- __page_state_change(pa, SNP_PAGE_STATE_PRIVATE);
+ __page_state_change(pa, pa, &d);
}
void sev_es_shutdown_ghcb(void)
@@ -235,15 +186,23 @@ bool sev_es_check_ghcb_fault(unsigned long address)
MSR_AMD64_SNP_VMSA_REG_PROT | \
MSR_AMD64_SNP_RESERVED_BIT13 | \
MSR_AMD64_SNP_RESERVED_BIT15 | \
+ MSR_AMD64_SNP_SECURE_AVIC | \
MSR_AMD64_SNP_RESERVED_MASK)
+#ifdef CONFIG_AMD_SECURE_AVIC
+#define SNP_FEATURE_SECURE_AVIC MSR_AMD64_SNP_SECURE_AVIC
+#else
+#define SNP_FEATURE_SECURE_AVIC 0
+#endif
+
/*
* SNP_FEATURES_PRESENT is the mask of SNP features that are implemented
* by the guest kernel. As and when a new feature is implemented in the
* guest kernel, a corresponding bit should be added to the mask.
*/
#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \
- MSR_AMD64_SNP_SECURE_TSC)
+ MSR_AMD64_SNP_SECURE_TSC | \
+ SNP_FEATURE_SECURE_AVIC)
u64 snp_get_unsupported_features(u64 status)
{
@@ -347,7 +306,7 @@ static bool early_snp_init(struct boot_params *bp)
* running at VMPL0. The CA will be used to communicate with the
* SVSM and request its services.
*/
- svsm_setup_ca(cc_info);
+ svsm_setup_ca(cc_info, rip_rel_ptr(&boot_ghcb_page));
/*
* Pass run-time kernel a pointer to CC info via boot_params so EFI
@@ -391,6 +350,8 @@ static int sev_check_cpu_support(void)
if (!(eax & BIT(1)))
return -ENODEV;
+ sev_snp_needs_sfw = !(ebx & BIT(31));
+
return ebx & 0x3f;
}
@@ -453,30 +414,16 @@ void sev_enable(struct boot_params *bp)
*/
if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
u64 hv_features;
- int ret;
hv_features = get_hv_features();
if (!(hv_features & GHCB_HV_FT_SNP))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
/*
- * Enforce running at VMPL0 or with an SVSM.
- *
- * Use RMPADJUST (see the rmpadjust() function for a description of
- * what the instruction does) to update the VMPL1 permissions of a
- * page. If the guest is running at VMPL0, this will succeed. If the
- * guest is running at any other VMPL, this will fail. Linux SNP guests
- * only ever run at a single VMPL level so permission mask changes of a
- * lesser-privileged VMPL are a don't-care.
- */
- ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1);
-
- /*
- * Running at VMPL0 is not required if an SVSM is present and the hypervisor
- * supports the required SVSM GHCB events.
+ * Running at VMPL0 is required unless an SVSM is present and
+ * the hypervisor supports the required SVSM GHCB events.
*/
- if (ret &&
- !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL)))
+ if (snp_vmpl && !(hv_features & GHCB_HV_FT_SNP_MULTI_VMPL))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
}
@@ -550,7 +497,6 @@ bool early_is_sevsnp_guest(void)
/* Obtain the address of the calling area to use */
boot_rdmsr(MSR_SVSM_CAA, &m);
- boot_svsm_caa = (void *)m.q;
boot_svsm_caa_pa = m.q;
/*
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index 63e037e94e4c..916bac09b464 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -106,18 +106,5 @@ void get_cpuflags(void)
cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
&cpu.flags[1]);
}
-
- if (max_amd_level >= 0x8000001f) {
- u32 ebx;
-
- /*
- * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in
- * the virtualization flags entry (word 8) and set by
- * scattered.c, so the bit needs to be explicitly set.
- */
- cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored);
- if (ebx & BIT(31))
- set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags);
- }
}
}
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
index b514f7e81332..e8fdf020b422 100644
--- a/arch/x86/boot/startup/Makefile
+++ b/arch/x86/boot/startup/Makefile
@@ -4,6 +4,7 @@ KBUILD_AFLAGS += -D__DISABLE_EXPORTS
KBUILD_CFLAGS += -D__DISABLE_EXPORTS -mcmodel=small -fPIC \
-Os -DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN) \
+ $(DISABLE_LATENT_ENTROPY_PLUGIN) \
-fno-stack-protector -D__NO_FORTIFY \
-fno-jump-tables \
-include $(srctree)/include/linux/hidden.h
@@ -19,6 +20,7 @@ KCOV_INSTRUMENT := n
obj-$(CONFIG_X86_64) += gdt_idt.o map_kernel.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += sme.o sev-startup.o
+pi-objs := $(patsubst %.o,$(obj)/%.o,$(obj-y))
lib-$(CONFIG_X86_64) += la57toggle.o
lib-$(CONFIG_EFI_MIXED) += efi-mixed.o
@@ -28,3 +30,23 @@ lib-$(CONFIG_EFI_MIXED) += efi-mixed.o
# to be linked into the decompressor or the EFI stub but not vmlinux
#
$(patsubst %.o,$(obj)/%.o,$(lib-y)): OBJECT_FILES_NON_STANDARD := y
+
+#
+# Invoke objtool for each object individually to check for absolute
+# relocations, even if other objtool actions are being deferred.
+#
+$(pi-objs): objtool-enabled = 1
+$(pi-objs): objtool-args = $(if $(delay-objtool),,$(objtool-args-y)) --noabs
+
+#
+# Confine the startup code by prefixing all symbols with __pi_ (for position
+# independent). This ensures that startup code can only call other startup
+# code, or code that has explicitly been made accessible to it via a symbol
+# alias.
+#
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+ $(call if_changed,objcopy)
+
+targets += $(obj-y)
+obj-y := $(patsubst %.o,%.pi.o,$(obj-y))
diff --git a/arch/x86/boot/startup/exports.h b/arch/x86/boot/startup/exports.h
new file mode 100644
index 000000000000..01d2363dc445
--- /dev/null
+++ b/arch/x86/boot/startup/exports.h
@@ -0,0 +1,14 @@
+
+/*
+ * The symbols below are functions that are implemented by the startup code,
+ * but called at runtime by the SEV code residing in the core kernel.
+ */
+PROVIDE(early_set_pages_state = __pi_early_set_pages_state);
+PROVIDE(early_snp_set_memory_private = __pi_early_snp_set_memory_private);
+PROVIDE(early_snp_set_memory_shared = __pi_early_snp_set_memory_shared);
+PROVIDE(get_hv_features = __pi_get_hv_features);
+PROVIDE(sev_es_terminate = __pi_sev_es_terminate);
+PROVIDE(snp_cpuid = __pi_snp_cpuid);
+PROVIDE(snp_cpuid_get_table = __pi_snp_cpuid_get_table);
+PROVIDE(svsm_issue_call = __pi_svsm_issue_call);
+PROVIDE(svsm_process_result_codes = __pi_svsm_process_result_codes);
diff --git a/arch/x86/boot/startup/gdt_idt.c b/arch/x86/boot/startup/gdt_idt.c
index a3112a69b06a..d16102abdaec 100644
--- a/arch/x86/boot/startup/gdt_idt.c
+++ b/arch/x86/boot/startup/gdt_idt.c
@@ -24,7 +24,7 @@
static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
/* This may run while still in the direct mapping */
-void __head startup_64_load_idt(void *vc_handler)
+void startup_64_load_idt(void *vc_handler)
{
struct desc_ptr desc = {
.address = (unsigned long)rip_rel_ptr(bringup_idt_table),
@@ -46,7 +46,7 @@ void __head startup_64_load_idt(void *vc_handler)
/*
* Setup boot CPU state needed before kernel switches to virtual addresses.
*/
-void __head startup_64_setup_gdt_idt(void)
+void __init startup_64_setup_gdt_idt(void)
{
struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
void *handler = NULL;
diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c
index 332dbe6688c4..83ba98d61572 100644
--- a/arch/x86/boot/startup/map_kernel.c
+++ b/arch/x86/boot/startup/map_kernel.c
@@ -30,7 +30,7 @@ static inline bool check_la57_support(void)
return true;
}
-static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
+static unsigned long __init sme_postprocess_startup(struct boot_params *bp,
pmdval_t *pmd,
unsigned long p2v_offset)
{
@@ -84,7 +84,7 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
* the 1:1 mapping of memory. Kernel virtual addresses can be determined by
* subtracting p2v_offset from the RIP-relative address.
*/
-unsigned long __head __startup_64(unsigned long p2v_offset,
+unsigned long __init __startup_64(unsigned long p2v_offset,
struct boot_params *bp)
{
pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c
index a34cd19796f9..4e22ffd73516 100644
--- a/arch/x86/boot/startup/sev-shared.c
+++ b/arch/x86/boot/startup/sev-shared.c
@@ -12,35 +12,12 @@
#include <asm/setup_data.h>
#ifndef __BOOT_COMPRESSED
-#define error(v) pr_err(v)
#define has_cpuflag(f) boot_cpu_has(f)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
-#undef vc_forward_exception
-#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n")
#endif
-/*
- * SVSM related information:
- * During boot, the page tables are set up as identity mapped and later
- * changed to use kernel virtual addresses. Maintain separate virtual and
- * physical addresses for the CAA to allow SVSM functions to be used during
- * early boot, both with identity mapped virtual addresses and proper kernel
- * virtual addresses.
- */
-struct svsm_ca *boot_svsm_caa __ro_after_init;
-u64 boot_svsm_caa_pa __ro_after_init;
-
-/*
- * Since feature negotiation related variables are set early in the boot
- * process they must reside in the .data section so as not to be zeroed
- * out when the .bss section is later cleared.
- *
- * GHCB protocol version negotiated with the hypervisor.
- */
-static u16 ghcb_version __ro_after_init;
-
/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
@@ -54,17 +31,9 @@ static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;
-bool __init sev_es_check_cpu_features(void)
-{
- if (!has_cpuflag(X86_FEATURE_RDRAND)) {
- error("RDRAND instruction not supported - no trusted source of randomness available\n");
- return false;
- }
+bool sev_snp_needs_sfw;
- return true;
-}
-
-void __head __noreturn
+void __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
u64 val = GHCB_MSR_TERM_REQ;
@@ -83,7 +52,7 @@ sev_es_terminate(unsigned int set, unsigned int reason)
/*
* The hypervisor features are available from GHCB version 2 onward.
*/
-u64 get_hv_features(void)
+u64 __init get_hv_features(void)
{
u64 val;
@@ -100,72 +69,7 @@ u64 get_hv_features(void)
return GHCB_MSR_HV_FT_RESP_VAL(val);
}
-void snp_register_ghcb_early(unsigned long paddr)
-{
- unsigned long pfn = paddr >> PAGE_SHIFT;
- u64 val;
-
- sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
- VMGEXIT();
-
- val = sev_es_rd_ghcb_msr();
-
- /* If the response GPA is not ours then abort the guest */
- if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
- (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
-}
-
-bool sev_es_negotiate_protocol(void)
-{
- u64 val;
-
- /* Do the GHCB protocol version negotiation */
- sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
- VMGEXIT();
- val = sev_es_rd_ghcb_msr();
-
- if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
- return false;
-
- if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
- GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
- return false;
-
- ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);
-
- return true;
-}
-
-static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
- u32 ret;
-
- ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
- if (!ret)
- return ES_OK;
-
- if (ret == 1) {
- u64 info = ghcb->save.sw_exit_info_2;
- unsigned long v = info & SVM_EVTINJ_VEC_MASK;
-
- /* Check if exception information from hypervisor is sane. */
- if ((info & SVM_EVTINJ_VALID) &&
- ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
- ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
- ctxt->fi.vector = v;
-
- if (info & SVM_EVTINJ_VALID_ERR)
- ctxt->fi.error_code = info >> 32;
-
- return ES_EXCEPTION;
- }
- }
-
- return ES_VMM_ERROR;
-}
-
-static inline int svsm_process_result_codes(struct svsm_call *call)
+int svsm_process_result_codes(struct svsm_call *call)
{
switch (call->rax_out) {
case SVSM_SUCCESS:
@@ -193,7 +97,7 @@ static inline int svsm_process_result_codes(struct svsm_call *call)
* - RAX specifies the SVSM protocol/callid as input and the return code
* as output.
*/
-static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
+void svsm_issue_call(struct svsm_call *call, u8 *pending)
{
register unsigned long rax asm("rax") = call->rax;
register unsigned long rcx asm("rcx") = call->rcx;
@@ -216,7 +120,7 @@ static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
call->r9_out = r9;
}
-static int svsm_perform_msr_protocol(struct svsm_call *call)
+int svsm_perform_msr_protocol(struct svsm_call *call)
{
u8 pending = 0;
u64 val, resp;
@@ -247,63 +151,6 @@ static int svsm_perform_msr_protocol(struct svsm_call *call)
return svsm_process_result_codes(call);
}
-static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
-{
- struct es_em_ctxt ctxt;
- u8 pending = 0;
-
- vc_ghcb_invalidate(ghcb);
-
- /*
- * Fill in protocol and format specifiers. This can be called very early
- * in the boot, so use rip-relative references as needed.
- */
- ghcb->protocol_version = ghcb_version;
- ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
-
- ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
- ghcb_set_sw_exit_info_1(ghcb, 0);
- ghcb_set_sw_exit_info_2(ghcb, 0);
-
- sev_es_wr_ghcb_msr(__pa(ghcb));
-
- svsm_issue_call(call, &pending);
-
- if (pending)
- return -EINVAL;
-
- switch (verify_exception_info(ghcb, &ctxt)) {
- case ES_OK:
- break;
- case ES_EXCEPTION:
- vc_forward_exception(&ctxt);
- fallthrough;
- default:
- return -EINVAL;
- }
-
- return svsm_process_result_codes(call);
-}
-
-enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2)
-{
- /* Fill in protocol and format specifiers */
- ghcb->protocol_version = ghcb_version;
- ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
-
- ghcb_set_sw_exit_code(ghcb, exit_code);
- ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
- ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
-
- sev_es_wr_ghcb_msr(__pa(ghcb));
- VMGEXIT();
-
- return verify_exception_info(ghcb, ctxt);
-}
-
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
u64 val;
@@ -342,44 +189,7 @@ static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
return ret;
}
-static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
-{
- u32 cr4 = native_read_cr4();
- int ret;
-
- ghcb_set_rax(ghcb, leaf->fn);
- ghcb_set_rcx(ghcb, leaf->subfn);
-
- if (cr4 & X86_CR4_OSXSAVE)
- /* Safe to read xcr0 */
- ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
- else
- /* xgetbv will cause #UD - use reset value for xcr0 */
- ghcb_set_xcr0(ghcb, 1);
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
- if (ret != ES_OK)
- return ret;
-
- if (!(ghcb_rax_is_valid(ghcb) &&
- ghcb_rbx_is_valid(ghcb) &&
- ghcb_rcx_is_valid(ghcb) &&
- ghcb_rdx_is_valid(ghcb)))
- return ES_VMM_ERROR;
- leaf->eax = ghcb->save.rax;
- leaf->ebx = ghcb->save.rbx;
- leaf->ecx = ghcb->save.rcx;
- leaf->edx = ghcb->save.rdx;
-
- return ES_OK;
-}
-
-static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
-{
- return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
- : __sev_cpuid_hv_msr(leaf);
-}
/*
* This may be called early while still running on the initial identity
@@ -412,7 +222,7 @@ const struct snp_cpuid_table *snp_cpuid_get_table(void)
*
* Return: XSAVE area size on success, 0 otherwise.
*/
-static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
+static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
u64 xfeatures_found = 0;
@@ -448,7 +258,7 @@ static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
return xsave_size;
}
-static bool __head
+static bool
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -484,21 +294,21 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
return false;
}
-static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+static void snp_cpuid_hv_msr(void *ctx, struct cpuid_leaf *leaf)
{
- if (sev_cpuid_hv(ghcb, ctxt, leaf))
+ if (__sev_cpuid_hv_msr(leaf))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}
-static int __head
-snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
- struct cpuid_leaf *leaf)
+static int
+snp_cpuid_postprocess(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf)
{
struct cpuid_leaf leaf_hv = *leaf;
switch (leaf->fn) {
case 0x1:
- snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
+ cpuid_fn(ctx, &leaf_hv);
/* initial APIC ID */
leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
@@ -517,7 +327,7 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
break;
case 0xB:
leaf_hv.subfn = 0;
- snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
+ cpuid_fn(ctx, &leaf_hv);
/* extended APIC ID */
leaf->edx = leaf_hv.edx;
@@ -565,7 +375,7 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
}
break;
case 0x8000001E:
- snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
+ cpuid_fn(ctx, &leaf_hv);
/* extended APIC ID */
leaf->eax = leaf_hv.eax;
@@ -586,8 +396,8 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-int __head
-snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -621,7 +431,7 @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
return 0;
}
- return snp_cpuid_postprocess(ghcb, ctxt, leaf);
+ return snp_cpuid_postprocess(cpuid_fn, ctx, leaf);
}
/*
@@ -629,7 +439,7 @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
* page yet, so it only supports the MSR based communication with the
* hypervisor and only the CPUID exit-code.
*/
-void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
unsigned int subfn = lower_bits(regs->cx, 32);
unsigned int fn = lower_bits(regs->ax, 32);
@@ -648,13 +458,24 @@ void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
leaf.fn = fn;
leaf.subfn = subfn;
- ret = snp_cpuid(NULL, NULL, &leaf);
+ /*
+ * If SNP is active, then snp_cpuid() uses the CPUID table to obtain the
+ * CPUID values (with possible HV interaction during post-processing of
+ * the values). But if SNP is not active (no CPUID table present), then
+ * snp_cpuid() returns -EOPNOTSUPP so that an SEV-ES guest can call the
+ * HV to obtain the CPUID information.
+ */
+ ret = snp_cpuid(snp_cpuid_hv_msr, NULL, &leaf);
if (!ret)
goto cpuid_done;
if (ret != -EOPNOTSUPP)
goto fail;
+ /*
+ * This point is reached by an SEV-ES guest, which needs to invoke the
+ * HV for the CPUID data.
+ */
if (__sev_cpuid_hv_msr(&leaf))
goto fail;
@@ -705,7 +526,7 @@ struct cc_setup_data {
* Search for a Confidential Computing blob passed in as a setup_data entry
* via the Linux Boot Protocol.
*/
-static __head
+static __init
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
struct cc_setup_data *sd = NULL;
@@ -733,7 +554,7 @@ struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
* mapping needs to be updated in sync with all the changes to virtual memory
* layout and related mapping facilities throughout the boot process.
*/
-static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
int i;
@@ -761,13 +582,24 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
}
}
-static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
+static int svsm_call_msr_protocol(struct svsm_call *call)
+{
+ int ret;
+
+ do {
+ ret = svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static void svsm_pval_4k_page(unsigned long paddr, bool validate,
+ struct svsm_ca *caa, u64 caa_pa)
{
struct svsm_pvalidate_call *pc;
struct svsm_call call = {};
unsigned long flags;
u64 pc_pa;
- int ret;
/*
* This can be called very early in the boot, use native functions in
@@ -775,10 +607,10 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
*/
flags = native_local_irq_save();
- call.caa = svsm_get_caa();
+ call.caa = caa;
pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
- pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
+ pc_pa = caa_pa + offsetof(struct svsm_ca, svsm_buffer);
pc->num_entries = 1;
pc->cur_index = 0;
@@ -792,20 +624,24 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
call.rcx = pc_pa;
- ret = svsm_perform_call_protocol(&call);
- if (ret)
+ /*
+ * Use the MSR protocol exclusively, so that this code is usable in
+ * startup code where VA/PA translations of the GHCB page's address may
+ * be problematic.
+ */
+ if (svsm_call_msr_protocol(&call))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
native_local_irq_restore(flags);
}
-static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
- bool validate)
+static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
+ bool validate, struct svsm_ca *caa, u64 caa_pa)
{
int ret;
if (snp_vmpl) {
- svsm_pval_4k_page(paddr, validate);
+ svsm_pval_4k_page(paddr, validate, caa, caa_pa);
} else {
ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
if (ret)
@@ -816,15 +652,51 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
* If validating memory (making it private) and affected by the
* cache-coherency vulnerability, perform the cache eviction mitigation.
*/
- if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO))
+ if (validate && sev_snp_needs_sfw)
sev_evict_cache((void *)vaddr, 1);
}
+static void __page_state_change(unsigned long vaddr, unsigned long paddr,
+ const struct psc_desc *desc)
+{
+ u64 val, msr;
+
+ /*
+ * If private -> shared then invalidate the page before requesting the
+ * state change in the RMP table.
+ */
+ if (desc->op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(vaddr, paddr, false, desc->ca, desc->caa_pa);
+
+ /* Save the current GHCB MSR value */
+ msr = sev_es_rd_ghcb_msr();
+
+ /* Issue VMGEXIT to change the page state in RMP table. */
+ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, desc->op));
+ VMGEXIT();
+
+ /* Read the response of the VMGEXIT. */
+ val = sev_es_rd_ghcb_msr();
+ if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+ /* Restore the GHCB MSR value */
+ sev_es_wr_ghcb_msr(msr);
+
+ /*
+ * Now that page state is changed in the RMP table, validate it so that it is
+ * consistent with the RMP entry.
+ */
+ if (desc->op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(vaddr, paddr, true, desc->ca, desc->caa_pa);
+}
+
/*
* Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
* services needed when not running in VMPL0.
*/
-static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
+static bool __init svsm_setup_ca(const struct cc_blob_sev_info *cc_info,
+ void *page)
{
struct snp_secrets_page *secrets_page;
struct snp_cpuid_table *cpuid_table;
@@ -847,7 +719,7 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
* routine is running identity mapped when called, both by the decompressor
* code and the early kernel code.
*/
- if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
+ if (!rmpadjust((unsigned long)page, RMP_PG_SIZE_4K, 1))
return false;
/*
@@ -875,11 +747,6 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
if (caa & (PAGE_SIZE - 1))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);
- /*
- * The CA is identity mapped when this routine is called, both by the
- * decompressor code and the early kernel code.
- */
- boot_svsm_caa = (struct svsm_ca *)caa;
boot_svsm_caa_pa = caa;
/* Advertise the SVSM presence via CPUID. */
diff --git a/arch/x86/boot/startup/sev-startup.c b/arch/x86/boot/startup/sev-startup.c
index 0b7e3b950183..09725428d3e6 100644
--- a/arch/x86/boot/startup/sev-startup.c
+++ b/arch/x86/boot/startup/sev-startup.c
@@ -41,143 +41,14 @@
#include <asm/cpuid/api.h>
#include <asm/cmdline.h>
-/* For early boot hypervisor communication in SEV-ES enabled guests */
-struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
-
-/*
- * Needs to be in the .data section because we need it NULL before bss is
- * cleared
- */
-struct ghcb *boot_ghcb __section(".data");
-
-/* Bitmap of SEV features supported by the hypervisor */
-u64 sev_hv_features __ro_after_init;
-
-/* Secrets page physical address from the CC blob */
-u64 sev_secrets_pa __ro_after_init;
-
-/* For early boot SVSM communication */
-struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
-
-DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
-DEFINE_PER_CPU(u64, svsm_caa_pa);
-
-/*
- * Nothing shall interrupt this code path while holding the per-CPU
- * GHCB. The backup GHCB is only for NMIs interrupting this path.
- *
- * Callers must disable local interrupts around it.
- */
-noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
-{
- struct sev_es_runtime_data *data;
- struct ghcb *ghcb;
-
- WARN_ON(!irqs_disabled());
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (unlikely(data->ghcb_active)) {
- /* GHCB is already in use - save its contents */
-
- if (unlikely(data->backup_ghcb_active)) {
- /*
- * Backup-GHCB is also already in use. There is no way
- * to continue here so just kill the machine. To make
- * panic() work, mark GHCBs inactive so that messages
- * can be printed out.
- */
- data->ghcb_active = false;
- data->backup_ghcb_active = false;
-
- instrumentation_begin();
- panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
- instrumentation_end();
- }
-
- /* Mark backup_ghcb active before writing to it */
- data->backup_ghcb_active = true;
-
- state->ghcb = &data->backup_ghcb;
-
- /* Backup GHCB content */
- *state->ghcb = *ghcb;
- } else {
- state->ghcb = NULL;
- data->ghcb_active = true;
- }
-
- return ghcb;
-}
-
/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"
-noinstr void __sev_put_ghcb(struct ghcb_state *state)
-{
- struct sev_es_runtime_data *data;
- struct ghcb *ghcb;
-
- WARN_ON(!irqs_disabled());
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (state->ghcb) {
- /* Restore GHCB from Backup */
- *ghcb = *state->ghcb;
- data->backup_ghcb_active = false;
- state->ghcb = NULL;
- } else {
- /*
- * Invalidate the GHCB so a VMGEXIT instruction issued
- * from userspace won't appear to be valid.
- */
- vc_ghcb_invalidate(ghcb);
- data->ghcb_active = false;
- }
-}
-
-int svsm_perform_call_protocol(struct svsm_call *call)
-{
- struct ghcb_state state;
- unsigned long flags;
- struct ghcb *ghcb;
- int ret;
-
- /*
- * This can be called very early in the boot, use native functions in
- * order to avoid paravirt issues.
- */
- flags = native_local_irq_save();
-
- if (sev_cfg.ghcbs_initialized)
- ghcb = __sev_get_ghcb(&state);
- else if (boot_ghcb)
- ghcb = boot_ghcb;
- else
- ghcb = NULL;
-
- do {
- ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
- : svsm_perform_msr_protocol(call);
- } while (ret == -EAGAIN);
-
- if (sev_cfg.ghcbs_initialized)
- __sev_put_ghcb(&state);
-
- native_local_irq_restore(flags);
-
- return ret;
-}
-
-void __head
+void
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op)
+ unsigned long npages, const struct psc_desc *desc)
{
unsigned long paddr_end;
- u64 val;
vaddr = vaddr & PAGE_MASK;
@@ -185,42 +56,22 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
paddr_end = paddr + (npages << PAGE_SHIFT);
while (paddr < paddr_end) {
- /* Page validation must be rescinded before changing to shared */
- if (op == SNP_PAGE_STATE_SHARED)
- pvalidate_4k_page(vaddr, paddr, false);
-
- /*
- * Use the MSR protocol because this function can be called before
- * the GHCB is established.
- */
- sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
- VMGEXIT();
-
- val = sev_es_rd_ghcb_msr();
-
- if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
- goto e_term;
-
- if (GHCB_MSR_PSC_RESP_VAL(val))
- goto e_term;
-
- /* Page validation must be performed after changing to private */
- if (op == SNP_PAGE_STATE_PRIVATE)
- pvalidate_4k_page(vaddr, paddr, true);
+ __page_state_change(vaddr, paddr, desc);
vaddr += PAGE_SIZE;
paddr += PAGE_SIZE;
}
-
- return;
-
-e_term:
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}
-void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_PRIVATE,
+ rip_rel_ptr(&boot_svsm_ca_page),
+ boot_svsm_caa_pa
+ };
+
/*
* This can be invoked in early boot while running identity mapped, so
* use an open coded check for SNP instead of using cc_platform_has().
@@ -234,12 +85,18 @@ void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
* Ask the hypervisor to mark the memory pages as private in the RMP
* table.
*/
- early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
+ early_set_pages_state(vaddr, paddr, npages, &d);
}
-void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_SHARED,
+ rip_rel_ptr(&boot_svsm_ca_page),
+ boot_svsm_caa_pa
+ };
+
/*
* This can be invoked in early boot while running identity mapped, so
* use an open coded check for SNP instead of using cc_platform_has().
@@ -250,7 +107,7 @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
return;
/* Ask hypervisor to mark the memory pages shared in the RMP table. */
- early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
+ early_set_pages_state(vaddr, paddr, npages, &d);
}
/*
@@ -266,7 +123,7 @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
*
* Scan for the blob in that order.
*/
-static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+static struct cc_blob_sev_info *__init find_cc_blob(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -287,15 +144,15 @@ static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
found_cc_info:
if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
- snp_abort();
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
return cc_info;
}
-static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
+static void __init svsm_setup(struct cc_blob_sev_info *cc_info)
{
+ struct snp_secrets_page *secrets = (void *)cc_info->secrets_phys;
struct svsm_call call = {};
- int ret;
u64 pa;
/*
@@ -303,7 +160,7 @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
* running at VMPL0. The CA will be used to communicate with the
* SVSM to perform the SVSM services.
*/
- if (!svsm_setup_ca(cc_info))
+ if (!svsm_setup_ca(cc_info, rip_rel_ptr(&boot_svsm_ca_page)))
return;
/*
@@ -315,25 +172,25 @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
pa = (u64)rip_rel_ptr(&boot_svsm_ca_page);
/*
- * Switch over to the boot SVSM CA while the current CA is still
- * addressable. There is no GHCB at this point so use the MSR protocol.
+ * Switch over to the boot SVSM CA while the current CA is still 1:1
+ * mapped and thus addressable with VA == PA. There is no GHCB at this
+ * point so use the MSR protocol.
*
* SVSM_CORE_REMAP_CA call:
* RAX = 0 (Protocol=0, CallID=0)
* RCX = New CA GPA
*/
- call.caa = svsm_get_caa();
+ call.caa = (struct svsm_ca *)secrets->svsm_caa;
call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
call.rcx = pa;
- ret = svsm_perform_call_protocol(&call);
- if (ret)
+
+ if (svsm_call_msr_protocol(&call))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);
- boot_svsm_caa = (struct svsm_ca *)pa;
boot_svsm_caa_pa = pa;
}
-bool __head snp_init(struct boot_params *bp)
+bool __init snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -361,8 +218,3 @@ bool __head snp_init(struct boot_params *bp)
return true;
}
-
-void __head __noreturn snp_abort(void)
-{
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
-}
diff --git a/arch/x86/boot/startup/sme.c b/arch/x86/boot/startup/sme.c
index 70ea1748c0a7..e7ea65f3f1d6 100644
--- a/arch/x86/boot/startup/sme.c
+++ b/arch/x86/boot/startup/sme.c
@@ -91,7 +91,7 @@ struct sme_populate_pgd_data {
*/
static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
-static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
@@ -106,7 +106,7 @@ static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
memset(pgd_p, 0, pgd_size);
}
-static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -143,7 +143,7 @@ static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
return pud;
}
-static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -159,7 +159,7 @@ static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
}
-static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -185,7 +185,7 @@ static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
}
-static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd_large(ppd);
@@ -195,7 +195,7 @@ static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
}
}
-static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd(ppd);
@@ -205,7 +205,7 @@ static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
}
}
-static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
pmdval_t pmd_flags, pteval_t pte_flags)
{
unsigned long vaddr_end;
@@ -229,22 +229,22 @@ static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
__sme_map_range_pte(ppd);
}
-static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}
-static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
}
-static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}
-static unsigned long __head sme_pgtable_calc(unsigned long len)
+static unsigned long __init sme_pgtable_calc(unsigned long len)
{
unsigned long entries = 0, tables = 0;
@@ -281,7 +281,7 @@ static unsigned long __head sme_pgtable_calc(unsigned long len)
return entries + tables;
}
-void __head sme_encrypt_kernel(struct boot_params *bp)
+void __init sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
@@ -485,7 +485,7 @@ void __head sme_encrypt_kernel(struct boot_params *bp)
native_write_cr3(__native_read_cr3());
}
-void __head sme_enable(struct boot_params *bp)
+void __init sme_enable(struct boot_params *bp)
{
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
@@ -521,6 +521,7 @@ void __head sme_enable(struct boot_params *bp)
return;
me_mask = 1UL << (ebx & 0x3f);
+ sev_snp_needs_sfw = !(ebx & BIT(31));
/* Check the SEV MSR whether SEV or SME is enabled */
sev_status = msr = native_rdmsrq(MSR_AMD64_SEV);
@@ -531,7 +532,7 @@ void __head sme_enable(struct boot_params *bp)
* enablement abort the guest.
*/
if (snp_en ^ !!(msr & MSR_AMD64_SEV_SNP_ENABLED))
- snp_abort();
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
@@ -567,7 +568,6 @@ void __head sme_enable(struct boot_params *bp)
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/* Local version for startup code, which never operates on user page tables */
-__weak
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
return pgd;
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index d4610af68114..989ca9f72ba3 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -104,6 +104,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr)
case CC_ATTR_HOST_SEV_SNP:
return cc_flags.host_sev_snp;
+ case CC_ATTR_SNP_SECURE_AVIC:
+ return sev_status & MSR_AMD64_SNP_SECURE_AVIC;
+
default:
return false;
}
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
index 342d79f0ab6a..3b8ae214a6a6 100644
--- a/arch/x86/coco/sev/Makefile
+++ b/arch/x86/coco/sev/Makefile
@@ -1,10 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += core.o sev-nmi.o vc-handle.o
+obj-y += core.o noinstr.o vc-handle.o
# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining
-UBSAN_SANITIZE_sev-nmi.o := n
+UBSAN_SANITIZE_noinstr.o := n
# GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining
-KASAN_SANITIZE_sev-nmi.o := n
-KCSAN_SANITIZE_sev-nmi.o := n
+KASAN_SANITIZE_noinstr.o := n
+KCSAN_SANITIZE_noinstr.o := n
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 14ef5908fb27..9ae3b11754e6 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -46,6 +46,48 @@
#include <asm/cmdline.h>
#include <asm/msr.h>
+/* Bitmap of SEV features supported by the hypervisor */
+u64 sev_hv_features __ro_after_init;
+SYM_PIC_ALIAS(sev_hv_features);
+
+/* Secrets page physical address from the CC blob */
+u64 sev_secrets_pa __ro_after_init;
+SYM_PIC_ALIAS(sev_secrets_pa);
+
+/* For early boot SVSM communication */
+struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
+SYM_PIC_ALIAS(boot_svsm_ca_page);
+
+/*
+ * SVSM related information:
+ * During boot, the page tables are set up as identity mapped and later
+ * changed to use kernel virtual addresses. Maintain separate virtual and
+ * physical addresses for the CAA to allow SVSM functions to be used during
+ * early boot, both with identity mapped virtual addresses and proper kernel
+ * virtual addresses.
+ */
+u64 boot_svsm_caa_pa __ro_after_init;
+SYM_PIC_ALIAS(boot_svsm_caa_pa);
+
+DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
+DEFINE_PER_CPU(u64, svsm_caa_pa);
+
+static inline struct svsm_ca *svsm_get_caa(void)
+{
+ if (sev_cfg.use_cas)
+ return this_cpu_read(svsm_caa);
+ else
+ return rip_rel_ptr(&boot_svsm_ca_page);
+}
+
+static inline u64 svsm_get_caa_pa(void)
+{
+ if (sev_cfg.use_cas)
+ return this_cpu_read(svsm_caa_pa);
+ else
+ return boot_svsm_caa_pa;
+}
+
/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT 0xffff
#define AP_INIT_DS_LIMIT 0xffff
@@ -79,6 +121,7 @@ static const char * const sev_status_feat_names[] = {
[MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
[MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
[MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
+ [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC",
};
/*
@@ -100,6 +143,26 @@ DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
*/
u8 snp_vmpl __ro_after_init;
EXPORT_SYMBOL_GPL(snp_vmpl);
+SYM_PIC_ALIAS(snp_vmpl);
+
+/*
+ * Since feature negotiation related variables are set early in the boot
+ * process they must reside in the .data section so as not to be zeroed
+ * out when the .bss section is later cleared.
+ *
+ * GHCB protocol version negotiated with the hypervisor.
+ */
+u16 ghcb_version __ro_after_init;
+SYM_PIC_ALIAS(ghcb_version);
+
+/* For early boot hypervisor communication in SEV-ES enabled guests */
+static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
+
+/*
+ * Needs to be in the .data section because we need it NULL before bss is
+ * cleared
+ */
+struct ghcb *boot_ghcb __section(".data");
static u64 __init get_snp_jump_table_addr(void)
{
@@ -154,6 +217,73 @@ static u64 __init get_jump_table_addr(void)
return ret;
}
+static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
+{
+ struct es_em_ctxt ctxt;
+ u8 pending = 0;
+
+ vc_ghcb_invalidate(ghcb);
+
+ /*
+ * Fill in protocol and format specifiers. This can be called very early
+ * in the boot, so use rip-relative references as needed.
+ */
+ ghcb->protocol_version = ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+
+ svsm_issue_call(call, &pending);
+
+ if (pending)
+ return -EINVAL;
+
+ switch (verify_exception_info(ghcb, &ctxt)) {
+ case ES_OK:
+ break;
+ case ES_EXCEPTION:
+ vc_forward_exception(&ctxt);
+ fallthrough;
+ default:
+ return -EINVAL;
+ }
+
+ return svsm_process_result_codes(call);
+}
+
+static int svsm_perform_call_protocol(struct svsm_call *call)
+{
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int ret;
+
+ flags = native_local_irq_save();
+
+ if (sev_cfg.ghcbs_initialized)
+ ghcb = __sev_get_ghcb(&state);
+ else if (boot_ghcb)
+ ghcb = boot_ghcb;
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
+ : __pi_svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ if (sev_cfg.ghcbs_initialized)
+ __sev_put_ghcb(&state);
+
+ native_local_irq_restore(flags);
+
+ return ret;
+}
+
static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
int ret, u64 svsm_ret)
{
@@ -531,8 +661,11 @@ static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
unsigned long vaddr_end;
/* Use the MSR protocol when a GHCB is not available. */
- if (!boot_ghcb)
- return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
+ if (!boot_ghcb) {
+ struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() };
+
+ return early_set_pages_state(vaddr, __pa(vaddr), npages, &d);
+ }
vaddr = vaddr & PAGE_MASK;
vaddr_end = vaddr + (npages << PAGE_SHIFT);
@@ -973,6 +1106,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned
vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
+ if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
+ vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK;
+
/* SVME must be set. */
vmsa->efer = EFER_SVME;
@@ -1107,6 +1243,105 @@ int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
return 0;
}
+u64 savic_ghcb_msr_read(u32 reg)
+{
+ u64 msr = APIC_BASE_MSR + (reg >> 4);
+ struct pt_regs regs = { .cx = msr };
+ struct es_em_ctxt ctxt = { .regs = &regs };
+ struct ghcb_state state;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ res = sev_es_ghcb_handle_msr(ghcb, &ctxt, false);
+ if (res != ES_OK) {
+ pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res);
+ /* MSR read failures are treated as fatal errors */
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+ }
+
+ __sev_put_ghcb(&state);
+
+ return regs.ax | regs.dx << 32;
+}
+
+void savic_ghcb_msr_write(u32 reg, u64 value)
+{
+ u64 msr = APIC_BASE_MSR + (reg >> 4);
+ struct pt_regs regs = {
+ .cx = msr,
+ .ax = lower_32_bits(value),
+ .dx = upper_32_bits(value)
+ };
+ struct es_em_ctxt ctxt = { .regs = &regs };
+ struct ghcb_state state;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true);
+ if (res != ES_OK) {
+ pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res);
+ /* MSR writes should never fail. Any failure is fatal error for SNP guest */
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+ }
+
+ __sev_put_ghcb(&state);
+}
+
+enum es_result savic_register_gpa(u64 gpa)
+{
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
+ ghcb_set_rbx(ghcb, gpa);
+ res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
+ SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0);
+
+ __sev_put_ghcb(&state);
+
+ return res;
+}
+
+enum es_result savic_unregister_gpa(u64 *gpa)
+{
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
+ res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
+ SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0);
+ if (gpa && res == ES_OK)
+ *gpa = ghcb->save.rbx;
+
+ __sev_put_ghcb(&state);
+
+ return res;
+}
+
static void snp_register_per_cpu_ghcb(void)
{
struct sev_es_runtime_data *data;
@@ -1233,7 +1468,8 @@ static void __init alloc_runtime_data(int cpu)
struct svsm_ca *caa;
/* Allocate the SVSM CA page if an SVSM is present */
- caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);
+ caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE)
+ : &boot_svsm_ca_page;
per_cpu(svsm_caa, cpu) = caa;
per_cpu(svsm_caa_pa, cpu) = __pa(caa);
@@ -1287,32 +1523,9 @@ void __init sev_es_init_vc_handling(void)
init_ghcb(cpu);
}
- /* If running under an SVSM, switch to the per-cpu CA */
- if (snp_vmpl) {
- struct svsm_call call = {};
- unsigned long flags;
- int ret;
-
- local_irq_save(flags);
-
- /*
- * SVSM_CORE_REMAP_CA call:
- * RAX = 0 (Protocol=0, CallID=0)
- * RCX = New CA GPA
- */
- call.caa = svsm_get_caa();
- call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
- call.rcx = this_cpu_read(svsm_caa_pa);
- ret = svsm_perform_call_protocol(&call);
- if (ret)
- panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
- ret, call.rax_out);
-
+ if (snp_vmpl)
sev_cfg.use_cas = true;
- local_irq_restore(flags);
- }
-
sev_es_setup_play_dead();
/* Secondary CPUs use the runtime #VC handler */
@@ -1590,15 +1803,6 @@ void sev_show_status(void)
pr_cont("\n");
}
-void __init snp_update_svsm_ca(void)
-{
- if (!snp_vmpl)
- return;
-
- /* Update the CAA to a proper kernel address */
- boot_svsm_caa = &boot_svsm_ca_page;
-}
-
#ifdef CONFIG_SYSFS
static ssize_t vmpl_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
diff --git a/arch/x86/coco/sev/sev-nmi.c b/arch/x86/coco/sev/noinstr.c
index d8dfaddfb367..b527eafb6312 100644
--- a/arch/x86/coco/sev/sev-nmi.c
+++ b/arch/x86/coco/sev/noinstr.c
@@ -106,3 +106,77 @@ void noinstr __sev_es_nmi_complete(void)
__sev_put_ghcb(&state);
}
+
+/*
+ * Nothing shall interrupt this code path while holding the per-CPU
+ * GHCB. The backup GHCB is only for NMIs interrupting this path.
+ *
+ * Callers must disable local interrupts around it.
+ */
+noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ WARN_ON(!irqs_disabled());
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (unlikely(data->ghcb_active)) {
+ /* GHCB is already in use - save its contents */
+
+ if (unlikely(data->backup_ghcb_active)) {
+ /*
+ * Backup-GHCB is also already in use. There is no way
+ * to continue here so just kill the machine. To make
+ * panic() work, mark GHCBs inactive so that messages
+ * can be printed out.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ instrumentation_begin();
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ instrumentation_end();
+ }
+
+ /* Mark backup_ghcb active before writing to it */
+ data->backup_ghcb_active = true;
+
+ state->ghcb = &data->backup_ghcb;
+
+ /* Backup GHCB content */
+ *state->ghcb = *ghcb;
+ } else {
+ state->ghcb = NULL;
+ data->ghcb_active = true;
+ }
+
+ return ghcb;
+}
+
+noinstr void __sev_put_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ WARN_ON(!irqs_disabled());
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (state->ghcb) {
+ /* Restore GHCB from Backup */
+ *ghcb = *state->ghcb;
+ data->backup_ghcb_active = false;
+ state->ghcb = NULL;
+ } else {
+ /*
+ * Invalidate the GHCB so a VMGEXIT instruction issued
+ * from userspace won't appear to be valid.
+ */
+ vc_ghcb_invalidate(ghcb);
+ data->ghcb_active = false;
+ }
+}
diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c
index c3b4acbde0d8..7fc136a35334 100644
--- a/arch/x86/coco/sev/vc-handle.c
+++ b/arch/x86/coco/sev/vc-handle.c
@@ -351,6 +351,8 @@ fault:
}
#define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__)
+#define error(v)
+#define has_cpuflag(f) boot_cpu_has(f)
#include "vc-shared.c"
@@ -402,14 +404,10 @@ static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool
return ES_OK;
}
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write)
{
struct pt_regs *regs = ctxt->regs;
enum es_result ret;
- bool write;
-
- /* Is it a WRMSR? */
- write = ctxt->insn.opcode.bytes[1] == 0x30;
switch (regs->cx) {
case MSR_SVSM_CAA:
@@ -419,6 +417,15 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
return __vc_handle_secure_tsc_msrs(ctxt, write);
break;
+ case MSR_AMD64_SAVIC_CONTROL:
+ /*
+ * AMD64_SAVIC_CONTROL should not be intercepted when
+ * Secure AVIC is enabled. Terminate the Secure AVIC guest
+ * if the interception is enabled.
+ */
+ if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
+ return ES_VMM_ERROR;
+ break;
default:
break;
}
@@ -439,6 +446,11 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ return sev_es_ghcb_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30);
+}
+
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
int trapnr = ctxt->fi.vector;
diff --git a/arch/x86/coco/sev/vc-shared.c b/arch/x86/coco/sev/vc-shared.c
index 2c0ab0fdc060..9b01c9ad81be 100644
--- a/arch/x86/coco/sev/vc-shared.c
+++ b/arch/x86/coco/sev/vc-shared.c
@@ -409,15 +409,109 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
+enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ u32 ret;
+
+ ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
+ if (!ret)
+ return ES_OK;
+
+ if (ret == 1) {
+ u64 info = ghcb->save.sw_exit_info_2;
+ unsigned long v = info & SVM_EVTINJ_VEC_MASK;
+
+ /* Check if exception information from hypervisor is sane. */
+ if ((info & SVM_EVTINJ_VALID) &&
+ ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
+ ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
+ ctxt->fi.vector = v;
+
+ if (info & SVM_EVTINJ_VALID_ERR)
+ ctxt->fi.error_code = info >> 32;
+
+ return ES_EXCEPTION;
+ }
+ }
+
+ return ES_VMM_ERROR;
+}
+
+enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
+{
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, exit_code);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ return verify_exception_info(ghcb, ctxt);
+}
+
+static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ u32 cr4 = native_read_cr4();
+ int ret;
+
+ ghcb_set_rax(ghcb, leaf->fn);
+ ghcb_set_rcx(ghcb, leaf->subfn);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #UD - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ leaf->eax = ghcb->save.rax;
+ leaf->ebx = ghcb->save.rbx;
+ leaf->ecx = ghcb->save.rcx;
+ leaf->edx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+struct cpuid_ctx {
+ struct ghcb *ghcb;
+ struct es_em_ctxt *ctxt;
+};
+
+static void snp_cpuid_hv_ghcb(void *p, struct cpuid_leaf *leaf)
+{
+ struct cpuid_ctx *ctx = p;
+
+ if (__sev_cpuid_hv_ghcb(ctx->ghcb, ctx->ctxt, leaf))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
+}
+
static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
+ struct cpuid_ctx ctx = { ghcb, ctxt };
struct pt_regs *regs = ctxt->regs;
struct cpuid_leaf leaf;
int ret;
leaf.fn = regs->ax;
leaf.subfn = regs->cx;
- ret = snp_cpuid(ghcb, ctxt, &leaf);
+ ret = snp_cpuid(snp_cpuid_hv_ghcb, &ctx, &leaf);
if (!ret) {
regs->ax = leaf.eax;
regs->bx = leaf.ebx;
@@ -502,3 +596,50 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
return ES_OK;
}
+
+void snp_register_ghcb_early(unsigned long paddr)
+{
+ unsigned long pfn = paddr >> PAGE_SHIFT;
+ u64 val;
+
+ sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
+ VMGEXIT();
+
+ val = sev_es_rd_ghcb_msr();
+
+ /* If the response GPA is not ours then abort the guest */
+ if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
+ (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
+}
+
+bool __init sev_es_check_cpu_features(void)
+{
+ if (!has_cpuflag(X86_FEATURE_RDRAND)) {
+ error("RDRAND instruction not supported - no trusted source of randomness available\n");
+ return false;
+ }
+
+ return true;
+}
+
+bool sev_es_negotiate_protocol(void)
+{
+ u64 val;
+
+ /* Do the GHCB protocol version negotiation */
+ sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+
+ if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
+ return false;
+
+ if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+ GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
+ return false;
+
+ ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);
+
+ return true;
+}
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 07ba4935e873..a26e66d66444 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -305,6 +305,8 @@ struct apic {
/* Probe, setup and smpboot functions */
int (*probe)(void);
+ void (*setup)(void);
+ void (*teardown)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
void (*init_apic_ldr)(void);
@@ -317,6 +319,8 @@ struct apic {
/* wakeup secondary CPU using 64-bit wakeup point */
int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu);
+ void (*update_vector)(unsigned int cpu, unsigned int vector, bool set);
+
char *name;
};
@@ -470,6 +474,12 @@ static __always_inline bool apic_id_valid(u32 apic_id)
return apic_id <= apic->max_apic_id;
}
+static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ if (apic->update_vector)
+ apic->update_vector(cpu, vector, set);
+}
+
#else /* CONFIG_X86_LOCAL_APIC */
static inline u32 apic_read(u32 reg) { return 0; }
@@ -481,6 +491,7 @@ static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); }
static inline void apic_setup_apic_calls(void) { }
+static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { }
#define apic_update_callback(_callback, _fn) do { } while (0)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -135,6 +135,8 @@
#define APIC_TDR_DIV_128 0xA
#define APIC_EFEAT 0x400
#define APIC_ECTRL 0x410
+#define APIC_SEOI 0x420
+#define APIC_IER 0x480
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 02b23aa78955..f7b67cb73915 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -82,6 +82,8 @@
#ifndef __ASSEMBLER__
extern unsigned int output_len;
extern const unsigned long kernel_text_size;
+extern const unsigned long kernel_inittext_offset;
+extern const unsigned long kernel_inittext_size;
extern const unsigned long kernel_total_size;
unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 5a68e9db6518..01ccdd168df0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,12 +2,6 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H
-#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
-#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector
-#else
-#define __head __section(".head.text") __no_sanitize_undefined __no_kstack_erase
-#endif
-
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void (*free_pgt_page)(void *, void *); /* free buf for page table */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 73393a66d3ab..718a55d82fe4 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -706,8 +706,15 @@
#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
#define MSR_AMD64_SNP_SMT_PROT_BIT 17
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
-#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 19
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
+#define MSR_AMD64_SAVIC_EN_BIT 0
+#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT)
+#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1
+#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT)
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
#define MSR_AMD64_RMP_CFG 0xc0010136
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 692af46603a1..914eb32581c7 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -53,6 +53,7 @@ extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
extern void startup_64_setup_gdt_idt(void);
extern void startup_64_load_idt(void *vc_handler);
+extern void __pi_startup_64_load_idt(void *vc_handler);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 0020d77a0800..01a6e4dbe423 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -208,6 +208,7 @@ struct snp_psc_desc {
#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */
#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */
#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */
+#define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
index 3dfd306d1c9e..c58c47c68ab6 100644
--- a/arch/x86/include/asm/sev-internal.h
+++ b/arch/x86/include/asm/sev-internal.h
@@ -2,7 +2,6 @@
#define DR7_RESET_VALUE 0x400
-extern struct ghcb boot_ghcb_page;
extern u64 sev_hv_features;
extern u64 sev_secrets_pa;
@@ -56,31 +55,15 @@ DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op);
+ unsigned long npages, const struct psc_desc *desc);
DECLARE_PER_CPU(struct svsm_ca *, svsm_caa);
DECLARE_PER_CPU(u64, svsm_caa_pa);
-extern struct svsm_ca *boot_svsm_caa;
extern u64 boot_svsm_caa_pa;
-static __always_inline struct svsm_ca *svsm_get_caa(void)
-{
- if (sev_cfg.use_cas)
- return this_cpu_read(svsm_caa);
- else
- return boot_svsm_caa;
-}
-
-static __always_inline u64 svsm_get_caa_pa(void)
-{
- if (sev_cfg.use_cas)
- return this_cpu_read(svsm_caa_pa);
- else
- return boot_svsm_caa_pa;
-}
-
-int svsm_perform_call_protocol(struct svsm_call *call);
+enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt);
+void vc_forward_exception(struct es_em_ctxt *ctxt);
static inline u64 sev_es_rd_ghcb_msr(void)
{
@@ -97,9 +80,8 @@ static __always_inline void sev_es_wr_ghcb_msr(u64 val)
native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}
-void snp_register_ghcb_early(unsigned long paddr);
-bool sev_es_negotiate_protocol(void);
-bool sev_es_check_cpu_features(void);
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write);
+
u64 get_hv_features(void);
const struct snp_cpuid_table *snp_cpuid_get_table(void);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 465b19fd1a2d..f9046c4b9a2b 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -503,6 +503,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
}
void setup_ghcb(void);
+void snp_register_ghcb_early(unsigned long paddr);
void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
@@ -511,14 +512,12 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
-void __noreturn snp_abort(void);
void snp_dmi_setup(void);
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
void sev_show_status(void);
-void snp_update_svsm_ca(void);
int prepare_pte_enc(struct pte_enc_desc *d);
void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
void snp_kexec_finish(void);
@@ -533,6 +532,10 @@ int snp_svsm_vtpm_send_command(u8 *buffer);
void __init snp_secure_tsc_prepare(void);
void __init snp_secure_tsc_init(void);
+enum es_result savic_register_gpa(u64 gpa);
+enum es_result savic_unregister_gpa(u64 *gpa);
+u64 savic_ghcb_msr_read(u32 reg);
+void savic_ghcb_msr_write(u32 reg, u64 value);
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
@@ -540,8 +543,6 @@ static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
-void vc_forward_exception(struct es_em_ctxt *ctxt);
-
/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
u32 fn;
@@ -552,7 +553,13 @@ struct cpuid_leaf {
u32 edx;
};
-int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf);
+int svsm_perform_msr_protocol(struct svsm_call *call);
+int __pi_svsm_perform_msr_protocol(struct svsm_call *call);
+int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf);
+
+void svsm_issue_call(struct svsm_call *call, u8 *pending);
+int svsm_process_result_codes(struct svsm_call *call);
void __noreturn sev_es_terminate(unsigned int set, unsigned int reason);
enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
@@ -560,7 +567,18 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
u64 exit_code, u64 exit_info_1,
u64 exit_info_2);
+bool sev_es_negotiate_protocol(void);
+bool sev_es_check_cpu_features(void);
+
+extern u16 ghcb_version;
extern struct ghcb *boot_ghcb;
+extern bool sev_snp_needs_sfw;
+
+struct psc_desc {
+ enum psc_op op;
+ struct svsm_ca *ca;
+ u64 caa_pa;
+};
static inline void sev_evict_cache(void *va, int npages)
{
@@ -600,7 +618,6 @@ static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npag
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
-static inline void snp_abort(void) { }
static inline void snp_dmi_setup(void) { }
static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
{
@@ -610,7 +627,6 @@ static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
-static inline void snp_update_svsm_ca(void) { }
static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
static inline void snp_kexec_finish(void) { }
@@ -624,6 +640,10 @@ static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
static inline void __init snp_secure_tsc_prepare(void) { }
static inline void __init snp_secure_tsc_init(void) { }
static inline void sev_evict_cache(void *va, int npages) {}
+static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; }
+static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; }
+static inline void savic_ghcb_msr_write(u32 reg, u64 value) { }
+static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
@@ -635,9 +655,13 @@ void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
-void snp_leak_pages(u64 pfn, unsigned int npages);
+void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp);
void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
+static inline void snp_leak_pages(u64 pfn, unsigned int pages)
+{
+ __snp_leak_pages(pfn, pages, true);
+}
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_rmptable_init(void) { return -ENOSYS; }
@@ -650,6 +674,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
return -ENODEV;
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) {}
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
static inline void snp_fixup_e820_tables(void) {}
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 9c640a521a67..650e3256ea7d 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -118,6 +118,10 @@
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
+#define SVM_VMGEXIT_SAVIC 0x8000001a
+#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
+#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1
+#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 52d1808ee360..581db89477f9 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -18,6 +18,7 @@ ifeq ($(CONFIG_X86_64),y)
# APIC probe will depend on the listing order here
obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
+obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o
obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
obj-y += apic_flat_64.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d73ba5a7b623..680d305589a3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -592,6 +592,8 @@ static void setup_APIC_timer(void)
0xF, ~0UL);
} else
clockevents_register_device(levt);
+
+ apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true);
}
/*
@@ -1168,6 +1170,9 @@ void disable_local_APIC(void)
if (!apic_accessible())
return;
+ if (apic->teardown)
+ apic->teardown();
+
apic_soft_disable();
#ifdef CONFIG_X86_32
@@ -1428,63 +1433,61 @@ union apic_ir {
u32 regs[APIC_IR_REGS];
};
-static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
+static bool apic_check_and_eoi_isr(union apic_ir *isr)
{
int i, bit;
- /* Read the IRRs */
- for (i = 0; i < APIC_IR_REGS; i++)
- irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
-
/* Read the ISRs */
for (i = 0; i < APIC_IR_REGS; i++)
isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
+ /* If the ISR map is empty, nothing to do here. */
+ if (bitmap_empty(isr->map, APIC_IR_BITS))
+ return true;
+
/*
- * If the ISR map is not empty. ACK the APIC and run another round
- * to verify whether a pending IRR has been unblocked and turned
- * into a ISR.
+ * There can be multiple ISR bits set when a high priority
+ * interrupt preempted a lower priority one. Issue an EOI for each
+ * set bit. The priority traversal order does not matter as there
+ * can't be new ISR bits raised at this point. What matters is that
+ * an EOI is issued for each ISR bit.
*/
- if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
- /*
- * There can be multiple ISR bits set when a high priority
- * interrupt preempted a lower priority one. Issue an ACK
- * per set bit.
- */
- for_each_set_bit(bit, isr->map, APIC_IR_BITS)
- apic_eoi();
- return true;
- }
+ for_each_set_bit(bit, isr->map, APIC_IR_BITS)
+ apic_eoi();
- return !bitmap_empty(irr->map, APIC_IR_BITS);
+ /* Reread the ISRs, they should be empty now */
+ for (i = 0; i < APIC_IR_REGS; i++)
+ isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
+
+ return bitmap_empty(isr->map, APIC_IR_BITS);
}
/*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
+ * If a CPU services an interrupt and crashes before issuing EOI to the
+ * local APIC, the corresponding ISR bit is still set when the crashing CPU
+ * jumps into a crash kernel. Read the ISR and issue an EOI for each set
+ * bit to acknowledge it as otherwise these slots would be locked forever
+ * waiting for an EOI.
*
- * Most probably by now the CPU has serviced that pending interrupt and it
- * might not have done the apic_eoi() because it thought, interrupt
- * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
- * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
- * a vector might get locked. It was noticed for timer irq (vector
- * 0x31). Issue an extra EOI to clear ISR.
+ * If there are pending bits in the IRR, then they won't be converted into
+ * ISR bits as the CPU has interrupts disabled. They will be delivered once
+ * the CPU enables interrupts and there is nothing which can prevent that.
*
- * If there are pending IRR bits they turn into ISR bits after a higher
- * priority ISR bit has been acked.
+ * In the worst case this results in spurious interrupt warnings.
*/
-static void apic_pending_intr_clear(void)
+static void apic_clear_isr(void)
{
- union apic_ir irr, isr;
+ union apic_ir ir;
unsigned int i;
- /* 512 loops are way oversized and give the APIC a chance to obey. */
- for (i = 0; i < 512; i++) {
- if (!apic_check_and_ack(&irr, &isr))
- return;
- }
- /* Dump the IRR/ISR content if that failed */
- pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
+ if (!apic_check_and_eoi_isr(&ir))
+ pr_warn("APIC: Stale ISR: %256pb\n", ir.map);
+
+ for (i = 0; i < APIC_IR_REGS; i++)
+ ir.regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+ if (!bitmap_empty(ir.map, APIC_IR_BITS))
+ pr_warn("APIC: Stale IRR: %256pb\n", ir.map);
}
/**
@@ -1503,6 +1506,9 @@ static void setup_local_APIC(void)
return;
}
+ if (apic->setup)
+ apic->setup();
+
/*
* If this comes from kexec/kcrash the APIC might be enabled in
* SPIV. Soft disable it before doing further initialization.
@@ -1541,8 +1547,7 @@ static void setup_local_APIC(void)
value |= 0x10;
apic_write(APIC_TASKPRI, value);
- /* Clear eventually stale ISR/IRR bits */
- apic_pending_intr_clear();
+ apic_clear_isr();
/*
* Now that we are all set up, enable the APIC
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a947b46a8b64..bddc54465399 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -134,13 +134,20 @@ static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
apicd->hw_irq_cfg.vector = vector;
apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
+
+ apic_update_vector(cpu, vector, true);
+
irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
- trace_vector_config(irqd->irq, vector, cpu,
- apicd->hw_irq_cfg.dest_apicid);
+ trace_vector_config(irqd->irq, vector, cpu, apicd->hw_irq_cfg.dest_apicid);
}
-static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
- unsigned int newcpu)
+static void apic_free_vector(unsigned int cpu, unsigned int vector, bool managed)
+{
+ apic_update_vector(cpu, vector, false);
+ irq_matrix_free(vector_matrix, cpu, vector, managed);
+}
+
+static void chip_data_update(struct irq_data *irqd, unsigned int newvec, unsigned int newcpu)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
struct irq_desc *desc = irq_data_to_desc(irqd);
@@ -174,8 +181,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
apicd->prev_cpu = apicd->cpu;
WARN_ON_ONCE(apicd->cpu == newcpu);
} else {
- irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
- managed);
+ apic_free_vector(apicd->cpu, apicd->vector, managed);
}
setnew:
@@ -261,7 +267,7 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
trace_vector_alloc(irqd->irq, vector, resvd, vector);
if (vector < 0)
return vector;
- apic_update_vector(irqd, vector, cpu);
+ chip_data_update(irqd, vector, cpu);
return 0;
}
@@ -337,7 +343,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
trace_vector_alloc_managed(irqd->irq, vector, vector);
if (vector < 0)
return vector;
- apic_update_vector(irqd, vector, cpu);
+ chip_data_update(irqd, vector, cpu);
return 0;
}
@@ -357,7 +363,7 @@ static void clear_irq_vector(struct irq_data *irqd)
apicd->prev_cpu);
per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
- irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
+ apic_free_vector(apicd->cpu, vector, managed);
apicd->vector = 0;
/* Clean up move in progress */
@@ -366,7 +372,7 @@ static void clear_irq_vector(struct irq_data *irqd)
return;
per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
- irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
+ apic_free_vector(apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
hlist_del_init(&apicd->clist);
@@ -905,7 +911,7 @@ static void free_moved_vector(struct apic_chip_data *apicd)
* affinity mask comes online.
*/
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
- irq_matrix_free(vector_matrix, cpu, vector, managed);
+ apic_free_vector(cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
hlist_del_init(&apicd->clist);
apicd->prev_vector = 0;
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
new file mode 100644
index 000000000000..dbc5678bc3b6
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure AVIC Support (SEV-SNP Guests)
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ *
+ * Author: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
+ */
+
+#include <linux/cc_platform.h>
+#include <linux/cpumask.h>
+#include <linux/percpu-defs.h>
+#include <linux/align.h>
+
+#include <asm/apic.h>
+#include <asm/sev.h>
+
+#include "local.h"
+
+struct secure_avic_page {
+ u8 regs[PAGE_SIZE];
+} __aligned(PAGE_SIZE);
+
+static struct secure_avic_page __percpu *savic_page __ro_after_init;
+
+static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
+}
+
+static inline void *get_reg_bitmap(unsigned int cpu, unsigned int offset)
+{
+ return &per_cpu_ptr(savic_page, cpu)->regs[offset];
+}
+
+static inline void update_vector(unsigned int cpu, unsigned int offset,
+ unsigned int vector, bool set)
+{
+ void *bitmap = get_reg_bitmap(cpu, offset);
+
+ if (set)
+ apic_set_vector(vector, bitmap);
+ else
+ apic_clear_vector(vector, bitmap);
+}
+
+#define SAVIC_ALLOWED_IRR 0x204
+
+/*
+ * When Secure AVIC is enabled, RDMSR/WRMSR of the APIC registers
+ * result in a #VC exception (for non-accelerated register accesses)
+ * with VMEXIT_AVIC_NOACCEL error code. The #VC exception handler
+ * can read/write the x2APIC register in the guest APIC backing page.
+ *
+ * Since doing this would increase the latency of accessing x2APIC
+ * registers, instead of doing RDMSR/WRMSR based accesses and
+ * handling the APIC register reads/writes in the #VC exception handler,
+ * the read() and write() callbacks directly read/write the APIC register
+ * from/to the vCPU's APIC backing page.
+ */
+static u32 savic_read(u32 reg)
+{
+ void *ap = this_cpu_ptr(savic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TMCCT:
+ case APIC_TDCR:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ return savic_ghcb_msr_read(reg);
+ case APIC_ID:
+ case APIC_LVR:
+ case APIC_TASKPRI:
+ case APIC_ARBPRI:
+ case APIC_PROCPRI:
+ case APIC_LDR:
+ case APIC_SPIV:
+ case APIC_ESR:
+ case APIC_EFEAT:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ return apic_get_reg(ap, reg);
+ case APIC_ICR:
+ return (u32)apic_get_reg64(ap, reg);
+ case APIC_ISR ... APIC_ISR + 0x70:
+ case APIC_TMR ... APIC_TMR + 0x70:
+ if (WARN_ONCE(!IS_ALIGNED(reg, 16),
+ "APIC register read offset 0x%x not aligned at 16 bytes", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ /* IRR and ALLOWED_IRR offset range */
+ case APIC_IRR ... APIC_IRR + 0x74:
+ /*
+ * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from
+ * their respective base offset. APIC_IRRs are in the range
+ *
+ * (0x200, 0x210, ..., 0x270)
+ *
+ * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the range
+ *
+ * (0x204, 0x214, ..., 0x274).
+ *
+ * Filter out everything else.
+ */
+ if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
+ IS_ALIGNED(reg - 4, 16)),
+ "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ default:
+ pr_err("Error reading unknown Secure AVIC reg offset 0x%x\n", reg);
+ return 0;
+ }
+}
+
+#define SAVIC_NMI_REQ 0x278
+
+/*
+ * On WRMSR to APIC_SELF_IPI register by the guest, Secure AVIC hardware
+ * updates the APIC_IRR in the APIC backing page of the vCPU. In addition,
+ * hardware evaluates the new APIC_IRR update for interrupt injection to
+ * the vCPU. So, self IPIs are hardware-accelerated.
+ */
+static inline void self_ipi_reg_write(unsigned int vector)
+{
+ native_apic_msr_write(APIC_SELF_IPI, vector);
+}
+
+static void send_ipi_dest(unsigned int cpu, unsigned int vector, bool nmi)
+{
+ if (nmi)
+ apic_set_reg(per_cpu_ptr(savic_page, cpu), SAVIC_NMI_REQ, 1);
+ else
+ update_vector(cpu, APIC_IRR, vector, true);
+}
+
+static void send_ipi_allbut(unsigned int vector, bool nmi)
+{
+ unsigned int cpu, src_cpu;
+
+ guard(irqsave)();
+
+ src_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ if (cpu == src_cpu)
+ continue;
+ send_ipi_dest(cpu, vector, nmi);
+ }
+}
+
+static inline void self_ipi(unsigned int vector, bool nmi)
+{
+ u32 icr_low = APIC_SELF_IPI | vector;
+
+ if (nmi)
+ icr_low |= APIC_DM_NMI;
+
+ native_x2apic_icr_write(icr_low, 0);
+}
+
+static void savic_icr_write(u32 icr_low, u32 icr_high)
+{
+ unsigned int dsh, vector;
+ u64 icr_data;
+ bool nmi;
+
+ dsh = icr_low & APIC_DEST_ALLBUT;
+ vector = icr_low & APIC_VECTOR_MASK;
+ nmi = ((icr_low & APIC_DM_FIXED_MASK) == APIC_DM_NMI);
+
+ switch (dsh) {
+ case APIC_DEST_SELF:
+ self_ipi(vector, nmi);
+ break;
+ case APIC_DEST_ALLINC:
+ self_ipi(vector, nmi);
+ fallthrough;
+ case APIC_DEST_ALLBUT:
+ send_ipi_allbut(vector, nmi);
+ break;
+ default:
+ send_ipi_dest(icr_high, vector, nmi);
+ break;
+ }
+
+ icr_data = ((u64)icr_high) << 32 | icr_low;
+ if (dsh != APIC_DEST_SELF)
+ savic_ghcb_msr_write(APIC_ICR, icr_data);
+ apic_set_reg64(this_cpu_ptr(savic_page), APIC_ICR, icr_data);
+}
+
+static void savic_write(u32 reg, u32 data)
+{
+ void *ap = this_cpu_ptr(savic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TDCR:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVTERR:
+ savic_ghcb_msr_write(reg, data);
+ break;
+ case APIC_TASKPRI:
+ case APIC_EOI:
+ case APIC_SPIV:
+ case SAVIC_NMI_REQ:
+ case APIC_ESR:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ apic_set_reg(ap, reg, data);
+ break;
+ case APIC_ICR:
+ savic_icr_write(data, 0);
+ break;
+ case APIC_SELF_IPI:
+ self_ipi_reg_write(data);
+ break;
+ /* ALLOWED_IRR offsets are writable */
+ case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
+ if (IS_ALIGNED(reg - 4, 16)) {
+ apic_set_reg(ap, reg, data);
+ break;
+ }
+ fallthrough;
+ default:
+ pr_err("Error writing unknown Secure AVIC reg offset 0x%x\n", reg);
+ }
+}
+
+static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh)
+{
+ unsigned int icr_low;
+
+ icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL);
+ savic_icr_write(icr_low, dest);
+}
+
+static void savic_send_ipi(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+ send_ipi(dest, vector, 0);
+}
+
+static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self)
+{
+ unsigned int cpu, this_cpu;
+
+ guard(irqsave)();
+
+ this_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, mask) {
+ if (excl_self && cpu == this_cpu)
+ continue;
+ send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0);
+ }
+}
+
+static void savic_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, false);
+}
+
+static void savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, true);
+}
+
+static void savic_send_ipi_allbutself(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLBUT);
+}
+
+static void savic_send_ipi_all(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLINC);
+}
+
+static void savic_send_ipi_self(int vector)
+{
+ self_ipi_reg_write(vector);
+}
+
+static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set);
+}
+
+static void savic_eoi(void)
+{
+ unsigned int cpu;
+ int vec;
+
+ cpu = raw_smp_processor_id();
+ vec = apic_find_highest_vector(get_reg_bitmap(cpu, APIC_ISR));
+ if (WARN_ONCE(vec == -1, "EOI write while no active interrupt in APIC_ISR"))
+ return;
+
+ /* Is this a level-triggered interrupt? */
+ if (apic_test_vector(vec, get_reg_bitmap(cpu, APIC_TMR))) {
+ update_vector(cpu, APIC_ISR, vec, false);
+ /*
+ * Propagate the EOI write to the hypervisor for level-triggered
+ * interrupts. Return to the guest from GHCB protocol event takes
+ * care of re-evaluating interrupt state.
+ */
+ savic_ghcb_msr_write(APIC_EOI, 0);
+ } else {
+ /*
+ * Hardware clears APIC_ISR and re-evaluates the interrupt state
+ * to determine if there is any pending interrupt which can be
+ * delivered to CPU.
+ */
+ native_apic_msr_eoi();
+ }
+}
+
+static void savic_teardown(void)
+{
+ /* Disable Secure AVIC */
+ native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 0);
+ savic_unregister_gpa(NULL);
+}
+
+static void savic_setup(void)
+{
+ void *ap = this_cpu_ptr(savic_page);
+ enum es_result res;
+ unsigned long gpa;
+
+ /*
+ * Before Secure AVIC is enabled, APIC MSR reads are intercepted.
+ * APIC_ID MSR read returns the value from the hypervisor.
+ */
+ apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID));
+
+ gpa = __pa(ap);
+
+ /*
+ * The NPT entry for a vCPU's APIC backing page must always be
+ * present when the vCPU is running in order for Secure AVIC to
+ * function. A VMEXIT_BUSY is returned on VMRUN and the vCPU cannot
+ * be resumed if the NPT entry for the APIC backing page is not
+ * present. Notify the hypervisor of the GPA of the vCPU's APIC
+ * backing page by calling savic_register_gpa(). Before executing
+ * VMRUN, the hypervisor makes use of this information to make sure
+ * the APIC backing page is mapped in NPT.
+ */
+ res = savic_register_gpa(gpa);
+ if (res != ES_OK)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+
+ native_wrmsrq(MSR_AMD64_SAVIC_CONTROL,
+ gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI);
+}
+
+static int savic_probe(void)
+{
+ if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
+ return 0;
+
+ if (!x2apic_mode) {
+ pr_err("Secure AVIC enabled in non x2APIC mode\n");
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+ /* unreachable */
+ }
+
+ savic_page = alloc_percpu(struct secure_avic_page);
+ if (!savic_page)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+
+ return 1;
+}
+
+static struct apic apic_x2apic_savic __ro_after_init = {
+
+ .name = "secure avic x2apic",
+ .probe = savic_probe,
+ .acpi_madt_oem_check = savic_acpi_madt_oem_check,
+ .setup = savic_setup,
+ .teardown = savic_teardown,
+
+ .dest_mode_logical = false,
+
+ .disable_esr = 0,
+
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+
+ .max_apic_id = UINT_MAX,
+ .x2apic_set_max_apicid = true,
+ .get_apic_id = x2apic_get_apic_id,
+
+ .calc_dest_apicid = apic_default_calc_apicid,
+
+ .send_IPI = savic_send_ipi,
+ .send_IPI_mask = savic_send_ipi_mask,
+ .send_IPI_mask_allbutself = savic_send_ipi_mask_allbutself,
+ .send_IPI_allbutself = savic_send_ipi_allbutself,
+ .send_IPI_all = savic_send_ipi_all,
+ .send_IPI_self = savic_send_ipi_self,
+
+ .nmi_to_offline_cpu = true,
+
+ .read = savic_read,
+ .write = savic_write,
+ .eoi = savic_eoi,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = savic_icr_write,
+
+ .update_vector = savic_update_vector,
+};
+
+apic_driver(apic_x2apic_savic);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 533fcf5636fc..fd28b53dbac5 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -52,10 +52,13 @@ SYM_PIC_ALIAS(next_early_pgt);
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
unsigned int __pgtable_l5_enabled __ro_after_init;
+SYM_PIC_ALIAS(__pgtable_l5_enabled);
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
+SYM_PIC_ALIAS(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
+SYM_PIC_ALIAS(ptrs_per_p4d);
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
@@ -316,5 +319,5 @@ void early_setup_idt(void)
handler = vc_boot_ghcb;
}
- startup_64_load_idt(handler);
+ __pi_startup_64_load_idt(handler);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 76743dfad6ab..80ef5d386b03 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -61,7 +61,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
* any particular GDT layout, because we load our own as soon as we
* can.
*/
-__HEAD
+ __INIT
SYM_CODE_START(startup_32)
movl pa(initial_stack),%ecx
@@ -136,6 +136,9 @@ SYM_CODE_END(startup_32)
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
*/
+#ifdef CONFIG_HOTPLUG_CPU
+ .text
+#endif
SYM_FUNC_START(startup_32_smp)
cld
movl $(__BOOT_DS),%eax
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 3e9b3a3bd039..21816b48537c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -33,7 +33,7 @@
* because we need identity-mapped pages.
*/
- __HEAD
+ __INIT
.code64
SYM_CODE_START_NOALIGN(startup_64)
UNWIND_HINT_END_OF_STACK
@@ -71,7 +71,7 @@ SYM_CODE_START_NOALIGN(startup_64)
xorl %edx, %edx
wrmsr
- call startup_64_setup_gdt_idt
+ call __pi_startup_64_setup_gdt_idt
/* Now switch to __KERNEL_CS so IRET works reliably */
pushq $__KERNEL_CS
@@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(startup_64)
* subsequent code. Pass the boot_params pointer as the first argument.
*/
movq %r15, %rdi
- call sme_enable
+ call __pi_sme_enable
#endif
/* Sanitize CPU configuration */
@@ -111,7 +111,7 @@ SYM_CODE_START_NOALIGN(startup_64)
* programmed into CR3.
*/
movq %r15, %rsi
- call __startup_64
+ call __pi___startup_64
/* Form the CR3 value being sure to include the CR3 modifier */
leaq early_top_pgt(%rip), %rcx
@@ -562,7 +562,7 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
/* Call C handler */
movq %rsp, %rdi
movq ORIG_RAX(%rsp), %rsi
- call do_vc_no_ghcb
+ call __pi_do_vc_no_ghcb
/* Unwind pt_regs */
POP_REGS
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4fa0be732af1..d7af4a64c211 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -160,11 +160,6 @@ SECTIONS
} :text = 0xcccccccc
- /* bootstrapping code */
- .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
- HEAD_TEXT
- } :text = 0xcccccccc
-
/* End of text section, which should occupy whole number of pages */
_etext = .;
. = ALIGN(PAGE_SIZE);
@@ -227,6 +222,8 @@ SECTIONS
*/
.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
*(.altinstr_aux)
+ . = ALIGN(PAGE_SIZE);
+ __inittext_end = .;
}
INIT_DATA_SECTION(16)
@@ -535,3 +532,5 @@ xen_elfnote_entry_value =
xen_elfnote_phys32_entry_value =
ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET);
#endif
+
+#include "../boot/startup/exports.h"
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index faf3a13fb6ba..2f8c32173972 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -536,12 +536,6 @@ void __init sme_early_init(void)
x86_init.resources.dmi_setup = snp_dmi_setup;
}
- /*
- * Switch the SVSM CA mapping (if active) from identity mapped to
- * kernel mapped.
- */
- snp_update_svsm_ca();
-
if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
}
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index f8a33b25ae86..edbf9c998848 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -16,7 +16,7 @@
.text
.code64
-SYM_FUNC_START(sme_encrypt_execute)
+SYM_FUNC_START(__pi_sme_encrypt_execute)
/*
* Entry parameters:
@@ -69,9 +69,9 @@ SYM_FUNC_START(sme_encrypt_execute)
ANNOTATE_UNRET_SAFE
ret
int3
-SYM_FUNC_END(sme_encrypt_execute)
+SYM_FUNC_END(__pi_sme_encrypt_execute)
-SYM_FUNC_START(__enc_copy)
+SYM_FUNC_START_LOCAL(__enc_copy)
ANNOTATE_NOENDBR
/*
* Routine used to encrypt memory in place.
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 1d78e5631bb8..344030c1a81d 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -24,7 +24,7 @@
#include <asm/nospec-branch.h>
#include <xen/interface/elfnote.h>
- __HEAD
+ __INIT
/*
* Entry point for PVH guests.
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5778bc498415..e5a2b9a912d1 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -740,10 +740,10 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
const char *symname)
{
- int headtext = !strcmp(sec_name(sec->shdr.sh_info), ".head.text");
unsigned r_type = ELF64_R_TYPE(rel->r_info);
ElfW(Addr) offset = rel->r_offset;
int shn_abs = (sym->st_shndx == SHN_ABS) && !is_reloc(S_REL, symname);
+
if (sym->st_shndx == SHN_UNDEF)
return 0;
@@ -783,12 +783,6 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
break;
}
- if (headtext) {
- die("Absolute reference to symbol '%s' not permitted in .head.text\n",
- symname);
- break;
- }
-
/*
* Relocation offsets for 64 bit kernels are output
* as 32 bits and sign extended back to 64 bits when
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 942372e69b4d..ee643a6cd691 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -1029,7 +1029,7 @@ int rmp_make_shared(u64 pfn, enum pg_level level)
}
EXPORT_SYMBOL_GPL(rmp_make_shared);
-void snp_leak_pages(u64 pfn, unsigned int npages)
+void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp)
{
struct page *page = pfn_to_page(pfn);
@@ -1052,14 +1052,15 @@ void snp_leak_pages(u64 pfn, unsigned int npages)
(PageHead(page) && compound_nr(page) <= npages))
list_add_tail(&page->buddy_list, &snp_leaked_pages_list);
- dump_rmpentry(pfn);
+ if (dump_rmp)
+ dump_rmpentry(pfn);
snp_nr_leaked_pages++;
pfn++;
page++;
}
spin_unlock(&snp_leaked_pages_list_lock);
}
-EXPORT_SYMBOL_GPL(snp_leak_pages);
+EXPORT_SYMBOL_GPL(__snp_leak_pages);
void kdump_sev_callback(void)
{
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 394484929dae..a9626b30044a 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -13,7 +13,8 @@ ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o \
tee-dev.o \
platform-access.o \
dbc.o \
- hsti.o
+ hsti.o \
+ sfs.o
obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 1c5a7189631e..9e21da0e298a 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -17,6 +17,7 @@
#include "psp-dev.h"
#include "sev-dev.h"
#include "tee-dev.h"
+#include "sfs.h"
#include "platform-access.h"
#include "dbc.h"
#include "hsti.h"
@@ -182,6 +183,17 @@ static int psp_check_tee_support(struct psp_device *psp)
return 0;
}
+/*
+ * Check the PSP capability register for the SFS feature bit.
+ *
+ * Returns 0 when psp->capability.sfs is set, otherwise logs a debug
+ * message and returns -ENODEV.
+ */
+static int psp_check_sfs_support(struct psp_device *psp)
+{
+	if (psp->capability.sfs)
+		return 0;
+
+	/* Device does not advertise the SFS feature */
+	dev_dbg(psp->dev, "psp does not support SFS\n");
+
+	return -ENODEV;
+}
+
static int psp_init(struct psp_device *psp)
{
int ret;
@@ -198,6 +210,12 @@ static int psp_init(struct psp_device *psp)
return ret;
}
+ if (!psp_check_sfs_support(psp)) {
+ ret = sfs_dev_init(psp);
+ if (ret)
+ return ret;
+ }
+
if (psp->vdata->platform_access) {
ret = platform_access_dev_init(psp);
if (ret)
@@ -302,6 +320,8 @@ void psp_dev_destroy(struct sp_device *sp)
tee_dev_destroy(psp);
+ sfs_dev_destroy(psp);
+
dbc_dev_destroy(psp);
platform_access_dev_destroy(psp);
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
index e43ce87ede76..268c83f298cb 100644
--- a/drivers/crypto/ccp/psp-dev.h
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -32,7 +32,8 @@ union psp_cap_register {
unsigned int sev :1,
tee :1,
dbc_thru_ext :1,
- rsvd1 :4,
+ sfs :1,
+ rsvd1 :3,
security_reporting :1,
fused_part :1,
rsvd2 :1,
@@ -68,6 +69,7 @@ struct psp_device {
void *tee_data;
void *platform_access_data;
void *dbc_data;
+ void *sfs_data;
union psp_cap_register capability;
};
@@ -118,12 +120,16 @@ struct psp_ext_request {
* @PSP_SUB_CMD_DBC_SET_UID: Set UID for DBC
* @PSP_SUB_CMD_DBC_GET_PARAMETER: Get parameter from DBC
* @PSP_SUB_CMD_DBC_SET_PARAMETER: Set parameter for DBC
+ * @PSP_SUB_CMD_SFS_GET_FW_VERS: Get firmware versions for ASP and other MP
+ * @PSP_SUB_CMD_SFS_UPDATE: Command to load, verify and execute SFS package
*/
enum psp_sub_cmd {
PSP_SUB_CMD_DBC_GET_NONCE = PSP_DYNAMIC_BOOST_GET_NONCE,
PSP_SUB_CMD_DBC_SET_UID = PSP_DYNAMIC_BOOST_SET_UID,
PSP_SUB_CMD_DBC_GET_PARAMETER = PSP_DYNAMIC_BOOST_GET_PARAMETER,
PSP_SUB_CMD_DBC_SET_PARAMETER = PSP_DYNAMIC_BOOST_SET_PARAMETER,
+ PSP_SUB_CMD_SFS_GET_FW_VERS = PSP_SFS_GET_FW_VERSIONS,
+ PSP_SUB_CMD_SFS_UPDATE = PSP_SFS_UPDATE,
};
int psp_extended_mailbox_cmd(struct psp_device *psp, unsigned int timeout_msecs,
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 9f5ccc1720cb..65d6d0af140a 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -82,6 +82,21 @@ MODULE_FIRMWARE("amd/amd_sev_fam19h_model1xh.sbin"); /* 4th gen EPYC */
static bool psp_dead;
static int psp_timeout;
+/* State recorded for each allocation made by snp_alloc_hv_fixed_pages() */
+enum snp_hv_fixed_pages_state {
+ /* Value 0, i.e. the state of an entry fresh from kzalloc() */
+ ALLOCATED,
+ /* Applied to every entry by snp_hv_fixed_pages_state_update() */
+ HV_FIXED,
+};
+
+/*
+ * Bookkeeping record for one allocation handed out by
+ * snp_alloc_hv_fixed_pages().
+ */
+struct snp_hv_fixed_pages_entry {
+ /* Linkage on the snp_hv_fixed_pages list */
+ struct list_head list;
+ /* First page of the allocation, as returned by alloc_pages() */
+ struct page *page;
+ /* Allocation order passed to alloc_pages() / __free_pages() */
+ unsigned int order;
+ /* Set when an HV_FIXED entry was released and may be handed out again */
+ bool free;
+ /* ALLOCATED or HV_FIXED, see enum snp_hv_fixed_pages_state */
+ enum snp_hv_fixed_pages_state page_state;
+};
+
+/* All tracked allocations; the users assert or take sev_cmd_mutex */
+static LIST_HEAD(snp_hv_fixed_pages);
+
/* Trusted Memory Region (TMR):
* The TMR is a 1MB area that must be 1MB aligned. Use the page allocator
* to allocate the memory, which will return aligned memory for the specified
@@ -1073,6 +1088,165 @@ static void snp_set_hsave_pa(void *arg)
wrmsrq(MSR_VM_HSAVE_PA, 0);
}
+/* Hypervisor Fixed pages API interface */
+/*
+ * Record @page_state on every tracked HV_Fixed allocation.
+ *
+ * @sev is not used by the body. The caller must hold sev_cmd_mutex.
+ */
+static void snp_hv_fixed_pages_state_update(struct sev_device *sev,
+					    enum snp_hv_fixed_pages_state page_state)
+{
+	struct snp_hv_fixed_pages_entry *pos;
+
+	/* The tracking list is serialized by sev_cmd_mutex */
+	lockdep_assert_held(&sev_cmd_mutex);
+
+	/* Iterating an empty list does nothing, so no separate check is needed */
+	list_for_each_entry(pos, &snp_hv_fixed_pages, list) {
+		pos->page_state = page_state;
+	}
+}
+
+/*
+ * Allocate HV_FIXED pages in 2MB aligned sizes to ensure the whole
+ * 2MB pages are marked as HV_FIXED.
+ *
+ * @num_2mb_pages: number of 2MB units requested; must be non-zero.
+ *
+ * Returns the first page of a zeroed allocation, or NULL when there is no
+ * master PSP/SEV device, the request is empty, SNP is already initialized,
+ * or memory allocation fails. A previously freed entry of exactly the same
+ * order is re-used before new pages are allocated.
+ */
+struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages)
+{
+	struct psp_device *psp_master = psp_get_master_device();
+	struct snp_hv_fixed_pages_entry *entry;
+	struct sev_device *sev;
+	unsigned int order;
+	struct page *page;
+
+	if (!psp_master || !psp_master->sev_data)
+		return NULL;
+
+	/* get_order() is undefined for a size of 0, so reject empty requests */
+	if (!num_2mb_pages)
+		return NULL;
+
+	sev = psp_master->sev_data;
+
+	order = get_order(PMD_SIZE * num_2mb_pages);
+
+	/*
+	 * SNP_INIT_EX is protected by sev_cmd_mutex, therefore this list
+	 * also needs to be protected using the same mutex.
+	 */
+	guard(mutex)(&sev_cmd_mutex);
+
+	/*
+	 * This API uses SNP_INIT_EX to transition allocated pages to HV_Fixed
+	 * page state, fail if SNP is already initialized.
+	 */
+	if (sev->snp_initialized)
+		return NULL;
+
+	/* Re-use freed pages that match the request */
+	list_for_each_entry(entry, &snp_hv_fixed_pages, list) {
+		/* Hypervisor fixed page allocator implements exact fit policy */
+		if (entry->order == order && entry->free) {
+			entry->free = false;
+			/* Hand out re-used pages zeroed, like fresh ones */
+			memset(page_address(entry->page), 0,
+			       (1 << entry->order) * PAGE_SIZE);
+			return entry->page;
+		}
+	}
+
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!page)
+		return NULL;
+
+	/* kzalloc() leaves the entry in the ALLOCATED state with free == false */
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		__free_pages(page, order);
+		return NULL;
+	}
+
+	entry->page = page;
+	entry->order = order;
+	list_add_tail(&entry->list, &snp_hv_fixed_pages);
+
+	return page;
+}
+
+/*
+ * Release an allocation obtained from snp_alloc_hv_fixed_pages().
+ *
+ * Does nothing when there is no master PSP/SEV device or when @page is not
+ * tracked on the snp_hv_fixed_pages list.
+ */
+void snp_free_hv_fixed_pages(struct page *page)
+{
+	struct psp_device *psp_master = psp_get_master_device();
+	struct snp_hv_fixed_pages_entry *pos, *found = NULL;
+
+	if (!psp_master || !psp_master->sev_data)
+		return;
+
+	/*
+	 * The list shares sev_cmd_mutex with SNP_INIT_EX, so take the same
+	 * mutex while looking up and modifying entries.
+	 */
+	guard(mutex)(&sev_cmd_mutex);
+
+	list_for_each_entry(pos, &snp_hv_fixed_pages, list) {
+		if (pos->page == page) {
+			found = pos;
+			break;
+		}
+	}
+
+	if (!found)
+		return;
+
+	/*
+	 * HV_FIXED page state cannot be changed until reboot and such pages
+	 * cannot be used by an SNP guest, so they are never handed back to
+	 * the page allocator. Only flag them as free for internal re-use.
+	 */
+	if (found->page_state == HV_FIXED) {
+		found->free = true;
+		return;
+	}
+
+	/* Not yet HV_FIXED: drop the tracking entry and free the pages */
+	list_del(&found->list);
+	__free_pages(page, found->order);
+	kfree(found);
+}
+
+/*
+ * Append one range per tracked HV_Fixed allocation to @range_list.
+ *
+ * Nothing is appended when the tracking list is empty, or when the combined
+ * list would no longer fit the page-sized argument buffer; the latter case
+ * is reported with dev_warn(). The caller must hold sev_cmd_mutex.
+ */
+static void snp_add_hv_fixed_pages(struct sev_device *sev, struct sev_data_range_list *range_list)
+{
+	struct snp_hv_fixed_pages_entry *entry;
+	struct sev_data_range *slot;
+	int total;
+
+	lockdep_assert_held(&sev_cmd_mutex);
+
+	if (list_empty(&snp_hv_fixed_pages))
+		return;
+
+	total = list_count_nodes(&snp_hv_fixed_pages) +
+		range_list->num_elements;
+
+	/*
+	 * The range list header plus all ranges handed to firmware must not
+	 * exceed the page-sized argument buffer.
+	 */
+	if (total * sizeof(*slot) + sizeof(*range_list) > PAGE_SIZE) {
+		dev_warn(sev->dev, "Additional HV_Fixed pages cannot be accommodated, omitting\n");
+		return;
+	}
+
+	/* Start filling right behind the ranges that are already present */
+	slot = &range_list->ranges[range_list->num_elements];
+	list_for_each_entry(entry, &snp_hv_fixed_pages, list) {
+		slot->base = page_to_pfn(entry->page) << PAGE_SHIFT;
+		slot->page_count = 1 << entry->order;
+		slot++;
+	}
+
+	range_list->num_elements = total;
+}
+
+/*
+ * Pass every tracked allocation that is in the HV_FIXED state to
+ * __snp_leak_pages(), with the RMP entry dump disabled.
+ * The caller must hold sev_cmd_mutex.
+ */
+static void snp_leak_hv_fixed_pages(void)
+{
+	struct snp_hv_fixed_pages_entry *pos;
+
+	/* The tracking list is serialized by sev_cmd_mutex */
+	lockdep_assert_held(&sev_cmd_mutex);
+
+	/* An empty list simply yields no iterations */
+	list_for_each_entry(pos, &snp_hv_fixed_pages, list) {
+		if (pos->page_state != HV_FIXED)
+			continue;
+
+		__snp_leak_pages(page_to_pfn(pos->page), 1 << pos->order, false);
+	}
+}
+
static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg)
{
struct sev_data_range_list *range_list = arg;
@@ -1163,6 +1337,12 @@ static int __sev_snp_init_locked(int *error)
return rc;
}
+ /*
+ * Add HV_Fixed pages from other PSP sub-devices, such as SFS to the
+ * HV_Fixed page list.
+ */
+ snp_add_hv_fixed_pages(sev, snp_range_list);
+
memset(&data, 0, sizeof(data));
data.init_rmp = 1;
data.list_paddr_en = 1;
@@ -1202,6 +1382,7 @@ static int __sev_snp_init_locked(int *error)
return rc;
}
+ snp_hv_fixed_pages_state_update(sev, HV_FIXED);
sev->snp_initialized = true;
dev_dbg(sev->dev, "SEV-SNP firmware initialized\n");
@@ -1784,6 +1965,7 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
return ret;
}
+ snp_leak_hv_fixed_pages();
sev->snp_initialized = false;
dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
index 3e4e5574e88a..28021abc85ad 100644
--- a/drivers/crypto/ccp/sev-dev.h
+++ b/drivers/crypto/ccp/sev-dev.h
@@ -65,4 +65,7 @@ void sev_dev_destroy(struct psp_device *psp);
void sev_pci_init(void);
void sev_pci_exit(void);
+struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages);
+void snp_free_hv_fixed_pages(struct page *page);
+
#endif /* __SEV_DEV_H */
diff --git a/drivers/crypto/ccp/sfs.c b/drivers/crypto/ccp/sfs.c
new file mode 100644
index 000000000000..2f4beaafe7ec
--- /dev/null
+++ b/drivers/crypto/ccp/sfs.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure Processor Seamless Firmware Servicing support.
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "sfs.h"
+#include "sev-dev.h"
+
+#define SFS_DEFAULT_TIMEOUT (10 * MSEC_PER_SEC)
+#define SFS_MAX_PAYLOAD_SIZE (2 * 1024 * 1024)
+#define SFS_NUM_2MB_PAGES_CMDBUF (SFS_MAX_PAYLOAD_SIZE / PMD_SIZE)
+#define SFS_NUM_PAGES_CMDBUF (SFS_MAX_PAYLOAD_SIZE / PAGE_SIZE)
+
+static DEFINE_MUTEX(sfs_ioctl_mutex);
+
+static struct sfs_misc_dev *misc_dev;
+
+/*
+ * Submit one SFS sub-command through the PSP extended mailbox.
+ *
+ * The status field of the command header is cleared and @msg is stored as
+ * the sub-command id before the buffer is submitted with
+ * SFS_DEFAULT_TIMEOUT. On -EIO the PSP status and the first 32 bits of the
+ * buf area are logged at debug level.
+ *
+ * Returns the result of psp_extended_mailbox_cmd().
+ */
+static int send_sfs_cmd(struct sfs_device *sfs_dev, int msg)
+{
+	struct sfs_command *cmd = sfs_dev->command_buf;
+	int ret;
+
+	cmd->hdr.status = 0;
+	cmd->hdr.sub_cmd_id = msg;
+
+	ret = psp_extended_mailbox_cmd(sfs_dev->psp, SFS_DEFAULT_TIMEOUT,
+				       (struct psp_ext_request *)cmd);
+	if (ret != -EIO)
+		return ret;
+
+	dev_dbg(sfs_dev->dev,
+		"msg 0x%x failed with PSP error: 0x%x, extended status: 0x%x\n",
+		msg, cmd->hdr.status, *(u32 *)cmd->buf);
+
+	return ret;
+}
+
+/*
+ * Issue PSP_SFS_GET_FW_VERSIONS using the pre-allocated command buffer.
+ *
+ * Returns the result of send_sfs_cmd().
+ */
+static int send_sfs_get_fw_versions(struct sfs_device *sfs_dev)
+{
+	struct sfs_command *cmd = sfs_dev->command_buf;
+
+	cmd->hdr.payload_size = 2 * PAGE_SIZE;
+
+	/*
+	 * The SFS_GET_FW_VERSIONS command requires every byte of the output
+	 * buffer to be preset to 0xC7.
+	 */
+	memset(cmd->sfs_buffer, 0xc7, PAGE_SIZE);
+
+	return send_sfs_cmd(sfs_dev, PSP_SFS_GET_FW_VERSIONS);
+}
+
+/*
+ * Load an SFS update package through the firmware loader and submit it to
+ * the PSP with PSP_SFS_UPDATE.
+ *
+ * @sfs_dev: SFS device whose pre-allocated command buffer is used.
+ * @payload_name: package file name, looked up below the "amd/" firmware
+ *                directory; must be NUL-terminated within
+ *                PAYLOAD_NAME_SIZE bytes.
+ *
+ * Returns -EINVAL for an unterminated name, -ENOENT when the firmware
+ * request fails, -E2BIG when the 64KB-aligned package does not fit the
+ * command buffer, otherwise the result of send_sfs_cmd().
+ */
+static int send_sfs_update_package(struct sfs_device *sfs_dev, const char *payload_name)
+{
+	char payload_path[PAYLOAD_NAME_SIZE + sizeof("amd/")];
+	const struct firmware *firmware;
+	unsigned long package_size;
+	int ret;
+
+	/* Sanitize userspace provided payload name */
+	if (!strnchr(payload_name, PAYLOAD_NAME_SIZE, '\0'))
+		return -EINVAL;
+
+	snprintf(payload_path, sizeof(payload_path), "amd/%s", payload_name);
+
+	ret = firmware_request_nowarn(&firmware, payload_path, sfs_dev->dev);
+	if (ret < 0) {
+		dev_warn_ratelimited(sfs_dev->dev, "firmware request failed for %s (%d)\n",
+				     payload_path, ret);
+		return -ENOENT;
+	}
+
+	/*
+	 * SFS Update Package command's input buffer contains TEE_EXT_CMD_BUFFER
+	 * followed by the Update Package and it should be 64KB aligned.
+	 */
+	package_size = ALIGN(firmware->size + PAGE_SIZE, 0x10000U);
+
+	/*
+	 * SFS command buffer is a pre-allocated 2MB buffer, fail update package
+	 * if SFS payload is larger than the pre-allocated command buffer.
+	 */
+	if (package_size > SFS_MAX_PAYLOAD_SIZE) {
+		/* package_size is unsigned long, so print it with %lu */
+		dev_warn_ratelimited(sfs_dev->dev,
+				     "SFS payload size %lu larger than maximum supported payload size of %u\n",
+				     package_size, SFS_MAX_PAYLOAD_SIZE);
+		release_firmware(firmware);
+		return -E2BIG;
+	}
+
+	/*
+	 * Copy firmware data to a HV_Fixed memory region.
+	 */
+	memcpy(sfs_dev->command_buf->sfs_buffer, firmware->data, firmware->size);
+	sfs_dev->command_buf->hdr.payload_size = package_size;
+
+	/* The image has been copied out; the loader's copy is no longer needed */
+	release_firmware(firmware);
+
+	return send_sfs_cmd(sfs_dev, PSP_SFS_UPDATE);
+}
+
+/*
+ * Copy the PSP status word and the leading 32 bits of the extended status
+ * area of the command buffer to the given userspace locations.
+ *
+ * Returns 0 on success or -EFAULT when either copy faults.
+ */
+static int sfs_copy_status_to_user(struct sfs_device *sfs_dev,
+				   void __user *status, void __user *ext_status)
+{
+	struct sfs_command *cmd = sfs_dev->command_buf;
+
+	if (copy_to_user(status, &cmd->hdr.status, sizeof(u32)))
+		return -EFAULT;
+
+	if (copy_to_user(ext_status, &cmd->buf, sizeof(u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * ioctl handler of the SFS misc device.
+ *
+ * Handles SFSIOCFWVERS and SFSIOCUPDATEPKG against the master PSP device
+ * while holding sfs_ioctl_mutex. When the command succeeds or fails with
+ * -EIO, the SFS status and extended status are copied back to userspace and
+ * that command result is returned. Any other command error is returned
+ * without touching the user buffer.
+ *
+ * Returns -ENODEV without a master device or SFS data, -EINVAL for an
+ * unknown @cmd and -EFAULT when a user copy fails.
+ */
+static long sfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct psp_device *psp_master = psp_get_master_device();
+	void __user *argp = (void __user *)arg;
+	struct sfs_device *sfs_dev;
+	int ret;
+
+	if (!psp_master || !psp_master->sfs_data)
+		return -ENODEV;
+
+	sfs_dev = psp_master->sfs_data;
+
+	guard(mutex)(&sfs_ioctl_mutex);
+
+	switch (cmd) {
+	case SFSIOCFWVERS: {
+		struct sfs_user_get_fw_versions __user *uvers = argp;
+
+		dev_dbg(sfs_dev->dev, "in SFSIOCFWVERS\n");
+
+		ret = send_sfs_get_fw_versions(sfs_dev);
+		if (ret && ret != -EIO)
+			return ret;
+
+		/*
+		 * PSP reported success or a command error: hand the blob and
+		 * both status words back to userspace.
+		 */
+		if (copy_to_user(&uvers->blob, sfs_dev->command_buf->sfs_buffer,
+				 PAGE_SIZE))
+			return -EFAULT;
+		if (sfs_copy_status_to_user(sfs_dev, &uvers->sfs_status,
+					    &uvers->sfs_extended_status))
+			return -EFAULT;
+
+		return ret;
+	}
+	case SFSIOCUPDATEPKG: {
+		struct sfs_user_update_package __user *upkg = argp;
+		char payload_name[PAYLOAD_NAME_SIZE];
+
+		dev_dbg(sfs_dev->dev, "in SFSIOCUPDATEPKG\n");
+
+		if (copy_from_user(payload_name, upkg->payload_name,
+				   PAYLOAD_NAME_SIZE))
+			return -EFAULT;
+
+		ret = send_sfs_update_package(sfs_dev, payload_name);
+		if (ret && ret != -EIO)
+			return ret;
+
+		/*
+		 * PSP reported success or a command error: hand both status
+		 * words back to userspace.
+		 */
+		if (sfs_copy_status_to_user(sfs_dev, &upkg->sfs_status,
+					    &upkg->sfs_extended_status))
+			return -EFAULT;
+
+		return ret;
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+/* File operations of the SFS misc device; only unlocked_ioctl is provided */
+static const struct file_operations sfs_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = sfs_ioctl,
+};
+
+/*
+ * kref release callback for the shared SFS misc device: deregister it,
+ * free it and clear the global pointer. @ref itself is not inspected.
+ */
+static void sfs_exit(struct kref *ref)
+{
+	struct sfs_misc_dev *mdev = misc_dev;
+
+	misc_deregister(&mdev->misc);
+	kfree(mdev);
+	misc_dev = NULL;
+}
+
+/*
+ * Tear down the SFS state attached to @psp, if any: restore the command
+ * buffer mapping, release the buffer, drop the misc device reference and
+ * clear psp->sfs_data.
+ */
+void sfs_dev_destroy(struct psp_device *psp)
+{
+	struct sfs_device *sfs_dev = psp->sfs_data;
+	unsigned long cmd_buf_addr;
+
+	if (!sfs_dev)
+		return;
+
+	/* Return the command buffer to the default "Write-Back" memory type */
+	cmd_buf_addr = (unsigned long)sfs_dev->command_buf;
+	set_memory_wb(cmd_buf_addr, SFS_NUM_PAGES_CMDBUF);
+
+	snp_free_hv_fixed_pages(sfs_dev->page);
+
+	/* Only devices that took a misc device reference drop one */
+	if (sfs_dev->misc)
+		kref_put(&misc_dev->refcount, sfs_exit);
+
+	psp->sfs_data = NULL;
+}
+
+/*
+ * Based on sev_misc_init().
+ *
+ * Register the shared SFS misc device on first use, or take another
+ * reference on it when it already exists, and attach it to @sfs.
+ *
+ * Returns 0 on success, -ENOMEM when the misc device cannot be allocated,
+ * or the error from misc_register().
+ */
+static int sfs_misc_init(struct sfs_device *sfs)
+{
+	struct device *dev = sfs->dev;
+	int ret;
+
+	/*
+	 * SFS feature support can be detected on multiple devices but the SFS
+	 * FW commands must be issued on the master. During probe, we do not
+	 * know the master hence we create /dev/sfs on the first device probe.
+	 */
+	if (!misc_dev) {
+		struct miscdevice *misc;
+
+		misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL);
+		if (!misc_dev)
+			return -ENOMEM;
+
+		misc = &misc_dev->misc;
+		misc->minor = MISC_DYNAMIC_MINOR;
+		misc->name = "sfs";
+		misc->fops = &sfs_fops;
+		misc->mode = 0600;
+
+		ret = misc_register(misc);
+		if (ret) {
+			/*
+			 * Do not leave a stale, unregistered misc_dev behind:
+			 * it would be leaked, and a later probe would take a
+			 * reference on a refcount that was never initialized.
+			 */
+			kfree(misc_dev);
+			misc_dev = NULL;
+			return ret;
+		}
+
+		kref_init(&misc_dev->refcount);
+	} else {
+		kref_get(&misc_dev->refcount);
+	}
+
+	sfs->misc = misc_dev;
+	dev_dbg(dev, "registered SFS device\n");
+
+	return 0;
+}
+
+/*
+ * Set up SFS support for @psp: allocate the device state and the 2MB
+ * command buffer, map the buffer uncacheable and register the misc device.
+ *
+ * Returns 0 on success. On failure everything set up so far is undone,
+ * psp->sfs_data is cleared and a negative errno is returned (-ENOMEM for
+ * allocation failures, otherwise the error of the failing step).
+ */
+int sfs_dev_init(struct psp_device *psp)
+{
+	struct device *dev = psp->dev;
+	struct sfs_device *sfs_dev;
+	struct page *cmd_page;
+	unsigned long cmd_addr;
+	int ret;
+
+	sfs_dev = devm_kzalloc(dev, sizeof(*sfs_dev), GFP_KERNEL);
+	if (!sfs_dev)
+		return -ENOMEM;
+
+	/*
+	 * The 2MB command buffer shared by all SFS commands comes from the
+	 * SNP HV_Fixed page allocator, which also transitions the buffer to
+	 * HV_Fixed page state if SNP is enabled.
+	 */
+	cmd_page = snp_alloc_hv_fixed_pages(SFS_NUM_2MB_PAGES_CMDBUF);
+	if (!cmd_page) {
+		dev_dbg(dev, "Command Buffer HV-Fixed page allocation failed\n");
+		ret = -ENOMEM;
+		goto err_free_dev;
+	}
+
+	sfs_dev->page = cmd_page;
+	sfs_dev->command_buf = page_address(cmd_page);
+	cmd_addr = (unsigned long)sfs_dev->command_buf;
+
+	dev_dbg(dev, "Command buffer 0x%px to be marked as HV_Fixed\n", sfs_dev->command_buf);
+
+	/* The SFS command buffer has to be mapped as non-cacheable */
+	ret = set_memory_uc(cmd_addr, SFS_NUM_PAGES_CMDBUF);
+	if (ret) {
+		dev_dbg(dev, "Set memory uc failed\n");
+		goto err_free_pages;
+	}
+
+	dev_dbg(dev, "Command buffer 0x%px marked uncacheable\n", sfs_dev->command_buf);
+
+	sfs_dev->dev = dev;
+	sfs_dev->psp = psp;
+	psp->sfs_data = sfs_dev;
+
+	ret = sfs_misc_init(sfs_dev);
+	if (ret)
+		goto err_restore_wb;
+
+	dev_notice(sfs_dev->dev, "SFS support is available\n");
+
+	return 0;
+
+err_restore_wb:
+	set_memory_wb(cmd_addr, SFS_NUM_PAGES_CMDBUF);
+
+err_free_pages:
+	snp_free_hv_fixed_pages(cmd_page);
+
+err_free_dev:
+	psp->sfs_data = NULL;
+	devm_kfree(dev, sfs_dev);
+
+	return ret;
+}
diff --git a/drivers/crypto/ccp/sfs.h b/drivers/crypto/ccp/sfs.h
new file mode 100644
index 000000000000..97704c210efd
--- /dev/null
+++ b/drivers/crypto/ccp/sfs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Platform Security Processor (PSP) Seamless Firmware Servicing (SFS) support.
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ */
+
+#ifndef __SFS_H__
+#define __SFS_H__
+
+#include <uapi/linux/psp-sfs.h>
+
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/psp-sev.h>
+#include <linux/psp-platform-access.h>
+#include <linux/set_memory.h>
+
+#include "psp-dev.h"
+
+/* Reference-counted wrapper around the SFS misc device */
+struct sfs_misc_dev {
+ /* Initialized on registration; sfs_exit() runs when it drops to zero */
+ struct kref refcount;
+ /* The misc device registered under the name "sfs" */
+ struct miscdevice misc;
+};
+
+/*
+ * Layout of the SFS command buffer: @hdr and @buf together are sized to
+ * exactly PAGE_SIZE, so @sfs_buffer starts at offset PAGE_SIZE.
+ */
+struct sfs_command {
+ /* Extended mailbox request header (payload_size, sub_cmd_id, status) */
+ struct psp_ext_req_buffer_hdr hdr;
+ /* Remainder of the first page; its first 32 bits are read as the extended status */
+ u8 buf[PAGE_SIZE - sizeof(struct psp_ext_req_buffer_hdr)];
+ /* Command payload area: firmware version output or update package */
+ u8 sfs_buffer[];
+} __packed;
+
+/* Per-PSP SFS state, stored in psp_device::sfs_data */
+struct sfs_device {
+ /* Device used for logging and firmware requests */
+ struct device *dev;
+ /* Owning PSP device, used to issue extended mailbox commands */
+ struct psp_device *psp;
+
+ /* Pages backing the command buffer, from snp_alloc_hv_fixed_pages() */
+ struct page *page;
+ /* Kernel virtual address of @page, i.e. page_address(page) */
+ struct sfs_command *command_buf;
+
+ /* Shared misc device; set once sfs_misc_init() succeeded */
+ struct sfs_misc_dev *misc;
+};
+
+void sfs_dev_destroy(struct psp_device *psp);
+int sfs_dev_init(struct psp_device *psp);
+
+#endif /* __SFS_H__ */
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index cafc90d4caaf..0d05eac7c72b 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -788,7 +788,9 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry,
*kernel_entry = addr + entry;
- return efi_adjust_memory_range_protection(addr, kernel_text_size);
+ return efi_adjust_memory_range_protection(addr, kernel_text_size) ?:
+ efi_adjust_memory_range_protection(addr + kernel_inittext_offset,
+ kernel_inittext_size);
}
static void __noreturn enter_kernel(unsigned long kernel_addr,
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 0bf7d33a1048..7fcec025c5e0 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -96,6 +96,14 @@ enum cc_attr {
* enabled to run SEV-SNP guests.
*/
CC_ATTR_HOST_SEV_SNP,
+
+ /**
+ * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active.
+ *
+ * The host kernel is running with the necessary features enabled
+ * to run SEV-SNP guests with full Secure AVIC capabilities.
+ */
+ CC_ATTR_SNP_SECURE_AVIC,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 1504fb012c05..540abf7de048 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -7,6 +7,8 @@
enum psp_platform_access_msg {
PSP_CMD_NONE = 0x0,
+ PSP_SFS_GET_FW_VERSIONS,
+ PSP_SFS_UPDATE,
PSP_CMD_HSTI_QUERY = 0x14,
PSP_I2C_REQ_BUS_CMD = 0x64,
PSP_DYNAMIC_BOOST_GET_NONCE,
diff --git a/include/uapi/linux/psp-sfs.h b/include/uapi/linux/psp-sfs.h
new file mode 100644
index 000000000000..94e51670383c
--- /dev/null
+++ b/include/uapi/linux/psp-sfs.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Userspace interface for AMD Seamless Firmware Servicing (SFS)
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ */
+
+#ifndef __PSP_SFS_USER_H__
+#define __PSP_SFS_USER_H__
+
+#include <linux/types.h>
+
+/**
+ * SFS: AMD Seamless Firmware Servicing (SFS) interface
+ */
+
+#define PAYLOAD_NAME_SIZE 64
+#define TEE_EXT_CMD_BUFFER_SIZE 4096
+
+/**
+ * struct sfs_user_get_fw_versions - get current level of base firmware (output).
+ * @blob: current level of base firmware for ASP and patch levels (input/output).
+ *        TEE_EXT_CMD_BUFFER_SIZE bytes large.
+ * @sfs_status: 32-bit SFS status value (output).
+ * @sfs_extended_status: 32-bit SFS extended status value (output).
+ *
+ * Argument structure of the SFSIOCFWVERS ioctl. The structure is packed.
+ */
+struct sfs_user_get_fw_versions {
+ __u8 blob[TEE_EXT_CMD_BUFFER_SIZE];
+ __u32 sfs_status;
+ __u32 sfs_extended_status;
+} __packed;
+
+/**
+ * struct sfs_user_update_package - update SFS package (input).
+ * @payload_name: name of SFS package to load, verify and execute (input).
+ *                At most PAYLOAD_NAME_SIZE bytes.
+ * @sfs_status: 32-bit SFS status value (output).
+ * @sfs_extended_status: 32-bit SFS extended status value (output).
+ *
+ * Argument structure of the SFSIOCUPDATEPKG ioctl. The structure is packed.
+ */
+struct sfs_user_update_package {
+ char payload_name[PAYLOAD_NAME_SIZE];
+ __u32 sfs_status;
+ __u32 sfs_extended_status;
+} __packed;
+
+/**
+ * Seamless Firmware Servicing (SFS) IOC
+ *
+ * possible return codes for all SFS IOCTLs:
+ * 0: success
+ * -EINVAL: invalid input
+ * -E2BIG: excess data passed
+ * -EFAULT: failed to copy to/from userspace
+ * -EBUSY: mailbox in recovery or in use
+ * -ENODEV: driver not bound with PSP device
+ * -EACCES: request isn't authorized
+ * -EINVAL: invalid parameter
+ * -ETIMEDOUT: request timed out
+ * -EAGAIN: invalid request for state machine
+ * -ENOENT: not implemented
+ * -ENFILE: overflow
+ * -EPERM: invalid signature
+ * -EIO: PSP I/O error
+ */
+#define SFS_IOC_TYPE 'S'
+
+/**
+ * SFSIOCFWVERS - returns blob containing FW versions
+ * ASP provides the current level of Base Firmware for the ASP
+ * and the other microprocessors as well as current patch
+ * level(s).
+ */
+#define SFSIOCFWVERS _IOWR(SFS_IOC_TYPE, 0x1, struct sfs_user_get_fw_versions)
+
+/**
+ * SFSIOCUPDATEPKG - updates package/payload
+ * ASP loads, verifies and executes the SFS package.
+ * By default, the SFS package/payload is loaded from
+ * /lib/firmware/amd, but alternative firmware loading
+ * path can be specified using kernel parameter
+ * firmware_class.path or the firmware loading path
+ * can be customized using sysfs file:
+ * /sys/module/firmware_class/parameters/path.
+ */
+#define SFSIOCUPDATEPKG _IOWR(SFS_IOC_TYPE, 0x2, struct sfs_user_update_package)
+
+#endif /* __PSP_SFS_USER_H__ */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 98c4713c1b09..0ad5cc70ecbe 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -880,3 +880,15 @@ unsigned int arch_reloc_size(struct reloc *reloc)
return 8;
}
}
+
+/*
+ * Return true when @reloc is one of the x86-64 relocation types
+ * R_X86_64_32, R_X86_64_32S or R_X86_64_64. @elf is not consulted.
+ */
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+	unsigned int type = reloc_type(reloc);
+
+	return type == R_X86_64_32 ||
+	       type == R_X86_64_32S ||
+	       type == R_X86_64_64;
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 80239843e9f0..0f6b197cfcb0 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -87,6 +87,7 @@ static const struct option check_options[] = {
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
+ OPT_BOOLEAN(0 , "noabs", &opts.noabs, "reject absolute references in allocatable sections"),
OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),
@@ -162,6 +163,7 @@ static bool opts_valid(void)
opts.hack_noinstr ||
opts.ibt ||
opts.mcount ||
+ opts.noabs ||
opts.noinstr ||
opts.orc ||
opts.retpoline ||
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d14f20ef1db1..093fcd01dd6e 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3564,7 +3564,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
/* Ignore KCFI type preambles, which always fall through */
if (!strncmp(func->name, "__cfi_", 6) ||
- !strncmp(func->name, "__pfx_", 6))
+ !strncmp(func->name, "__pfx_", 6) ||
+ !strncmp(func->name, "__pi___cfi_", 11) ||
+ !strncmp(func->name, "__pi___pfx_", 11))
return 0;
if (file->ignore_unreachables)
@@ -4644,6 +4646,47 @@ static void disas_warned_funcs(struct objtool_file *file)
disas_funcs(funcs);
}
+__weak bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+ unsigned int type = reloc_type(reloc);
+ size_t sz = elf_addr_size(elf);
+ /*
+ * Default definition, marked __weak so that an architecture can supply
+ * its own arch_absolute_reloc().  Returns true when the type of @reloc
+ * is R_ABS64 if elf_addr_size() reports 8 for @elf, or R_ABS32 for any
+ * other address size; returns false otherwise.
+ */
+ return (sz == 8) ? (type == R_ABS64) : (type == R_ABS32);
+}
+/*
+ * Walk every section of @file and emit a WARN() for each relocation that
+ * arch_absolute_reloc() reports as absolute.  Sections are skipped when
+ * they lack SHF_ALLOC, have no relocation section attached (sec->rsec),
+ * or are named "__patchable_function_entries".
+ *
+ * Returns the number of relocations warned about, 0 if there were none.
+ */
+static int check_abs_references(struct objtool_file *file)
+{
+ struct section *sec;
+ struct reloc *reloc;
+ int ret = 0;
+
+ for_each_sec(file, sec) {
+ /*
+ * Absolute references in sections without SHF_ALLOC (i.e. not
+ * loaded into memory) are fine.
+ */
+ if (!(sec->sh.sh_flags & SHF_ALLOC))
+ continue;
+
+ /*
+ * The section must have an associated .rela section; without
+ * one there are no relocations to inspect.
+ */
+ if (!sec->rsec)
+ continue;
+
+ /*
+ * Special case for compiler generated metadata that is not
+ * consumed until after boot.
+ */
+ if (!strcmp(sec->name, "__patchable_function_entries"))
+ continue;
+
+ for_each_reloc(sec->rsec, reloc) {
+ if (arch_absolute_reloc(file->elf, reloc)) {
+ WARN("section %s has absolute relocation at offset 0x%lx",
+ sec->name, reloc_offset(reloc));
+ ret++;
+ }
+ }
+ }
+ return ret;
+}
+
struct insn_chunk {
void *addr;
struct insn_chunk *next;
@@ -4777,6 +4820,9 @@ int check(struct objtool_file *file)
goto out;
}
+ if (opts.noabs)
+ warnings += check_abs_references(file);
+
if (opts.orc && nr_insns) {
ret = orc_create(file);
if (ret)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 01ef6f415adf..be33c7b43180 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -97,6 +97,7 @@ bool arch_is_embedded_insn(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc);
unsigned int arch_reloc_size(struct reloc *reloc);
unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 6b08666fa69d..ab22673862e1 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -26,6 +26,7 @@ struct opts {
bool uaccess;
int prefix;
bool cfi;
+ bool noabs;
/* options: */
bool backtrace;
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 6a922d046b8e..802895fae3ca 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -45,7 +45,6 @@ NORETURN(rewind_stack_and_make_dead)
NORETURN(rust_begin_unwind)
NORETURN(rust_helper_BUG)
NORETURN(sev_es_terminate)
-NORETURN(snp_abort)
NORETURN(start_kernel)
NORETURN(stop_this_cpu)
NORETURN(usercopy_abort)