author	Paolo Bonzini <pbonzini@redhat.com>	2026-02-09 18:51:37 +0100
committer	Paolo Bonzini <pbonzini@redhat.com>	2026-02-09 18:51:37 +0100
commit	4215ee0d7bb5358882375c84d3cd0488bb5813b2 (patch)
tree	6c02ccb1bb32d96fff24ee9e3602e0c85153a9aa
parent	687603fb2bf1205d6f7028e30848434e3b126a7a (diff)
parent	20c3c4108d58f87c711bf44cb0b498b3ac5af6bf (diff)
Merge tag 'kvm-x86-svm-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM SVM changes for 6.20

 - Drop a user-triggerable WARN on nested_svm_load_cr3() failure.

 - Add support for virtualizing ERAPS.  Note, correct virtualization of
   ERAPS relies on an upcoming, publicly announced change in the APM to
   reduce the set of conditions where hardware (i.e. KVM) *must* flush
   the RAP.

 - Ignore nSVM intercepts for instructions that are not supported
   according to L1's virtual CPU model.

 - Add support for expedited writes to the fast MMIO bus, a la VMX's
   fastpath for EPT Misconfig.

 - Don't set GIF when clearing EFER.SVME, as GIF exists independently of
   SVM, and allow userspace to restore nested state with GIF=0.

 - Treat exit_code as an unsigned 64-bit value throughout KVM.

 - Add support for fetching SNP certificates from userspace.

 - Fix a bug where KVM would use vmcb02 instead of vmcb01 when emulating
   VMLOAD or VMSAVE on behalf of L2.

 - Misc fixes and cleanups.
-rw-r--r--	Documentation/virt/kvm/api.rst	44
-rw-r--r--	Documentation/virt/kvm/x86/amd-memory-encryption.rst	52
-rw-r--r--	arch/x86/include/asm/cpufeatures.h	1
-rw-r--r--	arch/x86/include/asm/kvm_host.h	8
-rw-r--r--	arch/x86/include/asm/svm.h	9
-rw-r--r--	arch/x86/include/uapi/asm/kvm.h	2
-rw-r--r--	arch/x86/include/uapi/asm/svm.h	32
-rw-r--r--	arch/x86/kvm/cpuid.c	9
-rw-r--r--	arch/x86/kvm/svm/avic.c	4
-rw-r--r--	arch/x86/kvm/svm/hyperv.c	7
-rw-r--r--	arch/x86/kvm/svm/nested.c	82
-rw-r--r--	arch/x86/kvm/svm/sev.c	129
-rw-r--r--	arch/x86/kvm/svm/svm.c	121
-rw-r--r--	arch/x86/kvm/svm/svm.h	49
-rw-r--r--	arch/x86/kvm/trace.h	6
-rw-r--r--	arch/x86/kvm/x86.c	12
-rw-r--r--	include/hyperv/hvgdk.h	2
-rw-r--r--	include/uapi/linux/kvm.h	9
-rw-r--r--	tools/testing/selftests/kvm/Makefile.kvm	2
-rw-r--r--	tools/testing/selftests/kvm/include/x86/svm.h	3
-rw-r--r--	tools/testing/selftests/kvm/x86/nested_set_state_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c)	128
-rw-r--r--	tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c	4
22 files changed, 559 insertions, 156 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index bfa0ab343081..49f043246f95 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7382,6 +7382,50 @@ Please note that the kernel is allowed to use the kvm_run structure as the
primary storage for certain register types. Therefore, the kernel may use the
values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
+::
+
+ /* KVM_EXIT_SNP_REQ_CERTS */
+ struct kvm_exit_snp_req_certs {
+ __u64 gpa;
+ __u64 npages;
+ __u64 ret;
+ };
+
+KVM_EXIT_SNP_REQ_CERTS indicates an SEV-SNP guest with certificate-fetching
+enabled (see KVM_SEV_SNP_ENABLE_REQ_CERTS) has generated an Extended Guest
+Request NAE #VMGEXIT (SNP_GUEST_REQUEST) with message type MSG_REPORT_REQ,
+i.e. has requested an attestation report from firmware, and would like the
+certificate data corresponding to the attestation report signature to be
+provided by the hypervisor as part of the request.
+
+To allow userspace to provide the certificate, the 'gpa' and 'npages'
+are forwarded verbatim from the guest request (the RAX and RBX GHCB fields
+respectively). 'ret' is not an "output" from KVM, and is always '0' on
+exit. KVM verifies the 'gpa' is 4KiB aligned prior to exiting to userspace,
+but otherwise the information from the guest isn't validated.
+
+Upon the next KVM_RUN, e.g. after userspace has serviced the request (or not),
+KVM will complete the #VMGEXIT, using the 'ret' field to determine whether to
+signal success or failure to the guest, and on failure, what reason code will
+be communicated via SW_EXITINFO2. If 'ret' is set to an unsupported value (see
+the table below), KVM_RUN will fail with -EINVAL. For a 'ret' of 'ENOSPC', KVM
+also consumes the 'npages' field, i.e. userspace can use the field to inform
+the guest of the number of pages needed to hold all the certificate data.
+
+The supported 'ret' values and their respective SW_EXITINFO2 encodings:
+
+ ====== =============================================================
+ 0 0x0, i.e. success. KVM will emit an SNP_GUEST_REQUEST command
+ to SNP firmware.
+ ENOSPC 0x0000000100000000, i.e. not enough guest pages to hold the
+ certificate table and certificate data. KVM will also set the
+ RBX field in the GHCB to 'npages'.
+ EAGAIN 0x0000000200000000, i.e. the host is busy and the guest should
+ retry the request.
+ EIO 0xffffffff00000000, for all other errors (this return code is
+ a KVM-defined hypervisor value, as allowed by the GHCB)
+ ====== =============================================================
+
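+For illustration only, a VMM's run loop might service this exit roughly as
+follows. The guest_hva() and fetch_certs() helpers are hypothetical
+stand-ins for however userspace translates the guest physical address and
+obtains the certificate blob (see
+Documentation/virt/kvm/x86/amd-memory-encryption.rst for the recommended
+locking around fetching the blob)::
+
+	case KVM_EXIT_SNP_REQ_CERTS: {
+		struct kvm_exit_snp_req_certs *certs = &run->snp_req_certs;
+		__u64 max = certs->npages * 4096;
+		size_t len;
+
+		/* Copy the certificate table into the guest pages at 'gpa'. */
+		len = fetch_certs(guest_hva(certs->gpa), max);
+		if (len > max) {
+			/* Tell the guest how many pages it must provide. */
+			certs->npages = (len + 4095) / 4096;
+			certs->ret = ENOSPC;
+		} else {
+			/* Success; KVM will now issue the SNP_GUEST_REQUEST. */
+			certs->ret = 0;
+		}
+		break;
+	}
+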
.. _cap_enable:
diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
index 1ddb6a86ce7f..543b5e5dd8d4 100644
--- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
@@ -572,6 +572,52 @@ Returns: 0 on success, -negative on error
See SNP_LAUNCH_FINISH in the SEV-SNP specification [snp-fw-abi]_ for further
details on the input parameters in ``struct kvm_sev_snp_launch_finish``.
+21. KVM_SEV_SNP_ENABLE_REQ_CERTS
+--------------------------------
+
+The KVM_SEV_SNP_ENABLE_REQ_CERTS command will configure KVM to exit to
+userspace with a ``KVM_EXIT_SNP_REQ_CERTS`` exit type as part of handling
+a guest attestation report, which allows userspace to provide a
+certificate corresponding to the endorsement key used by firmware to sign
+that attestation report.
+
+Returns: 0 on success, -negative on error
+
+NOTE: The endorsement key used by firmware may change as a result of
+management activities like updating SEV-SNP firmware or loading new
+endorsement keys, so some care should be taken to keep the returned
+certificate data in sync with the actual endorsement key in use by
+firmware at the time the attestation request is sent to SNP firmware. The
+recommended scheme to do this is to use file locking (e.g. via fcntl()'s
+F_OFD_SETLK) in the following manner:
+
+ - Prior to obtaining/providing certificate data as part of servicing an
+ exit type of ``KVM_EXIT_SNP_REQ_CERTS``, the VMM should obtain a
+ shared/read or exclusive/write lock on the certificate blob file before
+ reading it and returning it to KVM, and continue to hold the lock until
+ the attestation request is actually sent to firmware. To facilitate
+ this, the VMM can set the ``immediate_exit`` flag of kvm_run just after
+ supplying the certificate data, and just before resuming the vCPU.
+ This will ensure the vCPU will exit again to userspace with ``-EINTR``
+ after it finishes fetching the attestation request from firmware, at
+ which point the VMM can safely drop the file lock.
+
+ - Tools/libraries that perform updates to SNP firmware TCB values or
+ endorsement keys (e.g. via /dev/sev interfaces such as ``SNP_COMMIT``,
+ ``SNP_SET_CONFIG``, or ``SNP_VLEK_LOAD``, see
+ Documentation/virt/coco/sev-guest.rst for more details) in such a way
+ that the certificate blob needs to be updated, should similarly take an
+ exclusive lock on the certificate blob for the duration of any updates
+ to endorsement keys or the certificate blob contents to ensure that
+ VMMs using the above scheme will not return certificate blob data that
+ is out of sync with the endorsement key used by firmware at the time
+ the attestation request is actually issued.
+
+This scheme is recommended so that tools can use a fairly generic/natural
+approach to synchronizing firmware/certificate updates via file-locking,
+which should make it easier to maintain interoperability across
+tools/VMMs/vendors.
+
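+As a rough sketch of the VMM side of this scheme (error handling omitted;
+the file descriptor refers to the certificate blob file), an OFD lock can
+be taken before reading the blob and held until the attestation request
+reaches firmware::
+
+	#define _GNU_SOURCE
+	#include <fcntl.h>
+
+	/* Pass F_RDLCK for VMM reads, F_WRLCK for firmware/cert updates. */
+	static int lock_certs_file(int fd, short type)
+	{
+		struct flock fl = {
+			.l_type   = type,
+			.l_whence = SEEK_SET,
+			.l_start  = 0,
+			.l_len    = 0,	/* zero length locks the whole file */
+		};
+
+		/* OFD locks are owned by the open file description. */
+		return fcntl(fd, F_OFD_SETLKW, &fl);
+	}
+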
Device attribute API
====================
@@ -579,11 +625,15 @@ Attributes of the SEV implementation can be retrieved through the
``KVM_HAS_DEVICE_ATTR`` and ``KVM_GET_DEVICE_ATTR`` ioctls on the ``/dev/kvm``
device node, using group ``KVM_X86_GRP_SEV``.
-Currently only one attribute is implemented:
+The following attributes are currently implemented:
* ``KVM_X86_SEV_VMSA_FEATURES``: return the set of all bits that
are accepted in the ``vmsa_features`` of ``KVM_SEV_INIT2``.
+* ``KVM_X86_SEV_SNP_REQ_CERTS``: return a value of 1 if the kernel supports the
+ ``KVM_EXIT_SNP_REQ_CERTS`` exit, which allows for fetching endorsement key
+ certificates from userspace for each SNP attestation request the guest issues.
+
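+As an illustrative sketch, assuming ``kvm_fd`` is an open /dev/kvm file
+descriptor, the attribute can be queried as follows::
+
+	__u64 supported = 0;
+	struct kvm_device_attr attr = {
+		.group = KVM_X86_GRP_SEV,
+		.attr  = KVM_X86_SEV_SNP_REQ_CERTS,
+		.addr  = (__u64)(unsigned long)&supported,
+	};
+
+	/* On success, 'supported' is 1 if KVM_EXIT_SNP_REQ_CERTS is available. */
+	if (ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr))
+		supported = 0;
+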
Firmware Management
===================
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c3b53beb1300..81f7b3b91986 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -472,6 +472,7 @@
#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
+#define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5a3bfa293e8b..0353d8b6988c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -195,7 +195,15 @@ enum kvm_reg {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR0,
+ /*
+ * Alias AMD's ERAPS (not a real register) to CR3 so that common code
+ * can trigger emulation of the RAP (Return Address Predictor) with
+ * minimal support required in common code. Piggyback CR3 as the RAP
+ * is cleared on writes to CR3, i.e. marking CR3 dirty will naturally
+ * mark ERAPS dirty as well.
+ */
VCPU_EXREG_CR3,
+ VCPU_EXREG_ERAPS = VCPU_EXREG_CR3,
VCPU_EXREG_CR4,
VCPU_EXREG_RFLAGS,
VCPU_EXREG_SEGMENTS,
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 56aa99503dc4..edde36097ddc 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -131,13 +131,13 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u64 tsc_offset;
u32 asid;
u8 tlb_ctl;
- u8 reserved_2[3];
+ u8 erap_ctl;
+ u8 reserved_2[2];
u32 int_ctl;
u32 int_vector;
u32 int_state;
u8 reserved_3[4];
- u32 exit_code;
- u32 exit_code_hi;
+ u64 exit_code;
u64 exit_info_1;
u64 exit_info_2;
u32 exit_int_info;
@@ -182,6 +182,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define TLB_CONTROL_FLUSH_ASID 3
#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+#define ERAP_CONTROL_ALLOW_LARGER_RAP BIT(0)
+#define ERAP_CONTROL_CLEAR_RAP BIT(1)
+
#define V_TPR_MASK 0x0f
#define V_IRQ_SHIFT 8
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 7ceff6583652..b2c928c5965d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -503,6 +503,7 @@ struct kvm_sync_regs {
#define KVM_X86_GRP_SEV 1
# define KVM_X86_SEV_VMSA_FEATURES 0
# define KVM_X86_SNP_POLICY_BITS 1
+# define KVM_X86_SEV_SNP_REQ_CERTS 2
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -743,6 +744,7 @@ enum sev_cmd_id {
KVM_SEV_SNP_LAUNCH_START = 100,
KVM_SEV_SNP_LAUNCH_UPDATE,
KVM_SEV_SNP_LAUNCH_FINISH,
+ KVM_SEV_SNP_ENABLE_REQ_CERTS,
KVM_SEV_NR_MAX,
};
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 650e3256ea7d..010a45c9f614 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -103,38 +103,38 @@
#define SVM_EXIT_VMGEXIT 0x403
/* SEV-ES software-defined VMGEXIT events */
-#define SVM_VMGEXIT_MMIO_READ 0x80000001
-#define SVM_VMGEXIT_MMIO_WRITE 0x80000002
-#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003
-#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004
-#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
+#define SVM_VMGEXIT_MMIO_READ 0x80000001ull
+#define SVM_VMGEXIT_MMIO_WRITE 0x80000002ull
+#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003ull
+#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004ull
+#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005ull
#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
-#define SVM_VMGEXIT_PSC 0x80000010
-#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011
-#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012
-#define SVM_VMGEXIT_AP_CREATION 0x80000013
+#define SVM_VMGEXIT_PSC 0x80000010ull
+#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011ull
+#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012ull
+#define SVM_VMGEXIT_AP_CREATION 0x80000013ull
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
-#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
-#define SVM_VMGEXIT_SAVIC 0x8000001a
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull
+#define SVM_VMGEXIT_SAVIC 0x8000001aull
#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1
#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL
-#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
-#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
+#define SVM_VMGEXIT_HV_FEATURES 0x8000fffdull
+#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffeull
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
/* SW_EXITINFO1[3:0] */ \
(((((u64)reason_set) & 0xf)) | \
/* SW_EXITINFO1[11:4] */ \
((((u64)reason_code) & 0xff) << 4))
-#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
+#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffffull
/* Exit code reserved for hypervisor/software use */
-#define SVM_EXIT_SW 0xf0000000
+#define SVM_EXIT_SW 0xf0000000ull
-#define SVM_EXIT_ERR -1
+#define SVM_EXIT_ERR -1ull
#define SVM_EXIT_REASONS \
{ SVM_EXIT_READ_CR0, "read_cr0" }, \
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 88a5426674a1..c590a5bd3196 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1223,6 +1223,7 @@ void kvm_set_cpu_caps(void)
/* PrefetchCtlMsr */
/* GpOnUserCpuid */
/* EPSF */
+ F(ERAPS),
SYNTHESIZED_F(SBPB),
SYNTHESIZED_F(IBPB_BRTYPE),
SYNTHESIZED_F(SRSO_NO),
@@ -1803,8 +1804,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x80000021:
- entry->ebx = entry->edx = 0;
+ entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0021_EAX);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_ERAPS))
+ entry->ebx &= GENMASK(23, 16);
+ else
+ entry->ebx = 0;
+
cpuid_entry_override(entry, CPUID_8000_0021_ECX);
break;
/* AMD Extended Performance Monitoring and Debug */
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 0f6c8596719b..f92214b1a938 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1224,13 +1224,13 @@ static bool __init avic_want_avic_enabled(void)
* In "auto" mode, enable AVIC by default for Zen4+ if x2AVIC is
* supported (to avoid enabling partial support by default, and because
* x2AVIC should be supported by all Zen4+ CPUs). Explicitly check for
- * family 0x19 and later (Zen5+), as the kernel's synthetic ZenX flags
+ * family 0x1A and later (Zen5+), as the kernel's synthetic ZenX flags
* aren't inclusive of previous generations, i.e. the kernel will set
* at most one ZenX feature flag.
*/
if (avic == AVIC_AUTO_MODE)
avic = boot_cpu_has(X86_FEATURE_X2AVIC) &&
- (boot_cpu_data.x86 > 0x19 || cpu_feature_enabled(X86_FEATURE_ZEN4));
+ (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1A);
if (!avic || !npt_enabled)
return false;
diff --git a/arch/x86/kvm/svm/hyperv.c b/arch/x86/kvm/svm/hyperv.c
index 088f6429b24c..4f24dcb45116 100644
--- a/arch/x86/kvm/svm/hyperv.c
+++ b/arch/x86/kvm/svm/hyperv.c
@@ -10,8 +10,13 @@ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * The exit code used by Hyper-V for software-defined exits is reserved
+ * by AMD specifically for such use cases.
+ */
+ BUILD_BUG_ON(HV_SVM_EXITCODE_ENL != SVM_EXIT_SW);
+
svm->vmcb->control.exit_code = HV_SVM_EXITCODE_ENL;
- svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH;
svm->vmcb->control.exit_info_2 = 0;
nested_svm_vmexit(svm);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index ba0f11c68372..79cb85b8a156 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -45,7 +45,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
* correctly fill in the high bits of exit_info_1.
*/
vmcb->control.exit_code = SVM_EXIT_NPF;
- vmcb->control.exit_code_hi = 0;
vmcb->control.exit_info_1 = (1ULL << 32);
vmcb->control.exit_info_2 = fault->address;
}
@@ -403,6 +402,19 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
return __nested_vmcb_check_controls(vcpu, ctl);
}
+/*
+ * If a feature is not advertised to L1, clear the corresponding vmcb12
+ * intercept.
+ */
+#define __nested_svm_sanitize_intercept(__vcpu, __control, fname, iname) \
+do { \
+ if (!guest_cpu_cap_has(__vcpu, X86_FEATURE_##fname)) \
+ vmcb12_clr_intercept(__control, INTERCEPT_##iname); \
+} while (0)
+
+#define nested_svm_sanitize_intercept(__vcpu, __control, name) \
+ __nested_svm_sanitize_intercept(__vcpu, __control, name, name)
+
static
void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
struct vmcb_ctrl_area_cached *to,
@@ -413,15 +425,21 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
for (i = 0; i < MAX_INTERCEPT; i++)
to->intercepts[i] = from->intercepts[i];
+ __nested_svm_sanitize_intercept(vcpu, to, XSAVE, XSETBV);
+ nested_svm_sanitize_intercept(vcpu, to, INVPCID);
+ nested_svm_sanitize_intercept(vcpu, to, RDTSCP);
+ nested_svm_sanitize_intercept(vcpu, to, SKINIT);
+ nested_svm_sanitize_intercept(vcpu, to, RDPRU);
+
to->iopm_base_pa = from->iopm_base_pa;
to->msrpm_base_pa = from->msrpm_base_pa;
to->tsc_offset = from->tsc_offset;
to->tlb_ctl = from->tlb_ctl;
+ to->erap_ctl = from->erap_ctl;
to->int_ctl = from->int_ctl;
to->int_vector = from->int_vector;
to->int_state = from->int_state;
to->exit_code = from->exit_code;
- to->exit_code_hi = from->exit_code_hi;
to->exit_info_1 = from->exit_info_1;
to->exit_info_2 = from->exit_info_2;
to->exit_int_info = from->exit_int_info;
@@ -663,7 +681,6 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
vmcb02->save.rsp = vmcb12->save.rsp;
vmcb02->save.rip = vmcb12->save.rip;
- /* These bits will be set properly on the first execution when new_vmc12 is true */
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
@@ -727,8 +744,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
enter_guest_mode(vcpu);
/*
- * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
- * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
+ * Filled at exit: exit_code, exit_info_1, exit_info_2, exit_int_info,
+ * exit_int_info_err, next_rip, insn_len, insn_bytes.
*/
if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) &&
@@ -867,6 +884,19 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
}
/*
+ * Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
+ * let L2 use a larger RAP since KVM will emulate the necessary clears,
+ * as it's possible L1 deliberately wants to restrict L2 to the legacy
+ * RAP size. Unconditionally clear the RAP on nested VMRUN, as KVM is
+ * responsible for emulating the host vs. guest tags (L1 is the "host",
+ * L2 is the "guest").
+ */
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ vmcb02->control.erap_ctl = (svm->nested.ctl.erap_ctl &
+ ERAP_CONTROL_ALLOW_LARGER_RAP) |
+ ERAP_CONTROL_CLEAR_RAP;
+
+ /*
* Merge guest and host intercepts - must be called with vcpu in
* guest-mode to take effect.
*/
@@ -985,7 +1015,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
if (!nested_vmcb_check_save(vcpu) ||
!nested_vmcb_check_controls(vcpu)) {
vmcb12->control.exit_code = SVM_EXIT_ERR;
- vmcb12->control.exit_code_hi = -1u;
vmcb12->control.exit_info_1 = 0;
vmcb12->control.exit_info_2 = 0;
goto out;
@@ -1018,7 +1047,6 @@ out_exit_err:
svm->soft_int_injected = false;
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
- svm->vmcb->control.exit_code_hi = -1u;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
@@ -1130,11 +1158,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.int_state = vmcb02->control.int_state;
vmcb12->control.exit_code = vmcb02->control.exit_code;
- vmcb12->control.exit_code_hi = vmcb02->control.exit_code_hi;
vmcb12->control.exit_info_1 = vmcb02->control.exit_info_1;
vmcb12->control.exit_info_2 = vmcb02->control.exit_info_2;
- if (vmcb12->control.exit_code != SVM_EXIT_ERR)
+ if (!svm_is_vmrun_failure(vmcb12->control.exit_code))
nested_save_pending_event_to_vmcb12(svm, vmcb12);
if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
@@ -1161,6 +1188,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_nested_vmexit_handle_ibrs(vcpu);
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ vmcb01->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
+
svm_switch_vmcb(svm, &svm->vmcb01);
/*
@@ -1363,6 +1393,8 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
+ svm_set_gif(svm, true);
+
if (kvm_apicv_activated(vcpu->kvm))
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
@@ -1422,9 +1454,12 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
static int nested_svm_intercept(struct vcpu_svm *svm)
{
- u32 exit_code = svm->vmcb->control.exit_code;
+ u64 exit_code = svm->vmcb->control.exit_code;
int vmexit = NESTED_EXIT_HOST;
+ if (svm_is_vmrun_failure(exit_code))
+ return NESTED_EXIT_DONE;
+
switch (exit_code) {
case SVM_EXIT_MSR:
vmexit = nested_svm_exit_handled_msr(svm);
@@ -1432,7 +1467,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
case SVM_EXIT_IOIO:
vmexit = nested_svm_intercept_ioio(svm);
break;
- case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+ case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f:
/*
* Host-intercepted exceptions have been checked already in
* nested_svm_exit_special. There is nothing to do here,
@@ -1440,15 +1475,10 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
*/
vmexit = NESTED_EXIT_DONE;
break;
- }
- case SVM_EXIT_ERR: {
- vmexit = NESTED_EXIT_DONE;
- break;
- }
- default: {
+ default:
if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
- }
+ break;
}
return vmexit;
@@ -1496,7 +1526,6 @@ static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb;
vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
- vmcb->control.exit_code_hi = 0;
if (ex->has_error_code)
vmcb->control.exit_info_1 = ex->error_code;
@@ -1667,11 +1696,11 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
dst->tsc_offset = from->tsc_offset;
dst->asid = from->asid;
dst->tlb_ctl = from->tlb_ctl;
+ dst->erap_ctl = from->erap_ctl;
dst->int_ctl = from->int_ctl;
dst->int_vector = from->int_vector;
dst->int_state = from->int_state;
dst->exit_code = from->exit_code;
- dst->exit_code_hi = from->exit_code_hi;
dst->exit_info_1 = from->exit_info_1;
dst->exit_info_2 = from->exit_info_2;
dst->exit_int_info = from->exit_int_info;
@@ -1782,12 +1811,12 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
/*
* If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
* EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
+ * If SVME is disabled, the only valid states are "none" and GIF=1
+ * (clearing SVME does NOT set GIF, i.e. GIF=0 is allowed).
*/
- if (!(vcpu->arch.efer & EFER_SVME)) {
- /* GIF=1 and no guest mode are required if SVME=0. */
- if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
- return -EINVAL;
- }
+ if (!(vcpu->arch.efer & EFER_SVME) && kvm_state->flags &&
+ kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
+ return -EINVAL;
/* SMM temporarily disables SVM, so we cannot be in guest mode. */
if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
@@ -1870,10 +1899,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* thus MMU might not be initialized correctly.
* Set it again to fix this.
*/
-
ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
nested_npt_enabled(svm), false);
- if (WARN_ON_ONCE(ret))
+ if (ret)
goto out_free;
svm->nested.force_msr_bitmap_recalc = true;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f59c65abe3cf..f9aad5c1447e 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -41,6 +41,16 @@
#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
+/*
+ * The GHCB spec essentially states that all non-zero error codes other than
+ * those explicitly defined above should be treated as an error by the guest.
+ * Define a generic error to cover that case, and choose a value that is not
+ * likely to overlap with new explicit error codes should more be added to
+ * the GHCB spec later. KVM will use this to report generic errors when
+ * handling SNP guest requests.
+ */
+#define SNP_GUEST_VMM_ERR_GENERIC (~0U)
+
/* enable/disable SEV support */
static bool sev_enabled = true;
module_param_named(sev, sev_enabled, bool, 0444);
@@ -53,11 +63,6 @@ module_param_named(sev_es, sev_es_enabled, bool, 0444);
static bool sev_snp_enabled = true;
module_param_named(sev_snp, sev_snp_enabled, bool, 0444);
-/* enable/disable SEV-ES DebugSwap support */
-static bool sev_es_debug_swap_enabled = true;
-module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
-static u64 sev_supported_vmsa_features;
-
static unsigned int nr_ciphertext_hiding_asids;
module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444);
@@ -84,6 +89,8 @@ module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 04
static u64 snp_supported_policy_bits __ro_after_init;
+static u64 sev_supported_vmsa_features __ro_after_init;
+
#define INITIAL_VMSA_GPA 0xFFFFFFFFF000
static u8 sev_enc_bit;
@@ -2151,6 +2158,9 @@ int sev_dev_get_attr(u32 group, u64 attr, u64 *val)
*val = snp_supported_policy_bits;
return 0;
+ case KVM_X86_SEV_SNP_REQ_CERTS:
+ *val = sev_snp_enabled ? 1 : 0;
+ return 0;
default:
return -ENXIO;
}
@@ -2567,6 +2577,16 @@ e_free:
return ret;
}
+static int snp_enable_certs(struct kvm *kvm)
+{
+ if (kvm->created_vcpus || !sev_snp_guest(kvm))
+ return -EINVAL;
+
+ to_kvm_sev_info(kvm)->snp_certs_enabled = true;
+
+ return 0;
+}
+
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -2672,6 +2692,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
case KVM_SEV_SNP_LAUNCH_FINISH:
r = snp_launch_finish(kvm, &sev_cmd);
break;
+ case KVM_SEV_SNP_ENABLE_REQ_CERTS:
+ r = snp_enable_certs(kvm);
+ break;
default:
r = -EINVAL;
goto out;
@@ -3150,12 +3173,10 @@ out:
sev_es_enabled = sev_es_supported;
sev_snp_enabled = sev_snp_supported;
- if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
- !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
- sev_es_debug_swap_enabled = false;
-
sev_supported_vmsa_features = 0;
- if (sev_es_debug_swap_enabled)
+
+ if (sev_es_enabled && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) &&
+ cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
if (sev_snp_enabled && tsc_khz && cpu_feature_enabled(X86_FEATURE_SNP_SECURE_TSC))
@@ -3275,11 +3296,6 @@ skip_vmsa_free:
kvfree(svm->sev_es.ghcb_sa);
}
-static u64 kvm_get_cached_sw_exit_code(struct vmcb_control_area *control)
-{
- return (((u64)control->exit_code_hi) << 32) | control->exit_code;
-}
-
static void dump_ghcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3301,7 +3317,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
*/
pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa);
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
- kvm_get_cached_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
+ control->exit_code, kvm_ghcb_sw_exit_code_is_valid(svm));
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm));
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
@@ -3335,7 +3351,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
struct ghcb *ghcb = svm->sev_es.ghcb;
- u64 exit_code;
/*
* The GHCB protocol so far allows for the following data
@@ -3369,9 +3384,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
__kvm_emulate_msr_write(vcpu, MSR_IA32_XSS, kvm_ghcb_get_xss(svm));
/* Copy the GHCB exit information into the VMCB fields */
- exit_code = kvm_ghcb_get_sw_exit_code(svm);
- control->exit_code = lower_32_bits(exit_code);
- control->exit_code_hi = upper_32_bits(exit_code);
+ control->exit_code = kvm_ghcb_get_sw_exit_code(svm);
control->exit_info_1 = kvm_ghcb_get_sw_exit_info_1(svm);
control->exit_info_2 = kvm_ghcb_get_sw_exit_info_2(svm);
svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm);
@@ -3384,15 +3397,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
- u64 exit_code;
u64 reason;
- /*
- * Retrieve the exit code now even though it may not be marked valid
- * as it could help with debugging.
- */
- exit_code = kvm_get_cached_sw_exit_code(control);
-
/* Only GHCB Usage code 0 is supported */
if (svm->sev_es.ghcb->ghcb_usage) {
reason = GHCB_ERR_INVALID_USAGE;
@@ -3406,7 +3412,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
!kvm_ghcb_sw_exit_info_2_is_valid(svm))
goto vmgexit_err;
- switch (exit_code) {
+ switch (control->exit_code) {
case SVM_EXIT_READ_DR7:
break;
case SVM_EXIT_WRITE_DR7:
@@ -3507,15 +3513,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
return 0;
vmgexit_err:
+ /*
+ * Print the exit code even though it may not be marked valid as it
+ * could help with debugging.
+ */
if (reason == GHCB_ERR_INVALID_USAGE) {
vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
svm->sev_es.ghcb->ghcb_usage);
} else if (reason == GHCB_ERR_INVALID_EVENT) {
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
- exit_code);
+ control->exit_code);
} else {
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
- exit_code);
+ control->exit_code);
dump_ghcb(svm);
}
@@ -4155,6 +4165,36 @@ out_unlock:
return ret;
}
+static int snp_req_certs_err(struct vcpu_svm *svm, u32 vmm_error)
+{
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(vmm_error, 0));
+
+ return 1; /* resume guest */
+}
+
+static int snp_complete_req_certs(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+
+ switch (READ_ONCE(vcpu->run->snp_req_certs.ret)) {
+ case 0:
+ return snp_handle_guest_req(svm, control->exit_info_1,
+ control->exit_info_2);
+ case ENOSPC:
+ vcpu->arch.regs[VCPU_REGS_RBX] = vcpu->run->snp_req_certs.npages;
+ return snp_req_certs_err(svm, SNP_GUEST_VMM_ERR_INVALID_LEN);
+ case EAGAIN:
+ return snp_req_certs_err(svm, SNP_GUEST_VMM_ERR_BUSY);
+ case EIO:
+ return snp_req_certs_err(svm, SNP_GUEST_VMM_ERR_GENERIC);
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
{
struct kvm *kvm = svm->vcpu.kvm;
@@ -4170,14 +4210,15 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
/*
* As per GHCB spec, requests of type MSG_REPORT_REQ also allow for
* additional certificate data to be provided alongside the attestation
- * report via the guest-provided data pages indicated by RAX/RBX. The
- * certificate data is optional and requires additional KVM enablement
- * to provide an interface for userspace to provide it, but KVM still
- * needs to be able to handle extended guest requests either way. So
- * provide a stub implementation that will always return an empty
- * certificate table in the guest-provided data pages.
+ * report via the guest-provided data pages indicated by RAX/RBX. If
+ * userspace enables KVM_EXIT_SNP_REQ_CERTS, then exit to userspace
+ * to give userspace an opportunity to provide the certificate data
+ * before issuing/completing the attestation request. Otherwise, return
+ * an empty certificate table in the guest-provided data pages and
+ * handle the attestation request immediately.
*/
if (msg_type == SNP_MSG_REPORT_REQ) {
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_vcpu *vcpu = &svm->vcpu;
u64 data_npages;
gpa_t data_gpa;
@@ -4191,6 +4232,15 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
if (!PAGE_ALIGNED(data_gpa))
goto request_invalid;
+ if (sev->snp_certs_enabled) {
+ vcpu->run->exit_reason = KVM_EXIT_SNP_REQ_CERTS;
+ vcpu->run->snp_req_certs.gpa = data_gpa;
+ vcpu->run->snp_req_certs.npages = data_npages;
+ vcpu->run->snp_req_certs.ret = 0;
+ vcpu->arch.complete_userspace_io = snp_complete_req_certs;
+ return 0;
+ }
+
/*
* As per GHCB spec (see "SNP Extended Guest Request"), the
* certificate table is terminated by 24-bytes of zeroes.
@@ -4354,7 +4404,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
- u64 ghcb_gpa, exit_code;
+ u64 ghcb_gpa;
int ret;
/* Validate the GHCB */
@@ -4396,8 +4446,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
svm_vmgexit_success(svm, 0);
- exit_code = kvm_get_cached_sw_exit_code(control);
- switch (exit_code) {
+ switch (control->exit_code) {
case SVM_VMGEXIT_MMIO_READ:
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
if (ret)
@@ -4489,7 +4538,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = -EINVAL;
break;
default:
- ret = svm_invoke_exit_handler(vcpu, exit_code);
+ ret = svm_invoke_exit_handler(vcpu, control->exit_code);
}
return ret;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4394be40fe78..8b0ac67becae 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -215,7 +215,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
if (!(efer & EFER_SVME)) {
svm_leave_nested(vcpu);
- svm_set_gif(svm, true);
/* #GP intercept is still needed for vmware backdoor */
if (!enable_vmware_backdoor)
clr_exception_intercept(svm, GP_VECTOR);
@@ -996,10 +995,14 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_RDTSCP);
}
+ /*
+ * No need to toggle VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK here, it is
+ * always set if vls is enabled. If the intercepts are set, the bit is
+ * meaningless anyway.
+ */
if (guest_cpuid_is_intel_compatible(vcpu)) {
svm_set_intercept(svm, INTERCEPT_VMLOAD);
svm_set_intercept(svm, INTERCEPT_VMSAVE);
- svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
} else {
/*
* If hardware supports Virtual VMLOAD VMSAVE then enable it
@@ -1008,7 +1011,6 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
if (vls) {
svm_clr_intercept(svm, INTERCEPT_VMLOAD);
svm_clr_intercept(svm, INTERCEPT_VMSAVE);
- svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
}
}
}
@@ -1141,6 +1143,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
svm_clr_intercept(svm, INTERCEPT_PAUSE);
}
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ svm->vmcb->control.erap_ctl |= ERAP_CONTROL_ALLOW_LARGER_RAP;
+
if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb);
@@ -1153,6 +1158,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
}
+ if (vls)
+ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+
if (vcpu->kvm->arch.bus_lock_detection_enabled)
svm_set_intercept(svm, INTERCEPT_BUSLOCK);
@@ -1862,13 +1870,16 @@ static int pf_interception(struct kvm_vcpu *vcpu)
svm->vmcb->control.insn_len);
}
+static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
+
static int npf_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
int rc;
- u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
+ gpa_t gpa = svm->vmcb->control.exit_info_2;
/*
* WARN if hardware generates a fault with an error code that collides
@@ -1879,17 +1890,35 @@ static int npf_interception(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK))
error_code &= ~PFERR_SYNTHETIC_MASK;
+ /*
+ * Expedite fast MMIO kicks if the next RIP is known and KVM is allowed
+ * to emulate the access, e.g. skipping the current instruction is wrong
+ * if the #NPF occurred while vectoring an event.
+ */
+ if ((error_code & PFERR_RSVD_MASK) && !is_guest_mode(vcpu)) {
+ const int emul_type = EMULTYPE_PF | EMULTYPE_NO_DECODE;
+
+ if (svm_check_emulate_instruction(vcpu, emul_type, NULL, 0))
+ return 1;
+
+ if (nrips && svm->vmcb->control.next_rip &&
+ !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+ trace_kvm_fast_mmio(gpa);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ }
+
if (sev_snp_guest(vcpu->kvm) && (error_code & PFERR_GUEST_ENC_MASK))
error_code |= PFERR_PRIVATE_ACCESS;
- trace_kvm_page_fault(vcpu, fault_address, error_code);
- rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
+ trace_kvm_page_fault(vcpu, gpa, error_code);
+ rc = kvm_mmu_page_fault(vcpu, gpa, error_code,
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
if (rc > 0 && error_code & PFERR_GUEST_RMP_MASK)
- sev_handle_rmp_fault(vcpu, fault_address, error_code);
+ sev_handle_rmp_fault(vcpu, gpa, error_code);
return rc;
}
@@ -2099,12 +2128,13 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
ret = kvm_skip_emulated_instruction(vcpu);
+ /* KVM always performs VMLOAD/VMSAVE on VMCB01 (see __svm_vcpu_run()) */
if (vmload) {
- svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
+ svm_copy_vmloadsave_state(svm->vmcb01.ptr, vmcb12);
svm->sysenter_eip_hi = 0;
svm->sysenter_esp_hi = 0;
} else {
- svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+ svm_copy_vmloadsave_state(vmcb12, svm->vmcb01.ptr);
}
kvm_vcpu_unmap(vcpu, &map);
@@ -2443,7 +2473,6 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
if (cr0 ^ val) {
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
- svm->vmcb->control.exit_code_hi = 0;
ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
}
@@ -3272,10 +3301,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
pr_err("%-20s%d\n", "asid:", control->asid);
pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
+ pr_err("%-20s%d\n", "erap_ctl:", control->erap_ctl);
pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
pr_err("%-20s%08x\n", "int_state:", control->int_state);
- pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
+ pr_err("%-20s%016llx\n", "exit_code:", control->exit_code);
pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
@@ -3443,23 +3473,21 @@ no_vmsa:
sev_free_decrypted_vmsa(vcpu, save);
}
-static bool svm_check_exit_valid(u64 exit_code)
+int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 __exit_code)
{
- return (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
- svm_exit_handlers[exit_code]);
-}
+ u32 exit_code = __exit_code;
-static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
-{
- dump_vmcb(vcpu);
- kvm_prepare_unexpected_reason_exit(vcpu, exit_code);
- return 0;
-}
-
-int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
-{
- if (!svm_check_exit_valid(exit_code))
- return svm_handle_invalid_exit(vcpu, exit_code);
+ /*
+ * SVM uses negative values, i.e. 64-bit values, to indicate that VMRUN
+ * failed. Report all such errors to userspace (note, VMEXIT_INVALID,
+ * a.k.a. SVM_EXIT_ERR, is special cased by svm_handle_exit()). Skip
+ * the check when running as a VM, as KVM has historically left garbage
+ * in bits 63:32, i.e. running KVM-on-KVM would hit false positives if
+ * the underlying kernel is buggy.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR) &&
+ (u64)exit_code != __exit_code)
+ goto unexpected_vmexit;
#ifdef CONFIG_MITIGATION_RETPOLINE
if (exit_code == SVM_EXIT_MSR)
@@ -3477,7 +3505,19 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
return sev_handle_vmgexit(vcpu);
#endif
#endif
+ if (exit_code >= ARRAY_SIZE(svm_exit_handlers))
+ goto unexpected_vmexit;
+
+ exit_code = array_index_nospec(exit_code, ARRAY_SIZE(svm_exit_handlers));
+ if (!svm_exit_handlers[exit_code])
+ goto unexpected_vmexit;
+
return svm_exit_handlers[exit_code](vcpu);
+
+unexpected_vmexit:
+ dump_vmcb(vcpu);
+ kvm_prepare_unexpected_reason_exit(vcpu, __exit_code);
+ return 0;
}
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
@@ -3516,7 +3556,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_run *kvm_run = vcpu->run;
- u32 exit_code = svm->vmcb->control.exit_code;
/* SEV-ES guests must use the CR write traps to track CR registers. */
if (!sev_es_guest(vcpu->kvm)) {
@@ -3540,7 +3579,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return 1;
}
- if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
+ if (svm_is_vmrun_failure(svm->vmcb->control.exit_code)) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
= svm->vmcb->control.exit_code;
@@ -3552,7 +3591,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
- return svm_invoke_exit_handler(vcpu, exit_code);
+ return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code);
}
static int pre_svm_run(struct kvm_vcpu *vcpu)
@@ -3983,6 +4022,13 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
invlpga(gva, svm->vmcb->control.asid);
}
+static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS);
+
+ svm_flush_tlb_asid(vcpu);
+}
+
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4241,6 +4287,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
}
svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS) &&
+ kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS))
+ svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
+
svm_hv_update_vp_id(svm->vmcb, vcpu);
/*
@@ -4311,13 +4361,21 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
/* Track VMRUNs that have made past consistency checking */
if (svm->nested.nested_run_pending &&
- svm->vmcb->control.exit_code != SVM_EXIT_ERR)
+ !svm_is_vmrun_failure(svm->vmcb->control.exit_code))
++vcpu->stat.nested_run;
svm->nested.nested_run_pending = 0;
}
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+
+ /*
+ * Unconditionally mask off the CLEAR_RAP bit; the AND is just as cheap
+ * as the TEST+Jcc that would be needed to avoid it.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_ERAPS))
+ svm->vmcb->control.erap_ctl &= ~ERAP_CONTROL_CLEAR_RAP;
+
vmcb_mark_all_clean(svm->vmcb);
/* if exit due to PF check for async PF */
@@ -4618,7 +4676,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
if (static_cpu_has(X86_FEATURE_NRIPS))
vmcb->control.next_rip = info->next_rip;
vmcb->control.exit_code = icpt_info.exit_code;
- vmcb->control.exit_code_hi = 0;
vmexit = nested_svm_exit_handled(svm);
ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
@@ -5073,7 +5130,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
.flush_tlb_all = svm_flush_tlb_all,
.flush_tlb_current = svm_flush_tlb_current,
.flush_tlb_gva = svm_flush_tlb_gva,
- .flush_tlb_guest = svm_flush_tlb_asid,
+ .flush_tlb_guest = svm_flush_tlb_guest,
.vcpu_pre_run = svm_vcpu_pre_run,
.vcpu_run = svm_vcpu_run,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 01be93a53d07..ebd7b36b1ceb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -115,6 +115,7 @@ struct kvm_sev_info {
void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
+ bool snp_certs_enabled; /* SNP certificate-fetching support. */
};
struct kvm_svm {
@@ -156,11 +157,11 @@ struct vmcb_ctrl_area_cached {
u64 tsc_offset;
u32 asid;
u8 tlb_ctl;
+ u8 erap_ctl;
u32 int_ctl;
u32 int_vector;
u32 int_state;
- u32 exit_code;
- u32 exit_code_hi;
+ u64 exit_code;
u64 exit_info_1;
u64 exit_info_2;
u32 exit_int_info;
@@ -424,6 +425,14 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
+static inline bool svm_is_vmrun_failure(u64 exit_code)
+{
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return (u32)exit_code == (u32)SVM_EXIT_ERR;
+
+ return exit_code == SVM_EXIT_ERR;
+}
+
/*
* Only the PDPTRs are loaded on demand into the shadow MMU. All other
* fields are synchronized on VM-Exit, because accessing the VMCB is cheap.
@@ -434,28 +443,47 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
*/
#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
-static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
+static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- __set_bit(bit, (unsigned long *)&control->intercepts);
+ __set_bit(bit, intercepts);
}
-static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
+static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- __clear_bit(bit, (unsigned long *)&control->intercepts);
+ __clear_bit(bit, intercepts);
}
-static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
+static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- return test_bit(bit, (unsigned long *)&control->intercepts);
+ return test_bit(bit, intercepts);
+}
+
+static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
+{
+ __vmcb_set_intercept((unsigned long *)&control->intercepts, bit);
+}
+
+static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
+{
+ __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
+}
+
+static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
+{
+ return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
+}
+
+static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
+{
+ __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
}
static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
{
- WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- return test_bit(bit, (unsigned long *)&control->intercepts);
+ return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
}
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
@@ -762,7 +790,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
{
svm->vmcb->control.exit_code = exit_code;
- svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
return nested_svm_vmexit(svm);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e79bc9cb7162..e7fdbe9efc90 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -383,10 +383,10 @@ TRACE_EVENT(kvm_apic,
#define kvm_print_exit_reason(exit_reason, isa) \
(isa == KVM_ISA_VMX) ? \
__print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \
- __print_symbolic(exit_reason, SVM_EXIT_REASONS), \
+ __print_symbolic_u64(exit_reason, SVM_EXIT_REASONS), \
(isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? " " : "", \
(isa == KVM_ISA_VMX) ? \
- __print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : ""
+ __print_flags_u64(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : ""
#define TRACE_EVENT_KVM_EXIT(name) \
TRACE_EVENT(name, \
@@ -781,7 +781,7 @@ TRACE_EVENT_KVM_EXIT(kvm_nested_vmexit);
* Tracepoint for #VMEXIT reinjected to the guest
*/
TRACE_EVENT(kvm_nested_vmexit_inject,
- TP_PROTO(__u32 exit_code,
+ TP_PROTO(__u64 exit_code,
__u64 exit_info1, __u64 exit_info2,
__u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
TP_ARGS(exit_code, exit_info1, exit_info2,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 72d37c8930ad..1f66c7441272 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -14143,6 +14143,13 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return 1;
}
+ /*
+ * When ERAPS is supported, invalidating a specific PCID clears
+ * the RAP (Return Address Predictor).
+ */
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS);
+
kvm_invalidate_pcid(vcpu, operand.pcid);
return kvm_skip_emulated_instruction(vcpu);
@@ -14156,6 +14163,11 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
fallthrough;
case INVPCID_TYPE_ALL_INCL_GLOBAL:
+ /*
+ * Don't bother marking VCPU_EXREG_ERAPS dirty, SVM will take
+ * care of doing so when emulating the full guest TLB flush
+ * (the RAP is cleared on all implicit TLB flushes).
+ */
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
return kvm_skip_emulated_instruction(vcpu);
diff --git a/include/hyperv/hvgdk.h b/include/hyperv/hvgdk.h
index dd6d4939ea29..384c3f3ff4a5 100644
--- a/include/hyperv/hvgdk.h
+++ b/include/hyperv/hvgdk.h
@@ -281,7 +281,7 @@ struct hv_vmcb_enlightenments {
#define HV_VMCB_NESTED_ENLIGHTENMENTS 31
/* Synthetic VM-Exit */
-#define HV_SVM_EXITCODE_ENL 0xf0000000
+#define HV_SVM_EXITCODE_ENL 0xf0000000ull
#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
/* VM_PARTITION_ASSIST_PAGE */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 88cca0e22ece..958d1c20f927 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -135,6 +135,12 @@ struct kvm_xen_exit {
} u;
};
+struct kvm_exit_snp_req_certs {
+ __u64 gpa;
+ __u64 npages;
+ __u64 ret;
+};
+
#define KVM_S390_GET_SKEYS_NONE 1
#define KVM_S390_SKEYS_MAX 1048576
@@ -181,6 +187,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_TDX 40
#define KVM_EXIT_ARM_SEA 41
#define KVM_EXIT_ARM_LDST64B 42
+#define KVM_EXIT_SNP_REQ_CERTS 43
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -483,6 +490,8 @@ struct kvm_run {
__u64 gva;
__u64 gpa;
} arm_sea;
+ /* KVM_EXIT_SNP_REQ_CERTS */
+ struct kvm_exit_snp_req_certs snp_req_certs;
/* Fix the size of the union. */
char padding[256];
};
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 192694c97356..c3c464513b4e 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -93,6 +93,7 @@ TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_set_state_test
TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test
@@ -121,7 +122,6 @@ TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
-TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
TEST_GEN_PROGS_x86 += x86/xapic_state_test
diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
index 29cffd0a9181..10b30b38bb3f 100644
--- a/tools/testing/selftests/kvm/include/x86/svm.h
+++ b/tools/testing/selftests/kvm/include/x86/svm.h
@@ -92,8 +92,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 int_vector;
u32 int_state;
u8 reserved_3[4];
- u32 exit_code;
- u32 exit_code_hi;
+ u64 exit_code;
u64 exit_info_1;
u64 exit_info_2;
u32 exit_int_info;
diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
index 67a62a5a8895..0f2102b43629 100644
--- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_set_nested_state_test
- *
* Copyright (C) 2019, Google LLC.
*
* This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
@@ -11,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <errno.h>
#include <linux/kvm.h>
@@ -241,8 +240,108 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu)
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
- TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
- TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+ TEST_ASSERT_EQ(state->hdr.vmx.vmxon_pa, -1ull);
+ TEST_ASSERT_EQ(state->hdr.vmx.vmcs12_pa, -1ull);
+ TEST_ASSERT_EQ(state->flags, 0);
+
+ free(state);
+}
+
+static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu)
+{
+ uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME);
+}
+
+static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu)
+{
+ uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME);
+}
+
+void set_default_svm_state(struct kvm_nested_state *state, int size)
+{
+ memset(state, 0, size);
+ state->format = 1;
+ state->size = size;
+ state->hdr.svm.vmcb_pa = 0x3000;
+}
+
+void test_svm_nested_state(struct kvm_vcpu *vcpu)
+{
+ /* Add a page for VMCB. */
+ const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+ struct kvm_nested_state *state =
+ (struct kvm_nested_state *)malloc(state_sz);
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM);
+
+ /* The format must be set to 1. 0 for VMX, 1 for SVM. */
+ set_default_svm_state(state, state_sz);
+ state->format = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * If EFER.SVME is clear, guest mode is disallowed and GIF can be set or
+ * cleared.
+ */
+ vcpu_efer_disable_svm(vcpu);
+
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+
+ /* Enable SVM in the guest EFER. */
+ vcpu_efer_enable_svm(vcpu);
+
+ /* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Size must be large enough to fit kvm_nested_state and VMCB
+ * only when entering guest mode.
+ */
+ set_default_svm_state(state, state_sz/2);
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Test that if we leave nesting the state reflects that when we get it
+ * again, except for vmcb_pa, which is always returned as 0 when not in
+ * guest mode.
+ */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
+ TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+ "Size must be between %ld and %d. The size returned was %d.",
+ sizeof(*state), state_sz, state->size);
+
+ TEST_ASSERT_EQ(state->hdr.svm.vmcb_pa, 0);
+ TEST_ASSERT_EQ(state->flags, KVM_STATE_NESTED_GIF_SET);
free(state);
}
@@ -255,20 +354,20 @@ int main(int argc, char *argv[])
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
- /*
- * AMD currently does not implement set_nested_state, so for now we
- * just early out.
- */
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
vm = vm_create_with_one_vcpu(&vcpu, NULL);
/*
- * First run tests with VMX disabled to check error handling.
+ * First run tests with VMX/SVM disabled to check error handling.
+ * test_{vmx/svm}_nested_state() will re-enable as needed.
*/
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ else
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
test_nested_state_expect_efault(vcpu, NULL);
@@ -297,7 +396,10 @@ int main(int argc, char *argv[])
state.flags = KVM_STATE_NESTED_RUN_PENDING;
test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vcpu);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ test_vmx_nested_state(vcpu);
+ else
+ test_svm_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 7b6481d6c0d3..4bd1655f9e6d 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
@@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
- "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
@@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
- "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);