From 89e54ec592324ed8b2b1eb41f91c1a4b724b0db5 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Thu, 25 Aug 2022 22:57:53 +0000 Subject: KVM: x86: Update trace function for nested VM entry to support VMX Update trace function for nested VM entry to support VMX. Existing trace function only supports nested VMX and the information printed out is AMD specific. So, rename trace_kvm_nested_vmrun() to trace_kvm_nested_vmenter(), since 'vmenter' is generic. Add a new field 'isa' to recognize Intel and AMD; Update the output to print out VMX/SVM related naming respectively, eg., vmcb vs. vmcs; npt vs. ept. Opportunistically update the call site of trace_kvm_nested_vmenter() to make one line per parameter. Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20220825225755.907001-2-mizhang@google.com [sean: align indentation, s/update/rename in changelog] Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/kvm/svm/nested.c') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 76dcc8a3e849..540a37225519 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -781,11 +781,13 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, struct vcpu_svm *svm = to_svm(vcpu); int ret; - trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa, - vmcb12->save.rip, - vmcb12->control.int_ctl, - vmcb12->control.event_inj, - vmcb12->control.nested_ctl); + trace_kvm_nested_vmenter(svm->vmcb->save.rip, + vmcb12_gpa, + vmcb12->save.rip, + vmcb12->control.int_ctl, + vmcb12->control.event_inj, + vmcb12->control.nested_ctl, + KVM_ISA_SVM); trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff, vmcb12->control.intercepts[INTERCEPT_CR] >> 16, -- cgit v1.2.3 From 02dfc44f205772f0edc0d8e58420205c1cf2f83b Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Thu, 25 Aug 2022 22:57:55 +0000 Subject: KVM: x86: Print guest pgd in kvm_nested_vmenter() Print guest pgd in kvm_nested_vmenter() to enrich the information for tracing. When tdp is enabled, print the value of tdp page table (EPT/NPT); when tdp is disabled, print the value of non-nested CR3. Suggested-by: Sean Christopherson Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20220825225755.907001-4-mizhang@google.com [sean: print nested_cr3 vs. nested_eptp vs. guest_cr3] Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/svm/nested.c') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 540a37225519..66f63e58efcc 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -787,6 +787,8 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, vmcb12->control.int_ctl, vmcb12->control.event_inj, vmcb12->control.nested_ctl, + vmcb12->control.nested_cr3, + vmcb12->save.cr3, KVM_ISA_SVM); trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff, -- cgit v1.2.3 From d4963e319f1f7851a098df6610a27f9f4cf6d42a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 23:16:01 +0000 Subject: KVM: x86: Make kvm_queued_exception a properly named, visible struct Move the definition of "struct kvm_queued_exception" out of kvm_vcpu_arch in anticipation of adding a second instance in kvm_vcpu_arch to handle exceptions that occur when vectoring an injected exception and are morphed to VM-Exit instead of leading to #DF. Opportunistically take advantage of the churn to rename "nr" to "vector". No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20220830231614.3580124-15-seanjc@google.com Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'arch/x86/kvm/svm/nested.c') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 66f63e58efcc..bbfbceaca6ad 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -468,7 +468,7 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm, unsigned int nr; if (vcpu->arch.exception.injected) { - nr = vcpu->arch.exception.nr; + nr = vcpu->arch.exception.vector; exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; if (vcpu->arch.exception.has_error_code) { @@ -1310,42 +1310,45 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu) static bool nested_exit_on_exception(struct vcpu_svm *svm) { - unsigned int nr = svm->vcpu.arch.exception.nr; + unsigned int vector = svm->vcpu.arch.exception.vector; - return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr)); + return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector)); } -static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) +static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) { - unsigned int nr = svm->vcpu.arch.exception.nr; + struct kvm_queued_exception *ex = &vcpu->arch.exception; + struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; + vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector; vmcb->control.exit_code_hi = 0; - if (svm->vcpu.arch.exception.has_error_code) - vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code; + if (ex->has_error_code) + vmcb->control.exit_info_1 = ex->error_code; /* * EXITINFO2 is undefined for all exception intercepts other * than #PF. */ - if (nr == PF_VECTOR) { - if (svm->vcpu.arch.exception.nested_apf) - vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; - else if (svm->vcpu.arch.exception.has_payload) - vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; + if (ex->vector == PF_VECTOR) { + if (ex->nested_apf) + vmcb->control.exit_info_2 = vcpu->arch.apf.nested_apf_token; + else if (ex->has_payload) + vmcb->control.exit_info_2 = ex->payload; else - vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; - } else if (nr == DB_VECTOR) { + vmcb->control.exit_info_2 = vcpu->arch.cr2; + } else if (ex->vector == DB_VECTOR) { /* See inject_pending_event. */ - kvm_deliver_exception_payload(&svm->vcpu); - if (svm->vcpu.arch.dr7 & DR7_GD) { - svm->vcpu.arch.dr7 &= ~DR7_GD; - kvm_update_dr7(&svm->vcpu); + kvm_deliver_exception_payload(vcpu, ex); + + if (vcpu->arch.dr7 & DR7_GD) { + vcpu->arch.dr7 &= ~DR7_GD; + kvm_update_dr7(vcpu); } - } else - WARN_ON(svm->vcpu.arch.exception.has_payload); + } else { + WARN_ON(ex->has_payload); + } nested_svm_vmexit(svm); } @@ -1383,7 +1386,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) return -EBUSY; if (!nested_exit_on_exception(svm)) return 0; - nested_svm_inject_exception_vmexit(svm); + nested_svm_inject_exception_vmexit(vcpu); return 0; } -- cgit v1.2.3 From 72c14e00bdc445e96045c28d04bba45cbe69cf95 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 23:16:02 +0000 Subject: KVM: x86: Formalize blocking of nested pending exceptions Capture nested_run_pending as block_pending_exceptions so that the logic of why exceptions are blocked only needs to be documented once instead of at every place that employs the logic. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20220830231614.3580124-16-seanjc@google.com Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'arch/x86/kvm/svm/nested.c') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index bbfbceaca6ad..2ecc64c3f6ee 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1360,10 +1360,22 @@ static inline bool nested_exit_on_init(struct vcpu_svm *svm) static int svm_check_nested_events(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - bool block_nested_events = - kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending; struct kvm_lapic *apic = vcpu->arch.apic; + struct vcpu_svm *svm = to_svm(vcpu); + /* + * Only a pending nested run blocks a pending exception. If there is a + * previously injected event, the pending exception occurred while said + * event was being delivered and thus needs to be handled. + */ + bool block_nested_exceptions = svm->nested.nested_run_pending; + /* + * New events (not exceptions) are only recognized at instruction + * boundaries. If an event needs reinjection, then KVM is handling a + * VM-Exit that occurred _during_ instruction execution; new events are + * blocked until the instruction completes. + */ + bool block_nested_events = block_nested_exceptions || + kvm_event_needs_reinjection(vcpu); if (lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &apic->pending_events)) { @@ -1376,13 +1388,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) } if (vcpu->arch.exception.pending) { - /* - * Only a pending nested run can block a pending exception. - * Otherwise an injected NMI/interrupt should either be - * lost or delivered to the nested hypervisor in the EXITINTINFO - * vmcb field, while delivering the pending exception. - */ - if (svm->nested.nested_run_pending) + if (block_nested_exceptions) return -EBUSY; if (!nested_exit_on_exception(svm)) return 0; -- cgit v1.2.3 From 7709aba8f71613ae5d18d8c00adb54948e6bedb3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 23:16:08 +0000 Subject: KVM: x86: Morph pending exceptions to pending VM-Exits at queue time Morph pending exceptions to pending VM-Exits (due to interception) when the exception is queued instead of waiting until nested events are checked at VM-Entry. This fixes a longstanding bug where KVM fails to handle an exception that occurs during delivery of a previous exception, KVM (L0) and L1 both want to intercept the exception (e.g. #PF for shadow paging), and KVM determines that the exception is in the guest's domain, i.e. queues the new exception for L2. Deferring the interception check causes KVM to esclate various combinations of injected+pending exceptions to double fault (#DF) without consulting L1's interception desires, and ends up injecting a spurious #DF into L2. KVM has fudged around the issue for #PF by special casing emulated #PF injection for shadow paging, but the underlying issue is not unique to shadow paging in L0, e.g. if KVM is intercepting #PF because the guest has a smaller maxphyaddr and L1 (but not L0) is using shadow paging. Other exceptions are affected as well, e.g. if KVM is intercepting #GP for one of SVM's workaround or for the VMware backdoor emulation stuff. The other cases have gone unnoticed because the #DF is spurious if and only if L1 resolves the exception, e.g. KVM's goofs go unnoticed if L1 would have injected #DF anyways. The hack-a-fix has also led to ugly code, e.g. bailing from the emulator if #PF injection forced a nested VM-Exit and the emulator finds itself back in L1. Allowing for direct-to-VM-Exit queueing also neatly solves the async #PF in L2 mess; no need to set a magic flag and token, simply queue a #PF nested VM-Exit. Deal with event migration by flagging that a pending exception was queued by userspace and check for interception at the next KVM_RUN, e.g. so that KVM does the right thing regardless of the order in which userspace restores nested state vs. event state. When "getting" events from userspace, simply drop any pending excpetion that is destined to be intercepted if there is also an injected exception to be migrated. Ideally, KVM would migrate both events, but that would require new ABI, and practically speaking losing the event is unlikely to be noticed, let alone fatal. The injected exception is captured, RIP still points at the original faulting instruction, etc... So either the injection on the target will trigger the same intercepted exception, or the source of the intercepted exception was transient and/or non-deterministic, thus dropping it is ok-ish. Fixes: a04aead144fd ("KVM: nSVM: fix running nested guests when npt=0") Fixes: feaf0c7dc473 ("KVM: nVMX: Do not generate #DF if #PF happens during exception delivery into L2") Cc: Jim Mattson Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20220830231614.3580124-22-seanjc@google.com Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 45 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) (limited to 'arch/x86/kvm/svm/nested.c') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 2ecc64c3f6ee..cf22900f7c54 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -55,28 +55,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } -static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu, - struct x86_exception *fault) -{ - struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; - - WARN_ON(!is_guest_mode(vcpu)); - - if (vmcb12_is_intercept(&svm->nested.ctl, - INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && - !WARN_ON_ONCE(svm->nested.nested_run_pending)) { - vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; - vmcb->control.exit_code_hi = 0; - vmcb->control.exit_info_1 = fault->error_code; - vmcb->control.exit_info_2 = fault->address; - nested_svm_vmexit(svm); - return true; - } - - return false; -} - static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1308,16 +1286,17 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu) return 0; } -static bool nested_exit_on_exception(struct vcpu_svm *svm) +static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector, + u32 error_code) { - unsigned int vector = svm->vcpu.arch.exception.vector; + struct vcpu_svm *svm = to_svm(vcpu); return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector)); } static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) { - struct kvm_queued_exception *ex = &vcpu->arch.exception; + struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; @@ -1332,9 +1311,7 @@ static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) * than #PF. */ if (ex->vector == PF_VECTOR) { - if (ex->nested_apf) - vmcb->control.exit_info_2 = vcpu->arch.apf.nested_apf_token; - else if (ex->has_payload) + if (ex->has_payload) vmcb->control.exit_info_2 = ex->payload; else vmcb->control.exit_info_2 = vcpu->arch.cr2; @@ -1387,15 +1364,19 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) return 0; } - if (vcpu->arch.exception.pending) { + if (vcpu->arch.exception_vmexit.pending) { if (block_nested_exceptions) return -EBUSY; - if (!nested_exit_on_exception(svm)) - return 0; nested_svm_inject_exception_vmexit(vcpu); return 0; } + if (vcpu->arch.exception.pending) { + if (block_nested_exceptions) + return -EBUSY; + return 0; + } + if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { if (block_nested_events) return -EBUSY; @@ -1733,8 +1714,8 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) struct kvm_x86_nested_ops svm_nested_ops = { .leave_nested = svm_leave_nested, + .is_exception_vmexit = nested_svm_is_exception_vmexit, .check_events = svm_check_nested_events, - .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround, .triple_fault = nested_svm_triple_fault, .get_nested_state_pages = svm_get_nested_state_pages, .get_state = svm_get_nested_state, -- cgit v1.2.3 From e746c1f1b94ac9fa6c1f02fce808a6b2f3ef8cbd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 30 Aug 2022 23:16:11 +0000 Subject: KVM: x86: Rename inject_pending_events() to kvm_check_and_inject_events() Rename inject_pending_events() to kvm_check_and_inject_events() in order to capture the fact that it handles more than just pending events, and to (mostly) align with kvm_check_nested_events(), which omits the "inject" for brevity. Add a comment above kvm_check_and_inject_events() to provide a high-level synopsis, and to document a virtualization hole (KVM erratum if you will) that exists due to KVM not strictly tracking instruction boundaries with respect to coincident instruction restarts and asynchronous events. No functional change inteded. Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20220830231614.3580124-25-seanjc@google.com Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm/nested.c') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cf22900f7c54..4c620999d230 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1316,7 +1316,7 @@ static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) else vmcb->control.exit_info_2 = vcpu->arch.cr2; } else if (ex->vector == DB_VECTOR) { - /* See inject_pending_event. */ + /* See kvm_check_and_inject_events(). */ kvm_deliver_exception_payload(vcpu, ex); if (vcpu->arch.dr7 & DR7_GD) { -- cgit v1.2.3