diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2026-06-12 10:47:24 +0200 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2026-06-12 10:47:24 +0200 |
| commit | 4e6df939687caf878bb493570ff1c583bba86e7c (patch) | |
| tree | 27583241d34b3bf04fa027676ef894e97929dd2f | |
| parent | 743204d772648242c50ceebc72e8ff31aab1cff4 (diff) | |
| parent | 8b9ef3220050e19a076f3fa12fa12b01f9f33446 (diff) | |
Merge branch 'kvm-single-pdptrs' into HEAD
This branch carries the non-MMU changes and preliminary cleanups from the
"split kvm_mmu in three" series[1]. The final outcome is a single copy of the
PDPTRs (in vcpu->arch) instead of two (in root_mmu and nested_mmu).
[1] https://lore.kernel.org/kvm/20260603105814.10236-1-pbonzini@redhat.com/T/#t
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 5 | ||||
| -rw-r--r-- | arch/x86/kvm/regs.h | 12 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/nested.c | 27 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/nested.c | 11 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 20 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 9 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.h | 2 |
8 files changed, 46 insertions, 42 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3886b536c8a5..eee473717c0e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -525,10 +525,7 @@ struct kvm_mmu { * the bits spte never used. */ struct rsvd_bits_validate shadow_zero_check; - struct rsvd_bits_validate guest_rsvd_check; - - u64 pdptrs[4]; /* pae */ }; enum pmc_type { @@ -888,6 +885,8 @@ struct kvm_vcpu_arch { */ struct kvm_mmu *walk_mmu; + u64 pdptrs[4]; /* pae */ + struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; struct kvm_mmu_memory_cache mmu_shadow_page_cache; struct kvm_mmu_memory_cache mmu_shadowed_info_cache; diff --git a/arch/x86/kvm/regs.h b/arch/x86/kvm/regs.h index a57ba26279ed..5bda738afb7c 100644 --- a/arch/x86/kvm/regs.h +++ b/arch/x86/kvm/regs.h @@ -136,6 +136,14 @@ static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, return test_bit(reg, vcpu->arch.regs_dirty); } +static inline void kvm_register_mark_for_reload(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + kvm_assert_register_caching_allowed(vcpu); + __clear_bit(reg, vcpu->arch.regs_avail); + __clear_bit(reg, vcpu->arch.regs_dirty); +} + static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { @@ -259,12 +267,12 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) if (!kvm_register_is_available(vcpu, VCPU_REG_PDPTR)) kvm_x86_call(cache_reg)(vcpu, VCPU_REG_PDPTR); - return vcpu->arch.walk_mmu->pdptrs[index]; + return vcpu->arch.pdptrs[index]; } static inline void kvm_pdptr_write(struct kvm_vcpu *vcpu, int index, u64 value) { - vcpu->arch.walk_mmu->pdptrs[index] = value; + vcpu->arch.pdptrs[index] = value; } static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1ab8b95975a4..9aedb88c832d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -717,9 +717,12 @@ static int 
nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3))) return -EINVAL; - if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) && - CC(!load_pdptrs(vcpu, cr3))) - return -EINVAL; + if (reload_pdptrs && is_pae_paging(vcpu)) { + if (nested_npt) + kvm_register_mark_for_reload(vcpu, VCPU_REG_PDPTR); + else if (CC(!load_pdptrs(vcpu, cr3))) + return -EINVAL; + } vcpu->arch.cr3 = cr3; @@ -2111,15 +2114,21 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) if (WARN_ON(!is_guest_mode(vcpu))) return true; - if (!vcpu->arch.pdptrs_from_userspace && - !nested_npt_enabled(to_svm(vcpu)) && is_pae_paging(vcpu)) + if (is_pae_paging(vcpu)) { /* - * Reload the guest's PDPTRs since after a migration - * the guest CR3 might be restored prior to setting the nested - * state which can lead to a load of wrong PDPTRs. + * After migration, CR3 may have been restored before + * KVM_SET_NESTED_STATE, so the PDPTR load into vcpu->arch.pdptrs[] + * may have treated CR3 as an L1 GPA. For nNPT, drop the + * cache so the next access reloads them with the proper + * nGPA translation. For !nNPT, reload eagerly unless userspace + * already supplied authoritative PDPTRs via KVM_SET_SREGS2. 
*/ - if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3))) + if (nested_npt_enabled(to_svm(vcpu))) + kvm_register_mark_for_reload(vcpu, VCPU_REG_PDPTR); + else if (!vcpu->arch.pdptrs_from_userspace && + CC(!load_pdptrs(vcpu, vcpu->arch.cr3))) return false; + } if (!nested_svm_merge_msrpm(vcpu)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 526e0fdcd16b..e6408c3e8419 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1538,7 +1538,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) switch (reg) { case VCPU_REG_PDPTR: /* - * When !npt_enabled, mmu->pdptrs[] is already available since + * When !npt_enabled, vcpu->arch.pdptrs[] is already available since * it is always updated per SDM when moving to CRs. */ if (npt_enabled) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b2c851cc7d5c..3a293640d58c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2640,17 +2640,6 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); - /* - * L1 may access the L2's PDPTR, so save them to construct - * vmcs12 - */ - if (enable_ept) { - vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); - vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); - vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); - vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); - } - if (kvm_mpx_supported() && vmx->vcpu.arch.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c548f22375ad..a1a5edb39a7e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3384,30 +3384,26 @@ void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) { - struct kvm_mmu 
*mmu = vcpu->arch.walk_mmu; - if (!kvm_register_is_dirty(vcpu, VCPU_REG_PDPTR)) return; if (is_pae_paging(vcpu)) { - vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); - vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); - vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); - vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); + vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]); } } void ept_save_pdptrs(struct kvm_vcpu *vcpu) { - struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (WARN_ON_ONCE(!is_pae_paging(vcpu))) return; - mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); + vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); + vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); + vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); + vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); kvm_register_mark_available(vcpu, VCPU_REG_PDPTR); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf122b8c3210..d9d51803b7b2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1031,7 +1031,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) gpa_t real_gpa; int i; int ret; - u64 pdpte[ARRAY_SIZE(mmu->pdptrs)]; + u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; /* * If the MMU is nested, CR3 holds an L2 GPA and needs to be translated @@ -1060,10 +1060,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) * Marking VCPU_REG_PDPTR dirty doesn't work for !tdp_enabled. * Shadow page roots need to be reconstructed instead. 
*/ - if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs))) + if (!tdp_enabled && memcmp(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs))) kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT); - memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); + memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); kvm_register_mark_dirty(vcpu, VCPU_REG_PDPTR); kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu); vcpu->arch.pdptrs_from_userspace = false; @@ -14240,6 +14240,9 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) return 1; } + if (WARN_ON_ONCE(tdp_enabled)) + return 0; + pcid_enabled = kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE); switch (type) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index acb22167901f..9de577ef9c97 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -254,7 +254,7 @@ static inline bool x86_exception_has_error_code(unsigned int vector) static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { - return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; + return vcpu->arch.mmu == &vcpu->arch.guest_mmu; } static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) |
