summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-06-12 10:47:24 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2026-06-12 10:47:24 +0200
commit4e6df939687caf878bb493570ff1c583bba86e7c (patch)
tree27583241d34b3bf04fa027676ef894e97929dd2f
parent743204d772648242c50ceebc72e8ff31aab1cff4 (diff)
parent8b9ef3220050e19a076f3fa12fa12b01f9f33446 (diff)
Merge branch 'kvm-single-pdptrs' into HEAD
This merges the non-MMU changes and preliminary cleanups from the "split kvm_mmu in three" series [1]. The final outcome is to have a single copy of the PDPTRs (in vcpu->arch) instead of two (in root_mmu and nested_mmu).

[1] https://lore.kernel.org/kvm/20260603105814.10236-1-pbonzini@redhat.com/T/#t
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 5
-rw-r--r-- arch/x86/kvm/regs.h | 12
-rw-r--r-- arch/x86/kvm/svm/nested.c | 27
-rw-r--r-- arch/x86/kvm/svm/svm.c | 2
-rw-r--r-- arch/x86/kvm/vmx/nested.c | 11
-rw-r--r-- arch/x86/kvm/vmx/vmx.c | 20
-rw-r--r-- arch/x86/kvm/x86.c | 9
-rw-r--r-- arch/x86/kvm/x86.h | 2
8 files changed, 46 insertions, 42 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3886b536c8a5..eee473717c0e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -525,10 +525,7 @@ struct kvm_mmu {
* the bits spte never used.
*/
struct rsvd_bits_validate shadow_zero_check;
-
struct rsvd_bits_validate guest_rsvd_check;
-
- u64 pdptrs[4]; /* pae */
};
enum pmc_type {
@@ -888,6 +885,8 @@ struct kvm_vcpu_arch {
*/
struct kvm_mmu *walk_mmu;
+ u64 pdptrs[4]; /* pae */
+
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
struct kvm_mmu_memory_cache mmu_shadow_page_cache;
struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
diff --git a/arch/x86/kvm/regs.h b/arch/x86/kvm/regs.h
index a57ba26279ed..5bda738afb7c 100644
--- a/arch/x86/kvm/regs.h
+++ b/arch/x86/kvm/regs.h
@@ -136,6 +136,14 @@ static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
return test_bit(reg, vcpu->arch.regs_dirty);
}
+static inline void kvm_register_mark_for_reload(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ kvm_assert_register_caching_allowed(vcpu);
+ __clear_bit(reg, vcpu->arch.regs_avail);
+ __clear_bit(reg, vcpu->arch.regs_dirty);
+}
+
static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
@@ -259,12 +267,12 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
if (!kvm_register_is_available(vcpu, VCPU_REG_PDPTR))
kvm_x86_call(cache_reg)(vcpu, VCPU_REG_PDPTR);
- return vcpu->arch.walk_mmu->pdptrs[index];
+ return vcpu->arch.pdptrs[index];
}
static inline void kvm_pdptr_write(struct kvm_vcpu *vcpu, int index, u64 value)
{
- vcpu->arch.walk_mmu->pdptrs[index] = value;
+ vcpu->arch.pdptrs[index] = value;
}
static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1ab8b95975a4..9aedb88c832d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -717,9 +717,12 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3)))
return -EINVAL;
- if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
- CC(!load_pdptrs(vcpu, cr3)))
- return -EINVAL;
+ if (reload_pdptrs && is_pae_paging(vcpu)) {
+ if (nested_npt)
+ kvm_register_mark_for_reload(vcpu, VCPU_REG_PDPTR);
+ else if (CC(!load_pdptrs(vcpu, cr3)))
+ return -EINVAL;
+ }
vcpu->arch.cr3 = cr3;
@@ -2111,15 +2114,21 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
if (WARN_ON(!is_guest_mode(vcpu)))
return true;
- if (!vcpu->arch.pdptrs_from_userspace &&
- !nested_npt_enabled(to_svm(vcpu)) && is_pae_paging(vcpu))
+ if (is_pae_paging(vcpu)) {
/*
- * Reload the guest's PDPTRs since after a migration
- * the guest CR3 might be restored prior to setting the nested
- * state which can lead to a load of wrong PDPTRs.
+ * After migration, CR3 may have been restored before
+ * KVM_SET_NESTED_STATE, so the PDPTR load into vcpu->arch.pdptrs[]
+ * may have treated CR3 as an L1 GPA. For nNPT, drop the
+ * cache so the next access reloads them with the proper
+ * nGPA translation. For !nNPT, reload eagerly unless userspace
+ * already supplied authoritative PDPTRs via KVM_SET_SREGS2.
*/
- if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
+ if (nested_npt_enabled(to_svm(vcpu)))
+ kvm_register_mark_for_reload(vcpu, VCPU_REG_PDPTR);
+ else if (!vcpu->arch.pdptrs_from_userspace &&
+ CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
return false;
+ }
if (!nested_svm_merge_msrpm(vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 526e0fdcd16b..e6408c3e8419 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1538,7 +1538,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
switch (reg) {
case VCPU_REG_PDPTR:
/*
- * When !npt_enabled, mmu->pdptrs[] is already available since
+ * When !npt_enabled, vcpu->arch.pdptrs[] is already available since
* it is always updated per SDM when moving to CRs.
*/
if (npt_enabled)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b2c851cc7d5c..3a293640d58c 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2640,17 +2640,6 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
- /*
- * L1 may access the L2's PDPTR, so save them to construct
- * vmcs12
- */
- if (enable_ept) {
- vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
- vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
- vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
- vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
- }
-
if (kvm_mpx_supported() && vmx->vcpu.arch.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c548f22375ad..a1a5edb39a7e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3384,30 +3384,26 @@ void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
-
if (!kvm_register_is_dirty(vcpu, VCPU_REG_PDPTR))
return;
if (is_pae_paging(vcpu)) {
- vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
- vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
- vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
- vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
+ vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
+ vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
+ vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
+ vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
}
}
void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
-
if (WARN_ON_ONCE(!is_pae_paging(vcpu)))
return;
- mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
- mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
- mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
- mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+ vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+ vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+ vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+ vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
kvm_register_mark_available(vcpu, VCPU_REG_PDPTR);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf122b8c3210..d9d51803b7b2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1031,7 +1031,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
gpa_t real_gpa;
int i;
int ret;
- u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
+ u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
/*
* If the MMU is nested, CR3 holds an L2 GPA and needs to be translated
@@ -1060,10 +1060,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
* Marking VCPU_REG_PDPTR dirty doesn't work for !tdp_enabled.
* Shadow page roots need to be reconstructed instead.
*/
- if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)))
+ if (!tdp_enabled && memcmp(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)))
kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT);
- memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
+ memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
kvm_register_mark_dirty(vcpu, VCPU_REG_PDPTR);
kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
vcpu->arch.pdptrs_from_userspace = false;
@@ -14240,6 +14240,9 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return 1;
}
+ if (WARN_ON_ONCE(tdp_enabled))
+ return 0;
+
pcid_enabled = kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE);
switch (type) {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index acb22167901f..9de577ef9c97 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -254,7 +254,7 @@ static inline bool x86_exception_has_error_code(unsigned int vector)
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
+ return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
}
static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)