summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-05 10:38:45 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-05 10:38:45 -0700
commit 95b78879a80ab034cd56cc70f057c3cb4f2612a6 (patch)
tree 0bcf23729bccc2b27b6f88f863985294d93ef119
parent d1b0937f0eadbc30b528d37589ec7fb6ce9f4114 (diff)
parent 7ec0360122d8f5033177ed0210d5ad71ec5b50c7 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "arm64:

   - Correctly drop the ITS translation cache reference when it
     actually gets invalidated

   - Take the SRCU lock for SW page table walks

   - Restore POR_EL0 access to host EL0, avoiding POR_EL0 becoming
     inaccessible from EL0 after running a guest

   - Reassign nested_mmus array behind mmu_lock, ensuring that vcpu
     init and MMU notifiers are mutually exclusive

   - Correctly handle FEAT_XNX at stage-2

  s390:

   - More fixes for the new page table management and nested
     virtualization

  x86:

   - More fixes for GHCB issues:

      - Read start/end indices of page size change requests exactly
        once per vmexit

      - Unmap and unpin the GHCB as needed on vCPU free"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (23 commits)
  KVM: arm64: Correctly identify executable PTEs at stage-2
  KVM: arm64: nv: Fix handling of XN[0] when !FEAT_XNX
  KVM: arm64: Reassign nested_mmus array behind mmu_lock
  KVM: arm64: Restore POR_EL0 access to host EL0
  KVM: arm64: Take the SRCU lock for page table walks in fault injection and AT emulation
  KVM: arm64: vgic-its: Drop the translation cache reference only for the erased entry
  KVM: SEV: Unmap and unpin the GHCB as needed on vCPU free
  KVM: SEV: Decouple the need to sync the GHCB SA from the need to free the SA
  KVM: SEV: Move sev_free_vcpu() down below sev_es_unmap_ghcb()
  KVM: Don't WARN if memory is dirtied without a vCPU when the VM is dying
  KVM: SEV: Read start/end indices of PSC requests exactly once per #VMGEXIT
  KVM: SEV: Add an anonymous "psc" struct to track current PSC metadata
  KVM: SEV: Make it more obvious when KVM is writing back the current PSC index
  KVM: s390: Remove ptep_zap_softleaf_entry()
  KVM: s390: Fix possible reference leak in fault-in code
  KVM: s390: Prevent memslots outside the ASCE range
  KVM: s390: Lock pte when making page secure
  KVM: s390: Fix fault-in code
  KVM: s390: vsie: Fix rmap handling in _do_shadow_crste()
  KVM: s390: Fix guest / virtual address confusion in _essa_clear_cbrl()
  ...
-rw-r--r-- arch/arm64/include/asm/kvm_nested.h 4
-rw-r--r-- arch/arm64/kvm/at.c 6
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/switch.h 2
-rw-r--r-- arch/arm64/kvm/hyp/pgtable.c 4
-rw-r--r-- arch/arm64/kvm/nested.c 33
-rw-r--r-- arch/arm64/kvm/vgic/vgic-its.c 6
-rw-r--r-- arch/s390/include/asm/gmap_helpers.h 1
-rw-r--r-- arch/s390/kvm/faultin.c 30
-rw-r--r-- arch/s390/kvm/gaccess.c 11
-rw-r--r-- arch/s390/kvm/gmap.c 19
-rw-r--r-- arch/s390/kvm/gmap.h 3
-rw-r--r-- arch/s390/kvm/kvm-s390.c 33
-rw-r--r-- arch/s390/kvm/priv.c 8
-rw-r--r-- arch/s390/kvm/pv.c 21
-rw-r--r-- arch/s390/mm/gmap_helpers.c 128
-rw-r--r-- arch/x86/kvm/svm/sev.c 238
-rw-r--r-- arch/x86/kvm/svm/svm.h 9
-rw-r--r-- virt/kvm/kvm_main.c 3
18 files changed, 337 insertions, 222 deletions
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index dc2957662ff2..cdf3e8422ea1 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -132,7 +132,7 @@ static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *t
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
- xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
+ xn &= 0b10;
switch (xn) {
case 0b00:
@@ -148,7 +148,7 @@ static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *t
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
- xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
+ xn &= 0b10;
switch (xn) {
case 0b00:
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 9f8f0ae8e86e..889c2c15d7bd 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1569,7 +1569,8 @@ int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
/* Do the stage-2 translation */
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
out.esr = 0;
- ret = kvm_walk_nested_s2(vcpu, ipa, &out);
+ scoped_guard(srcu, &vcpu->kvm->srcu)
+ ret = kvm_walk_nested_s2(vcpu, ipa, &out);
if (ret < 0)
return ret;
@@ -1665,7 +1666,8 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
}
/* Walk the guest's PT, looking for a match along the way */
- ret = walk_s1(vcpu, &wi, &wr, va);
+ scoped_guard(srcu, &vcpu->kvm->srcu)
+ ret = walk_s1(vcpu, &wi, &wr, va);
switch (ret) {
case -EINTR:
/* We interrupted the walk on a match, return the level */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 320cd45d49c5..e9b36a3b27bb 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -181,6 +181,8 @@ static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
val |= CPACR_EL1_ZEN;
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN;
+ if (cpus_have_final_cap(ARM64_HAS_S1POE))
+ val |= CPACR_EL1_E0POE;
write_sysreg(val, cpacr_el1);
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 0c1defa5fb0f..91a7dfad6686 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -925,7 +925,9 @@ static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
static bool stage2_pte_executable(kvm_pte_t pte)
{
- return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+ enum kvm_pgtable_prot prot = kvm_pgtable_stage2_pte_prot(pte);
+
+ return prot & (KVM_PGTABLE_PROT_UX | KVM_PGTABLE_PROT_PX);
}
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 38f672e94087..6f7bc9a9992e 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -89,21 +89,28 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
* again, and there is no reason to affect the whole VM for this.
*/
num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU;
- tmp = kvrealloc(kvm->arch.nested_mmus,
- size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus),
- GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!tmp)
- return -ENOMEM;
- swap(kvm->arch.nested_mmus, tmp);
+ if (num_mmus > kvm->arch.nested_mmus_size) {
+ tmp = kvcalloc(num_mmus, sizeof(*tmp), GFP_KERNEL_ACCOUNT);
+ if (!tmp)
+ return -ENOMEM;
- /*
- * If we went through a realocation, adjust the MMU back-pointers in
- * the previously initialised kvm_pgtable structures.
- */
- if (kvm->arch.nested_mmus != tmp)
- for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
- kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i];
+ write_lock(&kvm->mmu_lock);
+
+ if (kvm->arch.nested_mmus_size) {
+ memcpy(tmp, kvm->arch.nested_mmus,
+ size_mul(sizeof(*tmp), kvm->arch.nested_mmus_size));
+
+ for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
+ tmp[i].pgt->mmu = &tmp[i];
+ }
+
+ swap(kvm->arch.nested_mmus, tmp);
+
+ write_unlock(&kvm->mmu_lock);
+
+ kvfree(tmp);
+ }
for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 1d7e5d560af4..1e3706ac3b8e 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -597,8 +597,10 @@ static void vgic_its_invalidate_cache(struct vgic_its *its)
unsigned long idx;
xa_for_each(&its->translation_cache, idx, irq) {
- xa_erase(&its->translation_cache, idx);
- vgic_put_irq(kvm, irq);
+ /* Only the context that erases the entry drops its cache ref. */
+ irq = xa_erase(&its->translation_cache, idx);
+ if (irq)
+ vgic_put_irq(kvm, irq);
}
}
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
index 2d3ae421077e..d2b616604a46 100644
--- a/arch/s390/include/asm/gmap_helpers.h
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -12,5 +12,6 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
int gmap_helper_disable_cow_sharing(void);
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr);
+pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl);
#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/kvm/faultin.c b/arch/s390/kvm/faultin.c
index ddf0ca71f374..fee80047bd94 100644
--- a/arch/s390/kvm/faultin.c
+++ b/arch/s390/kvm/faultin.c
@@ -36,7 +36,8 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
struct kvm_s390_mmu_cache *mc = NULL;
struct kvm_memory_slot *slot;
unsigned long inv_seq;
- int foll, rc = 0;
+ int rc = -EAGAIN;
+ int foll;
foll = f->write_attempt ? FOLL_WRITE : 0;
foll |= f->attempt_pfault ? FOLL_NOWAIT : 0;
@@ -53,7 +54,14 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
return 0;
}
- while (1) {
+ if (!mc) {
+ local_mc = kvm_s390_new_mmu_cache();
+ if (!local_mc)
+ return -ENOMEM;
+ mc = local_mc;
+ }
+
+ while (rc == -EAGAIN) {
f->valid = false;
inv_seq = kvm->mmu_invalidate_seq;
/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
@@ -93,14 +101,7 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
if (is_error_pfn(f->pfn))
return -EFAULT;
- if (!mc) {
- local_mc = kvm_s390_new_mmu_cache();
- if (!local_mc)
- return -ENOMEM;
- mc = local_mc;
- }
-
- /* Loop, will automatically release the faulted page. */
+ /* Loop, release the faulted page. */
if (mmu_invalidate_retry_gfn_unsafe(kvm, inv_seq, f->gfn)) {
kvm_release_faultin_page(kvm, f->page, true, false);
continue;
@@ -110,20 +111,19 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
if (!mmu_invalidate_retry_gfn(kvm, inv_seq, f->gfn)) {
f->valid = true;
rc = gmap_link(mc, kvm->arch.gmap, f, slot);
- kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt);
- f->page = NULL;
}
+ kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt);
}
- kvm_release_faultin_page(kvm, f->page, true, false);
if (rc == -ENOMEM) {
rc = kvm_s390_mmu_cache_topup(mc);
if (rc)
return rc;
- } else if (rc != -EAGAIN) {
- return rc;
+ rc = -EAGAIN;
}
}
+
+ return rc;
}
int kvm_s390_get_guest_page(struct kvm *kvm, struct guest_fault *f, gfn_t gfn, bool w)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4f8d5592c9a9..20e28b183c1a 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1466,15 +1466,17 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
struct guest_fault *f, bool p)
{
union crste newcrste, oldcrste;
- gfn_t gfn;
+ unsigned long mask;
+ gfn_t r_gfn;
int rc;
lockdep_assert_held(&sg->kvm->mmu_lock);
lockdep_assert_held(&sg->parent->children_lock);
- gfn = f->gfn & (is_pmd(*table) ? _SEGMENT_FR_MASK : _REGION3_FR_MASK);
+ mask = is_pmd(*table) ? _SEGMENT_FR_MASK : _REGION3_FR_MASK;
+ r_gfn = gpa_to_gfn(raddr) & mask;
scoped_guard(spinlock, &sg->host_to_rmap_lock)
- rc = gmap_insert_rmap(sg, gfn, gpa_to_gfn(raddr), host->h.tt);
+ rc = gmap_insert_rmap(sg, f->gfn & mask, r_gfn, host->h.tt);
if (rc)
return rc;
@@ -1497,8 +1499,7 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
return -EAGAIN;
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
- gfn = gpa_to_gfn(raddr);
- while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
+ while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, r_gfn, sg->asce))
;
return 0;
}
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 957126ab991c..52d55ddea8d4 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -395,15 +395,28 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
struct gmap_unmap_priv *priv = walk->priv;
struct folio *folio = NULL;
union crste old = *crstep;
+ bool ok;
if (!old.h.fc)
return 0;
if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
folio = phys_to_folio(crste_origin_large(old));
- /* No races should happen because kvm->mmu_lock is held in write mode */
- KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
- priv->gmap->kvm);
+ /*
+ * No races should happen because kvm->mmu_lock is held in write mode,
+ * but the unmap operation could have triggered an unshadow, which
+ * causes gmap_crstep_xchg_atomic() to return false and clear the
+ * vsie_notif bit. Allow the operation to fail once, if the old crste
+ * had the vsie_notif bit set. A second failure is not allowed, for
+ * the reasons above.
+ */
+ ok = gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn);
+ if (!ok) {
+ KVM_BUG_ON(!old.s.fc1.vsie_notif, priv->gmap->kvm);
+ old.s.fc1.vsie_notif = 0;
+ ok = gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn);
+ KVM_BUG_ON(!ok, priv->gmap->kvm);
+ }
if (folio)
uv_convert_from_secure_folio(folio);
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index 742e42a31744..5374f21aaf8d 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -273,11 +273,14 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
+ newcrste = oldcrste;
newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
_gmap_handle_vsie_unshadow_event(gmap, gfn);
+ dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
+ return false;
}
if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s)
SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e09960c2e6ed..ffb20a64d328 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -999,7 +999,10 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
break;
}
case KVM_S390_VM_MEM_LIMIT_SIZE: {
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *ms;
unsigned long new_limit;
+ int bkt;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -1007,6 +1010,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (get_user(new_limit, (u64 __user *)attr->addr))
return -EFAULT;
+ guard(mutex)(&kvm->lock);
+
+ new_limit = ALIGN(new_limit, HPAGE_SIZE);
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
new_limit > kvm->arch.mem_limit)
return -E2BIG;
@@ -1014,12 +1020,27 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (!new_limit)
return -EINVAL;
- ret = -EBUSY;
- if (!kvm->created_vcpus)
- ret = gmap_set_limit(kvm->arch.gmap, gpa_to_gfn(new_limit));
+ if (kvm->created_vcpus)
+ return -EBUSY;
+
+ ret = 0;
+ scoped_guard(mutex, &kvm->slots_lock) {
+ slots = kvm_memslots(kvm);
+ if (slots && !kvm_memslots_empty(slots)) {
+ kvm_for_each_memslot(ms, bkt, slots) {
+ if (gpa_to_gfn(new_limit) < ms->base_gfn + ms->npages) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ }
+ if (!ret)
+ ret = gmap_set_limit(kvm->arch.gmap, gpa_to_gfn(new_limit));
+ }
+ if (ret)
+ break;
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
- VM_EVENT(kvm, 3, "New guest asce: 0x%p",
- (void *)kvm->arch.gmap->asce.val);
+ VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *)kvm->arch.gmap->asce.val);
break;
}
default:
@@ -5672,6 +5693,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return -EINVAL;
if ((new->base_gfn + new->npages) * PAGE_SIZE > kvm->arch.mem_limit)
return -EINVAL;
+ if (!asce_contains_gfn(kvm->arch.gmap->asce, new->base_gfn + new->npages - 1))
+ return -EINVAL;
}
if (!kvm->arch.migration_mode)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index cc0553da14cb..447ec7ed423d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1188,6 +1188,7 @@ static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len
union crste *crstep;
union pgste pgste;
union pte *ptep;
+ hva_t hva;
int i;
lockdep_assert_held(&vcpu->kvm->mmu_lock);
@@ -1199,8 +1200,11 @@ static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len
if (!ptep || ptep->s.pr)
continue;
pgste = pgste_get_lock(ptep);
- if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero)
- gmap_helper_zap_one_page(vcpu->kvm->mm, cbrl[i]);
+ if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero) {
+ hva = gpa_to_hva(vcpu->kvm, cbrl[i]);
+ if (!kvm_is_error_hva(hva))
+ gmap_helper_zap_one_page(vcpu->kvm->mm, hva);
+ }
pgste_set_unlock(ptep, pgste);
}
}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index c2dafd812a3b..4b865e75351c 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,6 +17,7 @@
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
+#include <asm/gmap_helpers.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gaccess.h"
@@ -73,6 +74,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
struct pv_make_secure {
void *uvcb;
struct folio *folio;
+ struct kvm *kvm;
int rc;
bool needs_export;
};
@@ -103,9 +105,21 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
struct pv_make_secure *priv = f->priv;
struct folio *folio;
+ spinlock_t *ptl; /* pte lock from try_get_locked_pte() */
+ pte_t *ptep;
folio = pfn_folio(f->pfn);
priv->rc = -EAGAIN;
+
+ if (!mmap_read_trylock(priv->kvm->mm))
+ return;
+
+ ptep = try_get_locked_pte(priv->kvm->mm, gfn_to_hva(priv->kvm, f->gfn), &ptl);
+ if (IS_ERR_VALUE(ptep)) {
+ priv->rc = PTR_ERR(ptep);
+ goto out;
+ }
+
if (folio_trylock(folio)) {
priv->rc = __kvm_s390_pv_make_secure(f, folio);
if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
@@ -114,6 +128,11 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f)
}
folio_unlock(folio);
}
+
+ if (ptep)
+ pte_unmap_unlock(ptep, ptl);
+out:
+ mmap_read_unlock(priv->kvm->mm);
}
/**
@@ -127,7 +146,7 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f)
*/
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
- struct pv_make_secure priv = { .uvcb = uvcb };
+ struct pv_make_secure priv = { .uvcb = uvcb, .kvm = kvm, };
struct guest_fault f = {
.write_attempt = true,
.gfn = gpa_to_gfn(gaddr),
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index f8789ffcc05c..1cfe4724fbe2 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -17,22 +17,68 @@
#include <asm/gmap_helpers.h>
/**
- * ptep_zap_softleaf_entry() - discard a software leaf entry.
+ * try_get_locked_pte() - like get_locked_pte(), but atomic and with trylock
* @mm: the mm
- * @entry: the software leaf entry that needs to be zapped
+ * @vmaddr: the userspace virtual address whose pte is to be found
+ * @ptl: will be set to the pointer to the lock used to lock the pte in case
+ * of success.
*
- * Discards the given software leaf entry. If the leaf entry was an actual
- * swap entry (and not a migration entry, for example), the actual swapped
- * page is also discarded from swap.
+ * This function returns the pointer to the pte corresponding to @vmaddr in @mm,
+ * similarly to get_locked_pte(). Unlike get_locked_pte(), no attempt is made
+ * to allocate missing page tables. If a missing or large entry is found, the
+ * function will return NULL. If the ptl lock is contended, %-EAGAIN is
+ * returned.
+ *
+ * In case of success, *@ptl will point to the locked pte lock for the returned
+ * pte, like get_locked_pte() does.
+ *
+ * Context: mmap_lock or vma lock for read or for write needs to be held.
+ * Return:
+ * * %NULL if the pte cannot be reached.
+ * * %-EAGAIN if the pte can be reached, but cannot be locked.
+ * * the pointer to the pte corresponding to @vmaddr in @mm, if it can be reached
+ * and locked.
*/
-static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
+pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long vmaddr, spinlock_t **ptl)
{
- if (softleaf_is_swap(entry))
- dec_mm_counter(mm, MM_SWAPENTS);
- else if (softleaf_is_migration(entry))
- dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
- swap_put_entries_direct(entry, 1);
+ pmd_t *pmdp, pmd, pmdval;
+ pud_t *pudp, pud;
+ p4d_t *p4dp, p4d;
+ pgd_t *pgdp, pgd;
+ pte_t *ptep;
+
+ pgdp = pgd_offset(mm, vmaddr);
+ pgd = pgdp_get(pgdp);
+ if (pgd_none(pgd) || !pgd_present(pgd))
+ return NULL;
+ p4dp = p4d_offset(pgdp, vmaddr);
+ p4d = p4dp_get(p4dp);
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ return NULL;
+ pudp = pud_offset(p4dp, vmaddr);
+ pud = pudp_get(pudp);
+ if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud))
+ return NULL;
+ pmdp = pmd_offset(pudp, vmaddr);
+ pmd = pmdp_get_lockless(pmdp);
+ if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd))
+ return NULL;
+ ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, ptl);
+ if (!ptep)
+ return NULL;
+
+ if (spin_trylock(*ptl)) {
+ if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmdp)))) {
+ pte_unmap_unlock(ptep, *ptl);
+ return ERR_PTR(-EAGAIN);
+ }
+ return ptep;
+ }
+
+ pte_unmap(ptep);
+ return ERR_PTR(-EAGAIN);
}
+EXPORT_SYMBOL_GPL(try_get_locked_pte);
/**
* gmap_helper_zap_one_page() - discard a page if it was swapped.
@@ -46,7 +92,8 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
struct vm_area_struct *vma;
- spinlock_t *ptl;
+ spinlock_t *ptl; /* Lock for the host (userspace) page table */
+ softleaf_t sl;
pte_t *ptep;
mmap_assert_locked(mm);
@@ -57,11 +104,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
return;
/* Get pointer to the page table entry */
- ptep = get_locked_pte(mm, vmaddr, &ptl);
- if (unlikely(!ptep))
+ ptep = try_get_locked_pte(mm, vmaddr, &ptl);
+ if (IS_ERR_OR_NULL(ptep))
return;
- if (pte_swap(*ptep)) {
- ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
+ sl = softleaf_from_pte(*ptep);
+ if (pte_swap(*ptep) && softleaf_is_swap(sl)) {
+ dec_mm_counter(mm, MM_SWAPENTS);
+ swap_put_entries_direct(sl, 1);
pte_clear(mm, vmaddr, ptep);
}
pte_unmap_unlock(ptep, ptl);
@@ -113,37 +162,9 @@ EXPORT_SYMBOL_GPL(gmap_helper_discard);
*/
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
{
- pmd_t *pmdp, pmd, pmdval;
- pud_t *pudp, pud;
- p4d_t *p4dp, p4d;
- pgd_t *pgdp, pgd;
spinlock_t *ptl; /* Lock for the host (userspace) page table */
pte_t *ptep;
- pgdp = pgd_offset(mm, vmaddr);
- pgd = pgdp_get(pgdp);
- if (pgd_none(pgd) || !pgd_present(pgd))
- return;
-
- p4dp = p4d_offset(pgdp, vmaddr);
- p4d = p4dp_get(p4dp);
- if (p4d_none(p4d) || !p4d_present(p4d))
- return;
-
- pudp = pud_offset(p4dp, vmaddr);
- pud = pudp_get(pudp);
- if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud))
- return;
-
- pmdp = pmd_offset(pudp, vmaddr);
- pmd = pmdp_get_lockless(pmdp);
- if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd))
- return;
-
- ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, &ptl);
- if (!ptep)
- return;
-
/*
* Several paths exists that takes the ptl lock and then call the
* mmu_notifier, which takes the mmu_lock. The unmap path, instead,
@@ -156,21 +177,12 @@ void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
* If the lock is contended the bit is not set and the deadlock is
* avoided.
*/
- if (spin_trylock(ptl)) {
- /*
- * Make sure the pte we are touching is still the correct
- * one. In theory this check should not be needed, but
- * better safe than sorry.
- * Disabling interrupts or holding the mmap lock is enough to
- * guarantee that no concurrent updates to the page tables
- * are possible.
- */
- if (likely(pmd_same(pmdval, pmdp_get_lockless(pmdp))))
- __atomic64_or(_PAGE_UNUSED, (long *)ptep);
- spin_unlock(ptl);
- }
+ ptep = try_get_locked_pte(mm, vmaddr, &ptl);
+ if (IS_ERR_OR_NULL(ptep))
+ return;
- pte_unmap(ptep);
+ __atomic64_or(_PAGE_UNUSED, (long *)ptep);
+ pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 62b5befe0eed..6c6a6d663e29 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3313,37 +3313,6 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
sev_writeback_caches(kvm);
}
-void sev_free_vcpu(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm;
-
- if (!is_sev_es_guest(vcpu))
- return;
-
- svm = to_svm(vcpu);
-
- /*
- * If it's an SNP guest, then the VMSA was marked in the RMP table as
- * a guest-owned page. Transition the page to hypervisor state before
- * releasing it back to the system.
- */
- if (is_sev_snp_guest(vcpu)) {
- u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
-
- if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K))
- goto skip_vmsa_free;
- }
-
- if (vcpu->arch.guest_state_protected)
- sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
-
- __free_page(virt_to_page(svm->sev_es.vmsa));
-
-skip_vmsa_free:
- if (svm->sev_es.ghcb_sa_free)
- kvfree(svm->sev_es.ghcb_sa);
-}
-
static void dump_ghcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3583,6 +3552,20 @@ vmgexit_err:
return 1;
}
+static void __sev_es_unmap_ghcb(struct vcpu_svm *svm)
+{
+ if (svm->sev_es.ghcb_sa_free) {
+ kvfree(svm->sev_es.ghcb_sa);
+ svm->sev_es.ghcb_sa = NULL;
+ svm->sev_es.ghcb_sa_free = false;
+ }
+
+ if (svm->sev_es.ghcb) {
+ kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map);
+ svm->sev_es.ghcb = NULL;
+ }
+}
+
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
/* Clear any indication that the vCPU is in a type of AP Reset Hold */
@@ -3591,31 +3574,51 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
if (!svm->sev_es.ghcb)
return;
- if (svm->sev_es.ghcb_sa_free) {
- /*
- * The scratch area lives outside the GHCB, so there is a
- * buffer that, depending on the operation performed, may
- * need to be synced, then freed.
- */
- if (svm->sev_es.ghcb_sa_sync) {
- kvm_write_guest(svm->vcpu.kvm,
- svm->sev_es.sw_scratch,
- svm->sev_es.ghcb_sa,
- svm->sev_es.ghcb_sa_len);
- svm->sev_es.ghcb_sa_sync = false;
- }
-
- kvfree(svm->sev_es.ghcb_sa);
- svm->sev_es.ghcb_sa = NULL;
- svm->sev_es.ghcb_sa_free = false;
+ /*
+ * If the scratch area lives outside the GHCB, there's a buffer that,
+ * depending on the operation performed, may need to be synced.
+ */
+ if (svm->sev_es.ghcb_sa_sync) {
+ kvm_write_guest(svm->vcpu.kvm, svm->sev_es.sw_scratch,
+ svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len);
+ svm->sev_es.ghcb_sa_sync = false;
}
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
sev_es_sync_to_ghcb(svm);
- kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map);
- svm->sev_es.ghcb = NULL;
+ __sev_es_unmap_ghcb(svm);
+}
+
+void sev_free_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm;
+
+ if (!is_sev_es_guest(vcpu))
+ return;
+
+ svm = to_svm(vcpu);
+
+ /*
+ * If it's an SNP guest, then the VMSA was marked in the RMP table as
+ * a guest-owned page. Transition the page to hypervisor state before
+ * releasing it back to the system.
+ */
+ if (is_sev_snp_guest(vcpu)) {
+ u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
+
+ if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K))
+ goto skip_vmsa_free;
+ }
+
+ if (vcpu->arch.guest_state_protected)
+ sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
+
+ __free_page(virt_to_page(svm->sev_es.vmsa));
+
+skip_vmsa_free:
+ __sev_es_unmap_ghcb(svm);
}
int pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -3685,6 +3688,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len)
goto e_scratch;
}
+ WARN_ON_ONCE(svm->sev_es.ghcb_sa_sync || svm->sev_es.ghcb_sa_free);
+
if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
/* Scratch area begins within GHCB */
ghcb_scratch_beg = control->ghcb_gpa +
@@ -3706,6 +3711,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len)
scratch_va = (void *)svm->sev_es.ghcb;
scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
+ svm->sev_es.ghcb_sa_sync = false;
+ svm->sev_es.ghcb_sa_free = false;
svm->sev_es.ghcb_sa_len = ghcb_scratch_end - scratch_gpa_beg;
} else {
/* GHCB v2 requires the scratch area to be within the GHCB. */
@@ -3841,13 +3848,11 @@ struct psc_buffer {
struct psc_entry entries[];
} __packed;
-static int snp_begin_psc(struct vcpu_svm *svm);
+static int snp_do_psc(struct vcpu_svm *svm);
static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret)
{
- svm->sev_es.psc_inflight = 0;
- svm->sev_es.psc_idx = 0;
- svm->sev_es.psc_2m = false;
+ memset(&svm->sev_es.psc, 0, sizeof(svm->sev_es.psc));
/*
* PSC requests always get a "no action" response in SW_EXITINFO1, with
@@ -3860,9 +3865,8 @@ static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret)
static void __snp_complete_one_psc(struct vcpu_svm *svm)
{
- struct psc_buffer *psc = svm->sev_es.ghcb_sa;
- struct psc_entry *entries = psc->entries;
- struct psc_hdr *hdr = &psc->hdr;
+ struct vcpu_sev_es_state *sev_es = &svm->sev_es;
+ struct psc_buffer *guest_psc = sev_es->ghcb_sa;
__u16 idx;
/*
@@ -3870,14 +3874,15 @@ static void __snp_complete_one_psc(struct vcpu_svm *svm)
* corresponding entries in the guest's PSC buffer and zero out the
* count of in-flight PSC entries.
*/
- for (idx = svm->sev_es.psc_idx; svm->sev_es.psc_inflight;
- svm->sev_es.psc_inflight--, idx++) {
- struct psc_entry entry = READ_ONCE(entries[idx]);
+ for (idx = sev_es->psc.cur_idx; sev_es->psc.batch_size;
+ sev_es->psc.batch_size--, idx++) {
+ struct psc_entry entry = READ_ONCE(guest_psc->entries[idx]);
- entries[idx].cur_page = entry.pagesize ? 512 : 1;
+ guest_psc->entries[idx].cur_page = entry.pagesize ? 512 : 1;
}
- hdr->cur_entry = idx;
+ sev_es->psc.cur_idx = idx;
+ guest_psc->hdr.cur_entry = idx;
}
static int snp_complete_one_psc(struct kvm_vcpu *vcpu)
@@ -3892,63 +3897,30 @@ static int snp_complete_one_psc(struct kvm_vcpu *vcpu)
__snp_complete_one_psc(svm);
/* Handle the next range (if any). */
- return snp_begin_psc(svm);
+ return snp_do_psc(svm);
}
-static int snp_begin_psc(struct vcpu_svm *svm)
+static int snp_do_psc(struct vcpu_svm *svm)
{
struct vcpu_sev_es_state *sev_es = &svm->sev_es;
- struct psc_buffer *psc = sev_es->ghcb_sa;
- struct psc_entry *entries = psc->entries;
+ struct psc_buffer *guest_psc = sev_es->ghcb_sa;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct psc_hdr *hdr = &psc->hdr;
struct psc_entry entry_start;
- u16 idx, idx_start, idx_end, max_nr_entries;
int npages;
bool huge;
u64 gfn;
-
- if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) {
- snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
- return 1;
- }
-
- /*
- * GHCB v2 requires the scratch area to reside within the GHCB itself,
- * and PSC requests are only supported for GHCB v2+. Thus it should be
- * impossible to exceed the max PSC entry count (which is derived from
- * the size of the shared GHCB buffer).
- */
- max_nr_entries = (sev_es->ghcb_sa_len - sizeof(struct psc_hdr)) /
- sizeof(struct psc_entry);
- if (WARN_ON_ONCE(max_nr_entries > VMGEXIT_PSC_MAX_COUNT)) {
- snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
- return 1;
- }
+ u16 idx;
next_range:
/* There should be no other PSCs in-flight at this point. */
- if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) {
+ if (WARN_ON_ONCE(svm->sev_es.psc.batch_size)) {
snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
return 1;
}
- /*
- * The PSC descriptor buffer can be modified by a misbehaved guest after
- * validation, so take care to only use validated copies of values used
- * for things like array indexing.
- */
- idx_start = READ_ONCE(hdr->cur_entry);
- idx_end = READ_ONCE(hdr->end_entry);
-
- if (idx_end >= max_nr_entries) {
- snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR);
- return 1;
- }
-
/* Find the start of the next range which needs processing. */
- for (idx = idx_start; idx <= idx_end; idx++, hdr->cur_entry++) {
- entry_start = READ_ONCE(entries[idx]);
+ for (idx = sev_es->psc.cur_idx; idx <= sev_es->psc.end_idx; idx++) {
+ entry_start = READ_ONCE(guest_psc->entries[idx]);
gfn = entry_start.gfn;
huge = entry_start.pagesize;
@@ -3974,32 +3946,40 @@ next_range:
if (npages)
break;
+
+ /*
+ * Increment the guest-visible index to communicate the current
+ * entry back to the guest, e.g. in case of failure. No need
+ * for READ_ONCE() as KVM doesn't consume the field, i.e. a
+ * misbehaving guest can only break itself.
+ */
+ guest_psc->hdr.cur_entry++;
}
- if (idx > idx_end) {
+ if (idx > sev_es->psc.end_idx) {
/* Nothing more to process. */
snp_complete_psc(svm, 0);
return 1;
}
- svm->sev_es.psc_2m = huge;
- svm->sev_es.psc_idx = idx;
- svm->sev_es.psc_inflight = 1;
+ sev_es->psc.is_2m = huge;
+ sev_es->psc.cur_idx = idx;
+ sev_es->psc.batch_size = 1;
/*
* Find all subsequent PSC entries that contain adjacent GPA
* ranges/operations and can be combined into a single
* KVM_HC_MAP_GPA_RANGE exit.
*/
- while (++idx <= idx_end) {
- struct psc_entry entry = READ_ONCE(entries[idx]);
+ while (++idx <= sev_es->psc.end_idx) {
+ struct psc_entry entry = READ_ONCE(guest_psc->entries[idx]);
if (entry.operation != entry_start.operation ||
entry.gfn != entry_start.gfn + npages ||
entry.cur_page || !!entry.pagesize != huge)
break;
- svm->sev_es.psc_inflight++;
+ sev_es->psc.batch_size++;
npages += huge ? 512 : 1;
}
@@ -4041,6 +4021,46 @@ next_range:
BUG();
}
+static int snp_begin_psc(struct vcpu_svm *svm)
+{
+ struct vcpu_sev_es_state *sev_es = &svm->sev_es;
+ struct psc_buffer *guest_psc = sev_es->ghcb_sa;
+ u16 max_nr_entries;
+
+ if (!user_exit_on_hypercall(svm->vcpu.kvm, KVM_HC_MAP_GPA_RANGE)) {
+ snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
+ return 1;
+ }
+
+ /*
+ * GHCB v2 requires the scratch area to reside within the GHCB itself,
+ * and PSC requests are only supported for GHCB v2+. Thus it should be
+ * impossible to exceed the max PSC entry count (which is derived from
+ * the size of the shared GHCB buffer).
+ */
+ max_nr_entries = (sev_es->ghcb_sa_len - sizeof(struct psc_hdr)) /
+ sizeof(struct psc_entry);
+ if (WARN_ON_ONCE(max_nr_entries > VMGEXIT_PSC_MAX_COUNT)) {
+ snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
+ return 1;
+ }
+
+ /*
+ * The PSC descriptor buffer can be modified by a misbehaved guest after
+ * validation, so take care to only use validated copies of values used
+ * for things like array indexing.
+ */
+ sev_es->psc.cur_idx = READ_ONCE(guest_psc->hdr.cur_entry);
+ sev_es->psc.end_idx = READ_ONCE(guest_psc->hdr.end_entry);
+
+ if (sev_es->psc.end_idx >= max_nr_entries) {
+ snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR);
+ return 1;
+ }
+
+ return snp_do_psc(svm);
+}
+
/*
* Invoked as part of svm_vcpu_reset() processing of an init event.
*/
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index a10668d17a16..5137416be593 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -257,9 +257,12 @@ struct vcpu_sev_es_state {
bool ghcb_sa_free;
/* SNP Page-State-Change buffer entries currently being processed */
- u16 psc_idx;
- u16 psc_inflight;
- bool psc_2m;
+ struct {
+ u16 cur_idx;
+ u16 end_idx;
+ u16 batch_size;
+ bool is_2m;
+ } psc;
u64 ghcb_registered_gpa;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 89489996fbc1..881f92d7a469 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3520,7 +3520,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
return;
- WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm));
+ WARN_ON_ONCE(!vcpu && refcount_read(&kvm->users_count) &&
+ !kvm_arch_allow_write_without_running_vcpu(kvm));
#endif
if (memslot && kvm_slot_dirty_track_enabled(memslot)) {