summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-05-13 11:53:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-05-13 11:53:51 -0700
commite1914add2799225a87502051415fc5c32aeb02ae (patch)
treed7a14aed6faf4e13febde2ef037cff7e545ad4a4 /arch/arm64/kvm
parent1f63dd8ca0dc05a8272bb8155f643c691d29bb11 (diff)
parent2d5d3fc593c9b7e41bee86175d7b9e11f470072e (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvmHEADmaster
Pull kvm fixes from Paolo Bonzini: "arm64: - Add the pKVM side of the workaround for ARM's erratum 4193714, provided that the EL3 firmware does its part of the job. KVM will refuse to initialise otherwise - Correctly handle 52bit VAs for guest EL2 stage-1 translations when running under NV with E2H==0 - Correctly deal with permission faults in guest_memfd memslots - Fix the steal-time selftest after the infrastructure was reworked - Make sure the host cannot pass a non-sensical clock update to the EL2 tracing infrastructure - Appoint Steffen Eiden as a reviewer in anticipation of the KVM/s390 ability to run arm64 guests, which will inevitably lead to arm64 code being directly used on s390 - Make sure that EL2 is configured with both exception entry and exit being Context Synchronization Events - Handle the current vcpu being NULL on EL2 panic - Fix the selftest_vcpu memcache being empty at the point of donation or sharing - Check that the memcache has enough capacity before engaging on the share/donate path - Fix __deactivate_fgt() to use its parameter rather than a variable in the macro context s390: - Fix array overrun with large amounts of PCI devices x86: - Never use L0's PAUSE loop exiting while L2 is running, since it's unlikely that a nested guest will help solving the hypervisor's spinlock contention - Fix emulation of MOVNTDQA - Fix typo in Xen hypercall tracepoint - Add back an optimization that was left behind when recently fixing a bug - Add module parameter to disable CET, whose implementation seems to have issues. For now it remains enabled by default Generic: - Reject offset causing an unsigned overflow in kvm_reset_dirty_gfn() Documentation: - Update stale links Selftests: - Fix guest_memfd_test with host page size > guest page size" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits) KVM: VMX: introduce module parameter to disable CET KVM: x86: Swap the dst and src operand for MOVNTDQA KVM: x86: use again the flush argument of __link_shadow_page() KVM: selftests: Ensure gmem file sizes are multiple of host page size Documentation: kvm: update links in the references section of AMD Memory Encryption KVM: nSVM: Never use L0's PAUSE loop exiting while L2 is running KVM: x86: Fix Xen hypercall tracepoint argument assignment KVM: Reject wrapped offset in kvm_reset_dirty_gfn() KVM: arm64: Pre-check vcpu memcache for host->guest donate KVM: arm64: Pre-check vcpu memcache for host->guest share KVM: arm64: Seed pkvm_ownership_selftest vcpu memcache KVM: arm64: Fix __deactivate_fgt macro parameter typo KVM: arm64: Guard against NULL vcpu on VHE hyp panic path KVM: arm64: Make EL2 exception entry and exit context-synchronization events MAINTAINERS: Add Steffen as reviewer for KVM/arm64 KVM: arm64: Remove potential UB on nvhe tracing clock update KVM: selftests: arm64: Fix steal_time test after UAPI refactoring KVM: arm64: Handle permission faults with guest_memfd KVM: arm64: nv: Consider the DS bit when translating TCR_EL2 KVM: arm64: Work around C1-Pro erratum 4193714 for protected guests ...
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/arm.c21
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/clock.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c47
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c16
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c3
-rw-r--r--arch/arm64/kvm/mmu.c29
7 files changed, 109 insertions, 12 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 8bb2c7422cc8..34c9950884d5 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -4,6 +4,7 @@
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*/
+#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
@@ -2638,6 +2639,22 @@ static int init_pkvm_host_sve_state(void)
return 0;
}
+static int pkvm_check_sme_dvmsync_fw_call(void)
+{
+ struct arm_smccc_res res;
+
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714))
+ return 0;
+
+ arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+ if (res.a0) {
+ kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
/*
* Finalizes the initialization of hyp mode, once everything else is initialized
* and the initialziation process cannot fail.
@@ -2838,6 +2855,10 @@ static int __init init_hyp_mode(void)
if (err)
goto out_err;
+ err = pkvm_check_sme_dvmsync_fw_call();
+ if (err)
+ goto out_err;
+
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 98b2976837b1..bf0eb5e43427 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -245,7 +245,7 @@ static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
__activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
}
-#define __deactivate_fgt(htcxt, vcpu, reg) \
+#define __deactivate_fgt(hctxt, vcpu, reg) \
do { \
write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
SYS_ ## reg); \
diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
index 32fc4313fe43..a7fc61976fd0 100644
--- a/arch/arm64/kvm/hyp/nvhe/clock.c
+++ b/arch/arm64/kvm/hyp/nvhe/clock.c
@@ -35,6 +35,9 @@ void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
struct clock_data *clock = &trace_clock_data;
u64 bank = clock->cur ^ 1;
+ if (!mult || shift >= 64)
+ return;
+
clock->data[bank].mult = mult;
clock->data[bank].shift = shift;
clock->data[bank].epoch_ns = epoch_ns;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 28a471d1927c..25f04629014e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -5,6 +5,7 @@
*/
#include <linux/kvm_host.h>
+
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
@@ -14,6 +15,7 @@
#include <hyp/fault.h>
+#include <nvhe/arm-smccc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
@@ -29,6 +31,19 @@ static struct hyp_pool host_s2_pool;
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))
+static void pkvm_sme_dvmsync_fw_call(void)
+{
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
+ struct arm_smccc_res res;
+
+ /*
+ * Ignore the return value. Probing for the workaround
+ * availability took place in init_hyp_mode().
+ */
+ hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+ }
+}
+
static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
hyp_spin_lock(&vm->lock);
@@ -574,8 +589,14 @@ static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
addr, size, &host_s2_pool,
KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
- if (!ret)
+ if (!ret) {
+ /*
+ * After stage2 maintenance has happened, but before the page
+ * owner has changed.
+ */
+ pkvm_sme_dvmsync_fw_call();
__host_update_page_state(addr, size, PKVM_NOPAGE);
+ }
return ret;
}
@@ -1369,6 +1390,22 @@ unlock:
return ret && ret != -EHWPOISON ? ret : 0;
}
+/*
+ * share/donate install at most one stage-2 leaf (PAGE_SIZE, or one
+ * KVM_PGTABLE_LAST_LEVEL - 1 block for share). kvm_mmu_cache_min_pages()
+ * bounds the worst-case allocation: exact for the PAGE_SIZE leaf,
+ * conservative by one for the block.
+ */
+static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
+{
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+
+ if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
+ return -ENOMEM;
+
+ return 0;
+}
+
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
@@ -1388,6 +1425,10 @@ int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
if (ret)
goto unlock;
+ ret = __guest_check_pgtable_memcache(vcpu);
+ if (ret)
+ goto unlock;
+
meta = host_stage2_encode_gfn_meta(vm, gfn);
WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
PKVM_ID_GUEST, meta));
@@ -1453,6 +1494,10 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
}
}
+ ret = __guest_check_pgtable_memcache(vcpu);
+ if (ret)
+ goto unlock;
+
for_each_hyp_page(page, phys, size) {
set_host_state(page, PKVM_PAGE_SHARED_OWNED);
page->host_share_guest_count++;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index e7496eb85628..eb1c10120f9f 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -752,16 +752,30 @@ static struct pkvm_hyp_vcpu selftest_vcpu = {
struct pkvm_hyp_vcpu *init_selftest_vm(void *virt)
{
struct hyp_page *p = hyp_virt_to_page(virt);
+ unsigned long min_pages, seeded = 0;
int i;
selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
+ /*
+ * Mirror pkvm_refill_memcache() for the share/donate pre-checks;
+ * the selftest invokes those functions directly and would
+ * otherwise see an empty memcache.
+ */
+ min_pages = kvm_mmu_cache_min_pages(&selftest_vm.kvm.arch.mmu);
+
for (i = 0; i < pkvm_selftest_pages(); i++) {
if (p[i].refcount)
continue;
p[i].refcount = 1;
- hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+ if (seeded < min_pages) {
+ push_hyp_memcache(&selftest_vcpu.vcpu.arch.pkvm_memcache,
+ hyp_page_to_virt(&p[i]), hyp_virt_to_phys);
+ seeded++;
+ } else {
+ hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+ }
}
selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm();
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 9db3f11a4754..1e8995add14f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -663,7 +663,8 @@ static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par)
host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
- __deactivate_traps(vcpu);
+ if (vcpu)
+ __deactivate_traps(vcpu);
sysreg_restore_host_state_vhe(host_ctxt);
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n",
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d089c107d9b7..4da9281312eb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1576,21 +1576,24 @@ struct kvm_s2_fault_desc {
static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
{
bool write_fault, exec_fault;
+ bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
unsigned long mmu_seq;
struct page *page;
struct kvm *kvm = s2fd->vcpu->kvm;
- void *memcache;
+ void *memcache = NULL;
kvm_pfn_t pfn;
gfn_t gfn;
int ret;
- memcache = get_mmu_memcache(s2fd->vcpu);
- ret = topup_mmu_memcache(s2fd->vcpu, memcache);
- if (ret)
- return ret;
+ if (!perm_fault) {
+ memcache = get_mmu_memcache(s2fd->vcpu);
+ ret = topup_mmu_memcache(s2fd->vcpu, memcache);
+ if (ret)
+ return ret;
+ }
if (s2fd->nested)
gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
@@ -1631,9 +1634,19 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
goto out_unlock;
}
- ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
- __pfn_to_phys(pfn), prot,
- memcache, flags);
+ if (perm_fault) {
+ /*
+ * Drop the SW bits in favour of those stored in the
+ * PTE, which will be preserved.
+ */
+ prot &= ~KVM_NV_GUEST_MAP_SZ;
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, s2fd->fault_ipa,
+ prot, flags);
+ } else {
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
+ __pfn_to_phys(pfn), prot,
+ memcache, flags);
+ }
out_unlock:
kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);