summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-06-12 10:51:42 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2026-06-12 10:51:42 +0200
commit751d041a13bdc9d72bf7efdc86224da1174ff31d (patch)
tree1c63eae598a3cc92b734b425f57a67efb2648612
parent4e6df939687caf878bb493570ff1c583bba86e7c (diff)
parent1ee27dacbe5dc4def481794d899d67b0d4570094 (diff)
Merge tag 'kvmarm-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 7.2 * New features: - None. Zilch. Nada. Que dalle. * Fixes and other improvements: - Significant cleanup of the vgic-v5 PPI support which was merged in 7.1. This makes the code more maintainable, and squashes a couple of bugs in the meantime. - Set of fixes for the handling of the MMU in an NV context, particularly VNCR-triggered faults. S1POE support is fixed as well. - Large set of pKVM fixes, mostly addressing recurring issues around hypervisor tracking of donated pages in obscure cases where the donation could fail and leave things in a bizarre state. - Fixes for the so-called "lazy vgic init", which resulted in sleeping operations in non-preemptible sections. This turned out to be far more invasive than initially expected... - Reduce the overhead of L1/L2 context switch by not touching the FP registers. - Fix the way non-implemented page sizes are dealt with when a guest insist on using them for S2 translation. - The usual set of low-impact fixes and cleanups all over the map.
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic-v5.rst6
-rw-r--r--Documentation/virt/kvm/devices/vcpu.rst7
-rw-r--r--arch/arm64/include/asm/kvm_host.h8
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h1
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h3
-rw-r--r--arch/arm64/kernel/cpufeature.c1
-rw-r--r--arch/arm64/kernel/hyp-stub.S4
-rw-r--r--arch/arm64/kvm/arch_timer.c137
-rw-r--r--arch/arm64/kvm/arm.c41
-rw-r--r--arch/arm64/kvm/at.c146
-rw-r--r--arch/arm64/kvm/emulate-nested.c12
-rw-r--r--arch/arm64/kvm/fpsimd.c26
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h2
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c21
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c37
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c21
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c4
-rw-r--r--arch/arm64/kvm/hyp/vgic-v5-sr.c82
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c4
-rw-r--r--arch/arm64/kvm/mmu.c39
-rw-r--r--arch/arm64/kvm/nested.c234
-rw-r--r--arch/arm64/kvm/pmu-emul.c31
-rw-r--r--arch/arm64/kvm/sys_regs.c20
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c45
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c6
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c21
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-v5.c51
-rw-r--r--arch/arm64/kvm/vgic/vgic.c33
-rw-r--r--arch/arm64/kvm/vgic/vgic.h3
-rw-r--r--drivers/irqchip/irq-gic-v5.c13
-rw-r--r--include/kvm/arm_arch_timer.h7
-rw-r--r--include/kvm/arm_pmu.h5
-rw-r--r--include/kvm/arm_vgic.h19
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic.c1
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_v5.c10
40 files changed, 651 insertions, 470 deletions
diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
index 29335ea823fc..70b9162755c7 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v5.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
@@ -12,8 +12,8 @@ Only one VGIC instance may be instantiated through this API. The created VGIC
will act as the VM interrupt controller, requiring emulated user-space devices
to inject interrupts to the VGIC instead of directly to CPUs.
-Creating a guest GICv5 device requires a host GICv5 host. The current VGICv5
-device only supports PPI interrupts. These can either be injected from emulated
+Creating a guest GICv5 device requires a GICv5 host. The current VGICv5 device
+only supports PPI interrupts. These can either be injected from emulated
in-kernel devices (such as the Arch Timer, or PMU), or via the KVM_IRQ_LINE
ioctl.
@@ -25,7 +25,7 @@ Groups:
request the initialization of the VGIC, no additional parameter in
kvm_device_attr.addr. Must be called after all VCPUs have been created.
- KVM_DEV_ARM_VGIC_USERPSPACE_PPIs
+ KVM_DEV_ARM_VGIC_USERSPACE_PPIS
request the mask of userspace-drivable PPIs. Only a subset of the PPIs can
be directly driven from userspace with GICv5, and the returned mask
informs userspace of which it is allowed to drive via KVM_IRQ_LINE.
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 5e3805820010..66e714f2fcfa 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -37,8 +37,11 @@ Returns:
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
type must be same for each vcpu. As a PPI, the interrupt number is the same for
-all vcpus, while as an SPI it must be a separate number per vcpu. For
-GICv5-based guests, the architected PPI (23) must be used.
+all vcpus, while as an SPI it must be a separate number per vcpu.
+
+For GICv5-based guests, the architected PPI (23) must be used, and must be
+communicated as the full GICv5-style Interrupt ID, i.e., 0x20000017. This ioctl
+can be omitted altogether for a GICv5-based guest.
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
---------------------------------------
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a49042bfa801..cb5ef7e6c2fe 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1112,7 +1112,8 @@ struct kvm_vcpu_arch {
#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
/* SError pending for nested guest */
#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
-
+/* KVM is currently emulating an L2 to L1 exception */
+#define IN_NESTED_EXCEPTION __vcpu_single_flag(sflags, BIT(9))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
@@ -1273,13 +1274,14 @@ void kvm_arm_resume_guest(struct kvm *kvm);
#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid))
#ifndef __KVM_NVHE_HYPERVISOR__
-#define kvm_call_hyp_nvhe(f, ...) \
+#define kvm_call_hyp_nvhe(f, ...) \
({ \
struct arm_smccc_res res; \
\
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f), \
##__VA_ARGS__, &res); \
- WARN_ON(res.a0 != SMCCC_RET_SUCCESS); \
+ if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS)) \
+ res.a1 = -EOPNOTSUPP; \
\
res.a1; \
})
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 8d06b62e7188..e9b2b0c40ec6 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -157,5 +157,6 @@ extern unsigned long kvm_nvhe_sym(__icache_flags);
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
extern unsigned long kvm_nvhe_sym(hyp_nr_cpus);
+extern unsigned int kvm_nvhe_sym(hyp_gicv3_nr_lr);
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 01e9c72d6aa7..6eae7e7e2a68 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -318,8 +318,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
* Must be called from hyp code running at EL2 with an updated VTTBR
* and interrupts disabled.
*/
-static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
- struct kvm_arch *arch)
+static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu)
{
write_sysreg(mmu->vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6d53bb15cf7b..62b0d77217ee 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -266,6 +266,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_ATS1A_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 634ddc904244..37c6976e44a4 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -104,11 +104,9 @@ SYM_CODE_START_LOCAL(__finalise_el2)
mov_q x0, HCR_HOST_VHE_FLAGS
msr_hcr_el2 x0
- // Use the EL1 allocated stack, per-cpu offset
+ // Use the EL1 allocated stack
mrs x0, sp_el1
mov sp, x0
- mrs x0, tpidr_el1
- msr tpidr_el2, x0
// FP configuration, vectors
mrs_s x0, SYS_CPACR_EL12
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index cbea4d9ee955..4155fe89b58a 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -39,10 +39,9 @@ static const u8 default_ppi[] = {
[TIMER_HVTIMER] = 28,
};
-static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
struct arch_timer_context *timer_ctx);
-static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
+static bool kvm_timer_pending(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg,
@@ -52,11 +51,17 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);
-static struct irq_ops arch_timer_irq_ops = {
+static unsigned long kvm_arch_timer_get_irq_flags(void)
+{
+ return kvm_vgic_global_state.no_hw_deactivation ? VGIC_IRQ_SW_RESAMPLE : 0;
+}
+
+static const struct irq_ops arch_timer_irq_ops = {
+ .get_flags = kvm_arch_timer_get_irq_flags,
.get_input_level = kvm_arch_timer_get_input_level,
};
-static struct irq_ops arch_timer_irq_ops_vgic_v5 = {
+static const struct irq_ops arch_timer_irq_ops_vgic_v5 = {
.get_input_level = kvm_arch_timer_get_input_level,
.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
.set_direct_injection = vgic_v5_set_ppi_dvi,
@@ -224,7 +229,7 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
else
ctx = map.direct_ptimer;
- if (kvm_timer_should_fire(ctx))
+ if (kvm_timer_pending(ctx))
kvm_timer_update_irq(vcpu, true, ctx);
if (userspace_irqchip(vcpu->kvm) &&
@@ -257,7 +262,7 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}
-static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
+static bool kvm_timer_enabled(struct arch_timer_context *timer_ctx)
{
WARN_ON(timer_ctx && timer_ctx->loaded);
return timer_ctx &&
@@ -294,7 +299,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
WARN(ctx->loaded, "timer %d loaded\n", i);
- if (kvm_timer_irq_can_fire(ctx))
+ if (kvm_timer_enabled(ctx))
min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
}
@@ -358,7 +363,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
return HRTIMER_NORESTART;
}
-static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
+static bool kvm_timer_pending(struct arch_timer_context *timer_ctx)
{
enum kvm_arch_timers index;
u64 cval, now;
@@ -391,7 +396,7 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
!(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
}
- if (!kvm_timer_irq_can_fire(timer_ctx))
+ if (!kvm_timer_enabled(timer_ctx))
return false;
cval = timer_get_cval(timer_ctx);
@@ -405,22 +410,30 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}
+static u64 kvm_timer_needs_notify(struct kvm_vcpu *vcpu)
+{
+ u64 v = vcpu->run->s.regs.device_irq_level;
+
+ v ^= kvm_timer_pending(vcpu_vtimer(vcpu)) ? KVM_ARM_DEV_EL1_VTIMER : 0;
+ v ^= kvm_timer_pending(vcpu_ptimer(vcpu)) ? KVM_ARM_DEV_EL1_PTIMER : 0;
+
+ return v & (KVM_ARM_DEV_EL1_VTIMER | KVM_ARM_DEV_EL1_PTIMER);
+}
+
+bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
+{
+ return !!kvm_timer_needs_notify(vcpu);
+}
+
/*
* Reflect the timer output level into the kvm_run structure
*/
-void kvm_timer_update_run(struct kvm_vcpu *vcpu)
+bool kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- struct kvm_sync_regs *regs = &vcpu->run->s.regs;
-
- /* Populate the device bitmap with the timer states */
- regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
- KVM_ARM_DEV_EL1_PTIMER);
- if (kvm_timer_should_fire(vtimer))
- regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
- if (kvm_timer_should_fire(ptimer))
- regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
+ u64 mask = kvm_timer_needs_notify(vcpu);
+ if (mask)
+ vcpu->run->s.regs.device_irq_level ^= mask;
+ return !!mask;
}
static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
@@ -446,9 +459,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
{
kvm_timer_update_status(timer_ctx, new_level);
- timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
- timer_ctx->irq.level);
+ new_level);
if (userspace_irqchip(vcpu->kvm))
return;
@@ -466,28 +478,25 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
kvm_vgic_inject_irq(vcpu->kvm, vcpu,
timer_irq(timer_ctx),
- timer_ctx->irq.level,
+ new_level,
timer_ctx);
}
/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
- bool should_fire = kvm_timer_should_fire(ctx);
+ bool pending = kvm_timer_pending(ctx);
- trace_kvm_timer_emulate(ctx, should_fire);
+ trace_kvm_timer_emulate(ctx, pending);
- if (should_fire != ctx->irq.level)
- kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);
-
- kvm_timer_update_status(ctx, should_fire);
+ kvm_timer_update_irq(timer_context_to_vcpu(ctx), pending, ctx);
/*
- * If the timer can fire now, we don't need to have a soft timer
- * scheduled for the future. If the timer cannot fire at all,
- * then we also don't need a soft timer.
+ * If the timer is pending, we don't need to have a soft timer
+ * scheduled for the future. If the timer is disabled, then
+ * we don't need a soft timer either.
*/
- if (should_fire || !kvm_timer_irq_can_fire(ctx))
+ if (pending || !kvm_timer_enabled(ctx))
return;
soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
@@ -594,10 +603,10 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
* If no timers are capable of raising interrupts (disabled or
* masked), then there's no more work for us to do.
*/
- if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
- !kvm_timer_irq_can_fire(map.direct_ptimer) &&
- !kvm_timer_irq_can_fire(map.emul_vtimer) &&
- !kvm_timer_irq_can_fire(map.emul_ptimer) &&
+ if (!kvm_timer_enabled(map.direct_vtimer) &&
+ !kvm_timer_enabled(map.direct_ptimer) &&
+ !kvm_timer_enabled(map.emul_vtimer) &&
+ !kvm_timer_enabled(map.emul_ptimer) &&
!vcpu_has_wfit_active(vcpu))
return;
@@ -677,6 +686,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo
static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
+ bool pending = kvm_timer_pending(ctx);
bool phys_active = false;
/*
@@ -685,12 +695,12 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
* this point and the register restoration, we'll take the
* interrupt anyway.
*/
- kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);
+ kvm_timer_update_irq(vcpu, pending, ctx);
if (irqchip_in_kernel(vcpu->kvm))
phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
- phys_active |= ctx->irq.level;
+ phys_active |= pending;
phys_active |= vgic_is_v5(vcpu->kvm);
set_timer_irq_phys_active(ctx, phys_active);
@@ -699,6 +709,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ bool pending = kvm_timer_pending(vtimer);
/*
* Update the timer output so that it is likely to match the
@@ -706,7 +717,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
* this point and the register restoration, we'll take the
* interrupt anyway.
*/
- kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
+ kvm_timer_update_irq(vcpu, pending, vtimer);
/*
* When using a userspace irqchip with the architected timers and a
@@ -718,7 +729,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
* being de-asserted, we unmask the interrupt again so that we exit
* from the guest when the timer fires.
*/
- if (vtimer->irq.level)
+ if (pending)
disable_percpu_irq(host_vtimer_irq);
else
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
@@ -904,23 +915,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
timer_set_traps(vcpu, &map);
}
-bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
- bool vlevel, plevel;
-
- if (likely(irqchip_in_kernel(vcpu->kvm)))
- return false;
-
- vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
- plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
-
- return kvm_timer_should_fire(vtimer) != vlevel ||
- kvm_timer_should_fire(ptimer) != plevel;
-}
-
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -1006,7 +1000,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- if (!kvm_timer_should_fire(vtimer)) {
+ if (!kvm_timer_pending(vtimer)) {
kvm_timer_update_irq(vcpu, false, vtimer);
if (static_branch_likely(&has_gic_active_state))
set_timer_irq_phys_active(vtimer, false);
@@ -1288,7 +1282,12 @@ static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
static int timer_irq_set_irqchip_state(struct irq_data *d,
enum irqchip_irq_state which, bool val)
{
- if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+ bool passthrough = which != IRQCHIP_STATE_ACTIVE ||
+ !irqd_is_forwarded_to_vcpu(d) ||
+ (kvm_vgic_global_state.type == VGIC_V5 &&
+ vgic_is_v3(kvm_get_running_vcpu()->kvm));
+
+ if (passthrough)
return irq_chip_set_parent_state(d, which, val);
if (val)
@@ -1301,15 +1300,7 @@ static int timer_irq_set_irqchip_state(struct irq_data *d,
static void timer_irq_eoi(struct irq_data *d)
{
- /*
- * On a GICv5 host, we still need to call EOI on the parent for
- * PPIs. The host driver already handles irqs which are forwarded to
- * vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This
- * is required to emulate the EOIMode=1 on GICv5 hardware. Failure to
- * call EOI unsurprisingly results in *BAD* lock-ups.
- */
- if (!irqd_is_forwarded_to_vcpu(d) ||
- kvm_vgic_global_state.type == VGIC_V5)
+ if (!irqd_is_forwarded_to_vcpu(d))
irq_chip_eoi_parent(d);
}
@@ -1392,8 +1383,6 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
return -ENOMEM;
}
- if (kvm_vgic_global_state.no_hw_deactivation)
- arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
(void *)TIMER_VTIMER));
}
@@ -1579,7 +1568,7 @@ static bool kvm_arch_timer_get_input_level(int vintid)
ctx = vcpu_get_timer(vcpu, i);
if (timer_irq(ctx) == vintid)
- return kvm_timer_should_fire(ctx);
+ return kvm_timer_pending(ctx);
}
/* A timer IRQ has fired, but no matching timer was found? */
@@ -1591,8 +1580,8 @@ static bool kvm_arch_timer_get_input_level(int vintid)
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+ const struct irq_ops *ops;
struct timer_map map;
- struct irq_ops *ops;
int ret;
if (timer->enabled)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9453321ef8c6..d3bbb26b012c 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -52,6 +52,7 @@
#include <linux/irqchip/arm-gic-v5.h>
+#include "vgic/vgic.h"
#include "sys_regs.h"
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
@@ -1166,6 +1167,15 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
return !kvm_supports_32bit_el0();
}
+static bool kvm_irq_update_run(struct kvm_vcpu *vcpu)
+{
+ bool r;
+
+ r = kvm_timer_update_run(vcpu);
+ r |= kvm_pmu_update_run(vcpu);
+ return r;
+}
+
/**
* kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest
* @vcpu: The VCPU pointer
@@ -1187,13 +1197,11 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
/*
* If we're using a userspace irqchip, then check if we need
* to tell a userspace irqchip about timer or PMU level
- * changes and if so, exit to userspace (the actual level
- * state gets updated in kvm_timer_update_run and
- * kvm_pmu_update_run below).
+ * changes and if so, exit to userspace while updating the run
+ * state.
*/
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- if (kvm_timer_should_notify_user(vcpu) ||
- kvm_pmu_should_notify_user(vcpu)) {
+ if (unlikely(kvm_irq_update_run(vcpu))) {
*ret = -EINTR;
run->exit_reason = KVM_EXIT_INTR;
return true;
@@ -1408,11 +1416,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
ret = handle_exit(vcpu, ret);
}
- /* Tell userspace about in-kernel device output levels */
- if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- kvm_timer_update_run(vcpu);
- kvm_pmu_update_run(vcpu);
- }
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ kvm_irq_update_run(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -1496,8 +1501,13 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return vcpu_interrupt_line(vcpu, irq_num, level);
case KVM_ARM_IRQ_TYPE_PPI:
- if (!irqchip_in_kernel(kvm))
+ if (irqchip_in_kernel(kvm)) {
+ int ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+ } else {
return -ENXIO;
+ }
vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
if (!vcpu)
@@ -1524,8 +1534,13 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
- if (!irqchip_in_kernel(kvm))
+ if (irqchip_in_kernel(kvm)) {
+ int ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+ } else {
return -ENXIO;
+ }
if (vgic_is_v5(kvm)) {
/* Build a GICv5-style IntID here */
@@ -2426,6 +2441,8 @@ static int __init init_subsystems(void)
switch (err) {
case 0:
vgic_present = true;
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ kvm_nvhe_sym(hyp_gicv3_nr_lr) = kvm_vgic_global_state.nr_lr;
break;
case -ENODEV:
case -ENXIO:
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 889c2c15d7bd..b8ded434c63f 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -136,14 +136,106 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}
+#define _has_tgran(__r, __sz) \
+ ({ \
+ u64 _s1, _mmfr0 = __r; \
+ \
+ _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz, _mmfr0); \
+ \
+ _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI; \
+ })
+
+static bool has_tgran(u64 mmfr0, unsigned int shift)
+{
+ switch (shift) {
+ case 12:
+ return _has_tgran(mmfr0, 4);
+ case 14:
+ return _has_tgran(mmfr0, 16);
+ case 16:
+ return _has_tgran(mmfr0, 64);
+ default:
+ BUG();
+ }
+}
+
+static unsigned int tcr_to_tg0_pgshift(u64 tcr)
+{
+ u64 tg0 = tcr & TCR_TG0_MASK;
+
+ switch (tg0) {
+ case TCR_TG0_4K:
+ return 12;
+ case TCR_TG0_16K:
+ return 14;
+ case TCR_TG0_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ return 16;
+ }
+}
+
+static unsigned int tcr_to_tg1_pgshift(u64 tcr)
+{
+ u64 tg1 = tcr & TCR_TG1_MASK;
+
+ switch (tg1) {
+ case TCR_TG1_4K:
+ return 12;
+ case TCR_TG1_16K:
+ return 14;
+ case TCR_TG1_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ return 16;
+ }
+}
+
+static unsigned int fallback_tgran_shift(u64 mmfr0)
+{
+ if (has_tgran(mmfr0, PAGE_SHIFT))
+ return PAGE_SHIFT;
+ else if (has_tgran(mmfr0, 12))
+ return 12;
+ else if (has_tgran(mmfr0, 14))
+ return 14;
+ else if (has_tgran(mmfr0, 16))
+ return 16;
+ else /* Should be unreacheable */
+ return PAGE_SHIFT;
+}
+
+static unsigned int tcr_tg_pgshift(struct kvm *kvm, u64 tcr, bool upper_range)
+{
+ u64 mmfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
+ unsigned int shift;
+
+ /* Someone was silly enough to encode TG0/TG1 differently */
+ if (upper_range)
+ shift = tcr_to_tg1_pgshift(tcr);
+ else
+ shift = tcr_to_tg0_pgshift(tcr);
+
+ /*
+ * If TGx is programmed to an unimplemented value (not advertised in
+ * ID_AA64MMFR0_EL1), we should treat it as if an implemented value is
+ * written, as per the architecture. Choose an available one while
+ * prioritizing PAGE_SIZE.
+ */
+ if (!has_tgran(mmfr0, shift))
+ return fallback_tgran_shift(mmfr0);
+
+ return shift;
+}
+
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
- u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
+ u64 hcr, sctlr, tcr, ps, ia_bits, ttbr;
unsigned int stride, x;
- bool va55, tbi, lva;
+ bool va55, tbi, lva, upper_range;
va55 = va & BIT(55);
+ upper_range = va55 && wi->regime != TR_EL2;
if (vcpu_has_nv(vcpu)) {
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
@@ -174,35 +266,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
BUG();
}
- /* Someone was silly enough to encode TG0/TG1 differently */
- if (va55 && wi->regime != TR_EL2) {
+ if (upper_range)
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
- tg = FIELD_GET(TCR_TG1_MASK, tcr);
-
- switch (tg << TCR_TG1_SHIFT) {
- case TCR_TG1_4K:
- wi->pgshift = 12; break;
- case TCR_TG1_16K:
- wi->pgshift = 14; break;
- case TCR_TG1_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
- }
- } else {
+ else
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
- tg = FIELD_GET(TCR_TG0_MASK, tcr);
-
- switch (tg << TCR_TG0_SHIFT) {
- case TCR_TG0_4K:
- wi->pgshift = 12; break;
- case TCR_TG0_16K:
- wi->pgshift = 14; break;
- case TCR_TG0_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
- }
- }
+ wi->pgshift = tcr_tg_pgshift(vcpu->kvm, tcr, upper_range);
wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
ia_bits = get_ia_size(wi);
@@ -423,6 +492,9 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (wi->s2) {
ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
+ if (ret == -EAGAIN)
+ return ret;
+
if (ret) {
fail_s1_walk(wr,
(s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
@@ -492,15 +564,18 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
/* Block mapping, check the validity of the level */
if (!(desc & BIT(1))) {
bool valid_block = false;
+ bool lpa = kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52);
switch (BIT(wi->pgshift)) {
case SZ_4K:
valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
break;
case SZ_16K:
- case SZ_64K:
valid_block = level == 2 || (wi->pa52bit && level == 1);
break;
+ case SZ_64K:
+ valid_block = level == 2 || (lpa && level == 1);
+ break;
}
if (!valid_block)
@@ -521,8 +596,12 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
}
ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
- if (ret)
+ if (ret == -EAGAIN)
return ret;
+ if (ret) {
+ fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
+ return ret;
+ }
desc = new_desc;
}
@@ -1380,7 +1459,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
}
}
write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
- __load_stage2(mmu, mmu->arch);
+ __load_stage2(mmu);
skip_mmu_switch:
/* Temporarily switch back to guest context */
@@ -1553,7 +1632,10 @@ int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
return 0;
}
- __kvm_at_s1e01(vcpu, op, vaddr);
+ ret = __kvm_at_s1e01(vcpu, op, vaddr);
+ if (ret)
+ return ret;
+
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
if (par & SYS_PAR_EL1_F)
return 0;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index dba7ced74ca5..e688bc5139c1 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2631,6 +2631,14 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
fgtreg = HFGITR2_EL2;
break;
+ case ICH_HFGRTR_GROUP:
+ fgtreg = is_read ? ICH_HFGRTR_EL2 : ICH_HFGWTR_EL2;
+ break;
+
+ case ICH_HFGITR_GROUP:
+ fgtreg = ICH_HFGITR_EL2;
+ break;
+
default:
/* Something is really wrong, bail out */
WARN_ONCE(1, "Bad FGT group (encoding %08x, config %016llx)\n",
@@ -2862,6 +2870,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
preempt_disable();
+ vcpu_set_flag(vcpu, IN_NESTED_EXCEPTION);
+
/*
* We may have an exception or PC update in the EL0/EL1 context.
* Commit it before entering EL2.
@@ -2884,6 +2894,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
__kvm_adjust_pc(vcpu);
kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ vcpu_clear_flag(vcpu, IN_NESTED_EXCEPTION);
+
preempt_enable();
if (kvm_vcpu_has_pmu(vcpu))
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 15e17aca1dec..3f6b1e29cd6b 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -29,6 +29,20 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
return;
/*
+ * Avoid needless save/restore of the guest's common
+ * FPSIMD/SVE/SME regs during transitions between L1/L2.
+ *
+ * These transitions only happens in a non-preemptible context
+ * where the host regs have already been saved and unbound. The
+ * live registers are either free or owned by the guest.
+ */
+ if (vcpu_get_flag(vcpu, IN_NESTED_ERET) ||
+ vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) {
+ WARN_ON_ONCE(host_owns_fp_regs());
+ return;
+ }
+
+ /*
* Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
* that the host kernel is responsible for restoring this state upon
* return to userspace, and the hyp code doesn't need to save anything.
@@ -102,6 +116,18 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
unsigned long flags;
+ /*
+ * See comment in kvm_arch_vcpu_load_fp(). Note that we also rely on
+ * the guest's max VL to have been set by fpsimd_lazy_switch_to_host()
+ * so that any intervening kernel-mode SIMD (NEON or otherwise)
+ * operation sees the full guest state that needs saving.
+ */
+ if (vcpu_get_flag(vcpu, IN_NESTED_ERET) ||
+ vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) {
+ WARN_ON_ONCE(host_owns_fp_regs());
+ return;
+ }
+
local_irq_save(flags);
if (guest_owns_fp_regs()) {
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e9b36a3b27bb..ff5d279cd6fc 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -141,7 +141,7 @@ static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
val &= ~CPACR_EL1_ZEN;
- if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
val |= cptr & CPACR_EL1_E0POE;
val |= cptr & CPTR_EL2_TCPAC;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 3cbfae0e3dda..29935c7da1de 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -56,6 +56,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot p
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
int kvm_host_prepare_stage2(void *pgt_pool_base);
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd);
+void kvm_guest_destroy_stage2(struct pkvm_hyp_vm *vm);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
int hyp_pin_shared_mem(void *from, void *to);
@@ -67,7 +68,7 @@ int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
static __always_inline void __load_host_stage2(void)
{
if (static_branch_likely(&kvm_protected_mode_initialized))
- __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
+ __load_stage2(&host_mmu.arch.mmu);
else
write_sysreg(0, vttbr_el2);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 06db299c37a8..1d01c6e547f5 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -24,6 +24,9 @@
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+/* Number of implemented GICv3 LRs. Used by flush_hyp_vcpu(). */
+unsigned int hyp_gicv3_nr_lr;
+
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
@@ -128,10 +131,18 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
+ /* __hyp_running_vcpu must be NULL in a guest context. */
+ hyp_vcpu->vcpu.arch.ctxt.__hyp_running_vcpu = NULL;
+
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
- hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
+ /*
+ * HCR_EL2.VSE is host-owned (a pending virtual SError to inject), not a
+ * trap-control bit, so it must flow to the hyp vCPU alongside TWI/TWE
+ * for the vSError to be delivered. sync_hyp_vcpu() reflects it back.
+ */
+ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE | HCR_VSE);
hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
- (HCR_TWI | HCR_TWE);
+ (HCR_TWI | HCR_TWE | HCR_VSE);
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
@@ -139,6 +150,12 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
+ /* Bound used_lrs by the number of implemented list registers. */
+ hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3.used_lrs =
+ min_t(unsigned int,
+ hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3.used_lrs,
+ hyp_gicv3_nr_lr);
+
hyp_vcpu->vcpu.arch.pid = host_vcpu->arch.pid;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 25f04629014e..4e329e39a695 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -217,7 +217,6 @@ static void *guest_s2_zalloc_page(void *mc)
memset(addr, 0, PAGE_SIZE);
p = hyp_virt_to_page(addr);
p->refcount = 1;
- p->order = 0;
return addr;
}
@@ -306,23 +305,27 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
return 0;
}
+void kvm_guest_destroy_stage2(struct pkvm_hyp_vm *vm)
+{
+ guest_lock_component(vm);
+ kvm_pgtable_stage2_destroy(&vm->pgt);
+ vm->kvm.arch.mmu.pgd_phys = 0ULL;
+ guest_unlock_component(vm);
+}
+
void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
struct hyp_page *page;
void *addr;
/* Dump all pgtable pages in the hyp_pool */
- guest_lock_component(vm);
- kvm_pgtable_stage2_destroy(&vm->pgt);
- vm->kvm.arch.mmu.pgd_phys = 0ULL;
- guest_unlock_component(vm);
+ kvm_guest_destroy_stage2(vm);
/* Drain the hyp_pool into the memcache */
addr = hyp_alloc_pages(&vm->pool, 0);
while (addr) {
page = hyp_virt_to_page(addr);
page->refcount = 0;
- page->order = 0;
push_hyp_memcache(mc, addr, hyp_virt_to_phys);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
addr = hyp_alloc_pages(&vm->pool, 0);
@@ -352,7 +355,7 @@ int __pkvm_prot_finalize(void)
kvm_flush_dcache_to_poc(params, sizeof(*params));
write_sysreg_hcr(params->hcr_el2);
- __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
+ __load_stage2(&host_mmu.arch.mmu);
/*
* Make sure to have an ISB before the TLB maintenance below but only
@@ -851,6 +854,16 @@ static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_pa
return 0;
}
+static int __hyp_check_page_count_range(phys_addr_t phys, u64 size)
+{
+ for_each_hyp_page(page, phys, size) {
+ if (page->refcount)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
static bool guest_pte_is_poisoned(kvm_pte_t pte)
{
if (kvm_pte_valid(pte))
@@ -1049,7 +1062,6 @@ unlock:
int __pkvm_host_unshare_hyp(u64 pfn)
{
u64 phys = hyp_pfn_to_phys(pfn);
- u64 virt = (u64)__hyp_va(phys);
u64 size = PAGE_SIZE;
int ret;
@@ -1062,10 +1074,9 @@ int __pkvm_host_unshare_hyp(u64 pfn)
ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
if (ret)
goto unlock;
- if (hyp_page_count((void *)virt)) {
- ret = -EBUSY;
+ ret = __hyp_check_page_count_range(phys, size);
+ if (ret)
goto unlock;
- }
__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
@@ -1128,6 +1139,10 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
if (ret)
goto unlock;
+ ret = __hyp_check_page_count_range(phys, size);
+ if (ret)
+ goto unlock;
+
__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index a1eb27a1a747..57f86aa0f82f 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -94,13 +94,22 @@ static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
phys_addr_t phys = hyp_page_to_phys(p);
- u8 order = p->order;
struct hyp_page *buddy;
+ bool coalesce = true;
+ u8 order = p->order;
- memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
+ /*
+ * 'external' pages are never coalesced and their ->order field
+ * untrusted as they bypass hyp_pool_init(). Enforce order-0.
+ */
+ if (phys < pool->range_start || phys >= pool->range_end) {
+ order = 0;
+ coalesce = false;
+ }
+
+ memset(hyp_page_to_virt(p), 0, PAGE_SIZE << order);
- /* Skip coalescing for 'external' pages being freed into the pool. */
- if (phys < pool->range_start || phys >= pool->range_end)
+ if (!coalesce)
goto insert;
/*
@@ -237,8 +246,10 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
/* Init the vmemmap portion */
p = hyp_phys_to_page(phys);
- for (i = 0; i < nr_pages; i++)
+ for (i = 0; i < nr_pages; i++) {
hyp_set_page_refcounted(&p[i]);
+ p[i].order = 0;
+ }
/* Attach the unused pages to the buddy tree */
for (i = reserved_pages; i < nr_pages; i++)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index eb1c10120f9f..3b2c4fbc34d8 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -853,10 +853,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
/* Must be called last since this publishes the VM. */
ret = insert_vm_table_entry(handle, hyp_vm);
if (ret)
- goto err_remove_mappings;
+ goto err_destroy_stage2;
return 0;
+err_destroy_stage2:
+ kvm_guest_destroy_stage2(hyp_vm);
err_remove_mappings:
unmap_donated_memory(hyp_vm, vm_size);
unmap_donated_memory(pgd, pgd_size);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 8d1df3d33595..7318e3e6a5f3 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -315,7 +315,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(guest_ctxt);
mmu = kern_hyp_va(vcpu->arch.hw_mmu);
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ __load_stage2(mmu);
__activate_traps(vcpu);
__hyp_vgic_restore_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index b29140995d48..fdb90483340c 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -110,7 +110,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
if (vcpu)
__load_host_stage2();
else
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ __load_stage2(mmu);
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
@@ -128,7 +128,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
return;
if (vcpu)
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ __load_stage2(mmu);
else
__load_host_stage2();
diff --git a/arch/arm64/kvm/hyp/vgic-v5-sr.c b/arch/arm64/kvm/hyp/vgic-v5-sr.c
index 47e6bcd43702..6d69dfe89a96 100644
--- a/arch/arm64/kvm/hyp/vgic-v5-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v5-sr.c
@@ -30,10 +30,9 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
{
/*
* The following code assumes that the bitmap storage that we have for
- * PPIs is either 64 (architected PPIs, only) or 128 bits (architected &
- * impdef PPIs).
+ * PPIs is either 64 (architected PPIs, only).
*/
- BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
+ BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS != 64);
bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2), 0, 64);
@@ -49,22 +48,6 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
cpu_if->vgic_ppi_priorityr[6] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2);
cpu_if->vgic_ppi_priorityr[7] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2);
- if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
- bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
- read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2), 64, 64);
- bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr,
- read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2), 64, 64);
-
- cpu_if->vgic_ppi_priorityr[8] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2);
- cpu_if->vgic_ppi_priorityr[9] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2);
- cpu_if->vgic_ppi_priorityr[10] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2);
- cpu_if->vgic_ppi_priorityr[11] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2);
- cpu_if->vgic_ppi_priorityr[12] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2);
- cpu_if->vgic_ppi_priorityr[13] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2);
- cpu_if->vgic_ppi_priorityr[14] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2);
- cpu_if->vgic_ppi_priorityr[15] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2);
- }
-
/* Now that we are done, disable DVI */
write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2);
write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
@@ -74,9 +57,6 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
{
DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
- /* We assume 64 or 128 PPIs - see above comment */
- BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
-
/* Enable DVI so that the guest's interrupt config takes over */
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 0, 64),
SYS_ICH_PPI_DVIR0_EL2);
@@ -108,50 +88,20 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
write_sysreg_s(cpu_if->vgic_ppi_priorityr[7],
SYS_ICH_PPI_PRIORITYR7_EL2);
- if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
- /* Enable DVI so that the guest's interrupt config takes over */
- write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 64, 64),
- SYS_ICH_PPI_DVIR1_EL2);
-
- write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 64, 64),
- SYS_ICH_PPI_ACTIVER1_EL2);
- write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 64, 64),
- SYS_ICH_PPI_ENABLER1_EL2);
- write_sysreg_s(bitmap_read(pendr, 64, 64),
- SYS_ICH_PPI_PENDR1_EL2);
-
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[8],
- SYS_ICH_PPI_PRIORITYR8_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[9],
- SYS_ICH_PPI_PRIORITYR9_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[10],
- SYS_ICH_PPI_PRIORITYR10_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[11],
- SYS_ICH_PPI_PRIORITYR11_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[12],
- SYS_ICH_PPI_PRIORITYR12_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[13],
- SYS_ICH_PPI_PRIORITYR13_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[14],
- SYS_ICH_PPI_PRIORITYR14_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[15],
- SYS_ICH_PPI_PRIORITYR15_EL2);
- } else {
- write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
-
- write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
-
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
- }
+ write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
+
+ write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
+
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
}
void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if)
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1e8995add14f..bbe9cebd3d9d 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -219,7 +219,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
__vcpu_load_switch_sysregs(vcpu);
__vcpu_load_activate_traps(vcpu);
- __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
+ __load_stage2(vcpu->arch.hw_mmu);
}
void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index f7b9dfe3f3a5..c386d9f1c101 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -60,7 +60,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
* place before clearing TGE. __load_stage2() already
* has an ISB in order to deal with this.
*/
- __load_stage2(mmu, mmu->arch);
+ __load_stage2(mmu);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg_hcr(val);
@@ -78,7 +78,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
/* ... and the stage-2 MMU context that we switched away from */
if (cxt->mmu)
- __load_stage2(cxt->mmu, cxt->mmu->arch);
+ __load_stage2(cxt->mmu);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Restore the registers to what they were */
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 4da9281312eb..8811ad60cf72 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -501,6 +501,10 @@ static int share_pfn_hyp(u64 pfn)
rb_link_node(&this->node, parent, node);
rb_insert_color(&this->node, &hyp_shared_pfns);
ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn);
+ if (ret) {
+ rb_erase(&this->node, &hyp_shared_pfns);
+ kfree(this);
+ }
unlock:
mutex_unlock(&hyp_shared_pfns_lock);
@@ -520,13 +524,17 @@ static int unshare_pfn_hyp(u64 pfn)
goto unlock;
}
- this->count--;
- if (this->count)
+ if (this->count > 1) {
+ this->count--;
+ goto unlock;
+ }
+
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn);
+ if (ret)
goto unlock;
rb_erase(&this->node, &hyp_shared_pfns);
kfree(this);
- ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn);
unlock:
mutex_unlock(&hyp_shared_pfns_lock);
@@ -536,8 +544,8 @@ unlock:
int kvm_share_hyp(void *from, void *to)
{
phys_addr_t start, end, cur;
+ int ret = 0;
u64 pfn;
- int ret;
if (is_kernel_in_hyp_mode())
return 0;
@@ -559,10 +567,24 @@ int kvm_share_hyp(void *from, void *to)
pfn = __phys_to_pfn(cur);
ret = share_pfn_hyp(pfn);
if (ret)
- return ret;
+ break;
}
- return 0;
+ if (!ret)
+ return 0;
+
+ /*
+ * Roll back the pages shared by this call. A failed unshare leaks
+ * the page (it stays shared with the hypervisor and is no longer
+ * reusable for pKVM) but breaks no isolation guarantee, so warn and
+ * continue. Not expected in practice.
+ */
+ for (end = cur, cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ WARN_ON(unshare_pfn_hyp(pfn));
+ }
+
+ return ret;
}
void kvm_unshare_hyp(void *from, void *to)
@@ -577,6 +599,11 @@ void kvm_unshare_hyp(void *from, void *to)
end = PAGE_ALIGN(__pa(to));
for (cur = start; cur < end; cur += PAGE_SIZE) {
pfn = __phys_to_pfn(cur);
+ /*
+ * A failed unshare leaks the page: it stays shared with the
+ * hypervisor and is no longer reusable for pKVM. No isolation
+ * guarantee is broken, and this is not expected in practice.
+ */
WARN_ON(unshare_pfn_hyp(pfn));
}
}
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 6f7bc9a9992e..fb54f6dad995 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -359,8 +359,13 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
if (new_desc != desc) {
ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi);
- if (ret)
+ if (ret == -EAGAIN)
return ret;
+ if (ret) {
+ out->esr = ESR_ELx_FSC_SEA_TTW(level);
+ out->desc = desc;
+ return 1;
+ }
desc = new_desc;
}
@@ -385,32 +390,104 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
return 0;
}
-static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
+#define _has_tgran_2(__r, __sz) \
+ ({ \
+ u64 _s1, _s2, _mmfr0 = __r; \
+ \
+ _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz##_2, _mmfr0); \
+ \
+ _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz, _mmfr0); \
+ \
+ ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \
+ _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \
+ (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \
+ _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \
+ })
+
+static bool has_tgran_2(u64 mmfr0, unsigned int shift)
{
- wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
+ switch (shift) {
+ case 12:
+ return _has_tgran_2(mmfr0, 4);
+ case 14:
+ return _has_tgran_2(mmfr0, 16);
+ case 16:
+ return _has_tgran_2(mmfr0, 64);
+ default:
+ BUG();
+ }
+}
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) {
+static unsigned int fallback_tgran2_shift(u64 mmfr0)
+{
+ if (has_tgran_2(mmfr0, PAGE_SHIFT))
+ return PAGE_SHIFT;
+ else if (has_tgran_2(mmfr0, 12))
+ return 12;
+ else if (has_tgran_2(mmfr0, 14))
+ return 14;
+ else if (has_tgran_2(mmfr0, 16))
+ return 16;
+ else
+ return PAGE_SHIFT;
+}
+
+static unsigned int vtcr_to_tg0_pgshift(struct kvm *kvm, u64 vtcr)
+{
+ u64 tg0 = FIELD_GET(VTCR_EL2_TG0_MASK, vtcr);
+ u64 mmfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
+ unsigned int shift;
+
+ switch (tg0) {
case VTCR_EL2_TG0_4K:
- wi->pgshift = 12; break;
+ shift = 12;
+ break;
case VTCR_EL2_TG0_16K:
- wi->pgshift = 14; break;
+ shift = 14;
+ break;
case VTCR_EL2_TG0_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
+ /* IMPDEF: treat any other value as 64k, subject to fallback */
+ default:
+ shift = 16;
}
+ /*
+ * If TGx is programmed to an unimplemented value (not advertised in
+ * ID_AA64MMFR0_EL1), we should treat it as if an implemented value is
+ * written, as per the architecture. Choose an available one while
+ * prioritizing PAGE_SIZE.
+ */
+ if (!has_tgran_2(mmfr0, shift))
+ return fallback_tgran2_shift(mmfr0);
+
+ return shift;
+}
+
+static size_t vtcr_to_tg0_pgsize(struct kvm *kvm, u64 vtcr)
+{
+ return BIT(vtcr_to_tg0_pgshift(kvm, vtcr));
+}
+
+static void setup_s2_walk(struct kvm_vcpu *vcpu, struct s2_walk_info *wi)
+{
+ u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
+
+ wi->baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+ wi->t0sz = vtcr & VTCR_EL2_T0SZ_MASK;
+ wi->pgshift = vtcr_to_tg0_pgshift(vcpu->kvm, vtcr);
wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
-
wi->ha = vtcr & VTCR_EL2_HA;
+ wi->be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
struct kvm_s2_trans *result)
{
- u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
struct s2_walk_info wi;
int ret;
@@ -419,11 +496,7 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
if (!vcpu_has_nv(vcpu))
return 0;
- wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
-
- vtcr_to_walk_info(vtcr, &wi);
-
- wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
+ setup_s2_walk(vcpu, &wi);
ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result);
if (ret)
@@ -519,20 +592,21 @@ static u8 pgshift_level_to_ttl(u16 shift, u8 level)
*/
static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr)
{
- u64 tmp, sz = 0, vtcr = mmu->tlb_vtcr;
+ size_t tg0_size = vtcr_to_tg0_pgsize(kvm_s2_mmu_to_kvm(mmu), mmu->tlb_vtcr);
+ u64 tmp, sz = 0;
kvm_pte_t pte;
u8 ttl, level;
lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock);
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) {
- case VTCR_EL2_TG0_4K:
+ switch (tg0_size) {
+ case SZ_4K:
ttl = (TLBI_TTL_TG_4K << 2);
break;
- case VTCR_EL2_TG0_16K:
+ case SZ_16K:
ttl = (TLBI_TTL_TG_16K << 2);
break;
- case VTCR_EL2_TG0_64K:
+ case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
ttl = (TLBI_TTL_TG_64K << 2);
break;
@@ -542,19 +616,19 @@ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr)
again:
/* Iteratively compute the block sizes for a particular granule size */
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) {
- case VTCR_EL2_TG0_4K:
+ switch (tg0_size) {
+ case SZ_4K:
if (sz < SZ_4K) sz = SZ_4K;
else if (sz < SZ_2M) sz = SZ_2M;
else if (sz < SZ_1G) sz = SZ_1G;
else sz = 0;
break;
- case VTCR_EL2_TG0_16K:
+ case SZ_16K:
if (sz < SZ_16K) sz = SZ_16K;
else if (sz < SZ_32M) sz = SZ_32M;
else sz = 0;
break;
- case VTCR_EL2_TG0_64K:
+ case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
if (sz < SZ_64K) sz = SZ_64K;
else if (sz < SZ_512M) sz = SZ_512M;
@@ -605,14 +679,14 @@ unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val)
if (!max_size) {
/* Compute the maximum extent of the invalidation */
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, mmu->tlb_vtcr)) {
- case VTCR_EL2_TG0_4K:
+ switch (vtcr_to_tg0_pgsize(kvm, mmu->tlb_vtcr)) {
+ case SZ_4K:
max_size = SZ_1G;
break;
- case VTCR_EL2_TG0_16K:
+ case SZ_16K:
max_size = SZ_32M;
break;
- case VTCR_EL2_TG0_64K:
+ case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
/*
* No, we do not support 52bit IPA in nested yet. Once
@@ -804,18 +878,24 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
}
}
+static void this_cpu_reset_vncr_fixmap(struct kvm_vcpu *vcpu)
+{
+ if (!host_data_test_flag(L1_VNCR_MAPPED))
+ return;
+
+ BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
+ BUG_ON(is_hyp_ctxt(vcpu));
+
+ clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
+ vcpu->arch.vncr_tlb->cpu = -1;
+ host_data_clear_flag(L1_VNCR_MAPPED);
+ atomic_dec(&vcpu->kvm->arch.vncr_map_count);
+}
+
void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
{
/* Unconditionally drop the VNCR mapping if we have one */
- if (host_data_test_flag(L1_VNCR_MAPPED)) {
- BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
- BUG_ON(is_hyp_ctxt(vcpu));
-
- clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
- vcpu->arch.vncr_tlb->cpu = -1;
- host_data_clear_flag(L1_VNCR_MAPPED);
- atomic_dec(&vcpu->kvm->arch.vncr_map_count);
- }
+ this_cpu_reset_vncr_fixmap(vcpu);
/*
* Keep a reference on the associated stage-2 MMU if the vCPU is
@@ -904,9 +984,21 @@ static void invalidate_vncr(struct vncr_tlb *vt)
clear_fixmap(vncr_fixmap(vt->cpu));
}
+/*
+ * VNCR TLB invalidation occurs from MMU notifiers or TLBI instructions, and
+ * either can race against a vcpu not being onlined yet (no pseudo-TLB
+ * allocated). Similarly, the TLB might be invalid. Skip those, as they
+ * obviously don't participate in the invalidation at this stage.
+ */
+#define kvm_for_each_vncr_tlb(idx, vcpup, tlbp, kvm) \
+ kvm_for_each_vcpu(idx, vcpup, kvm) \
+ if (((tlbp) = vcpup->arch.vncr_tlb) && \
+ (tlbp)->valid)
+
static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
{
struct kvm_vcpu *vcpu;
+ struct vncr_tlb *vt;
unsigned long i;
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -914,24 +1006,9 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
return;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ kvm_for_each_vncr_tlb(i, vcpu, vt, kvm) {
u64 ipa_start, ipa_end, ipa_size;
- /*
- * Careful here: We end-up here from an MMU notifier,
- * and this can race against a vcpu not being onlined
- * yet, without the pseudo-TLB being allocated.
- *
- * Skip those, as they obviously don't participate in
- * the invalidation at this stage.
- */
- if (!vt)
- continue;
-
- if (!vt->valid)
- continue;
-
ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
vt->wr.level));
ipa_start = vt->wr.pa & ~(ipa_size - 1);
@@ -961,17 +1038,14 @@ static void invalidate_vncr_va(struct kvm *kvm,
struct s1e2_tlbi_scope *scope)
{
struct kvm_vcpu *vcpu;
+ struct vncr_tlb *vt;
unsigned long i;
lockdep_assert_held_write(&kvm->mmu_lock);
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ kvm_for_each_vncr_tlb(i, vcpu, vt, kvm) {
u64 va_start, va_end, va_size;
- if (!vt->valid)
- continue;
-
va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
vt->wr.level));
va_start = vt->gva & ~(va_size - 1);
@@ -1255,8 +1329,20 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
return 0;
- vcpu->arch.vncr_tlb = kzalloc_obj(*vcpu->arch.vncr_tlb,
- GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.vncr_tlb) {
+ struct vncr_tlb *vt = kzalloc_obj(*vcpu->arch.vncr_tlb,
+ GFP_KERNEL_ACCOUNT);
+
+ /*
+ * Taking the lock on assignment ensures that the TLB is
+ * seen as initialised when following the pointer (release
+ * semantics of the unlock), and avoids having acquires on
+ * each user which already take the lock.
+ */
+ scoped_guard(write_lock, &vcpu->kvm->mmu_lock)
+ vcpu->arch.vncr_tlb = vt;
+ }
+
if (!vcpu->arch.vncr_tlb)
return -ENOMEM;
@@ -1289,7 +1375,8 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem)
* We also prepare the next walk wilst we're at it.
*/
scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
- invalidate_vncr(vt);
+ this_cpu_reset_vncr_fixmap(vcpu);
+ vt->valid = false;
vt->wi = (struct s1_walk_info) {
.regime = TR_EL20,
@@ -1333,8 +1420,10 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem)
}
scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
- if (mmu_invalidate_retry(vcpu->kvm, mmu_seq))
+ if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) {
+ kvm_release_faultin_page(vcpu->kvm, page, true, false);
return -EAGAIN;
+ }
vt->gva = va;
vt->hpa = pfn << PAGE_SHIFT;
@@ -1505,21 +1594,6 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
}
}
-#define has_tgran_2(__r, __sz) \
- ({ \
- u64 _s1, _s2, _mmfr0 = __r; \
- \
- _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
- TGRAN##__sz##_2, _mmfr0); \
- \
- _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
- TGRAN##__sz, _mmfr0); \
- \
- ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \
- _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \
- (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \
- _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \
- })
/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
@@ -1606,15 +1680,15 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
*/
switch (PAGE_SIZE) {
case SZ_4K:
- if (has_tgran_2(orig_val, 4))
+ if (_has_tgran_2(orig_val, 4))
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
fallthrough;
case SZ_16K:
- if (has_tgran_2(orig_val, 16))
+ if (_has_tgran_2(orig_val, 16))
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
fallthrough;
case SZ_64K:
- if (has_tgran_2(orig_val, 64))
+ if (_has_tgran_2(orig_val, 64))
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
break;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index c816db5d6761..98305bbfc095 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -396,44 +396,31 @@ static bool kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- bool overflow;
- overflow = kvm_pmu_overflow_status(vcpu);
- if (pmu->irq_level == overflow)
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
return;
- pmu->irq_level = overflow;
-
- if (likely(irqchip_in_kernel(vcpu->kvm))) {
- int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
- pmu->irq_num, overflow, pmu);
- WARN_ON(ret);
- }
+ WARN_ON(kvm_vgic_inject_irq(vcpu->kvm, vcpu, pmu->irq_num,
+ kvm_pmu_overflow_status(vcpu), pmu));
}
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
- if (likely(irqchip_in_kernel(vcpu->kvm)))
- return false;
-
- return pmu->irq_level != run_level;
+ return kvm_pmu_overflow_status(vcpu) != run_level;
}
/*
* Reflect the PMU overflow interrupt output level into the kvm_run structure
*/
-void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
+bool kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
- struct kvm_sync_regs *regs = &vcpu->run->s.regs;
-
- /* Populate the timer bitmap for user space */
- regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
- if (vcpu->arch.pmu.irq_level)
- regs->device_irq_level |= KVM_ARM_DEV_PMU;
+ bool update = kvm_pmu_should_notify_user(vcpu);
+ if (update)
+ vcpu->run->s.regs.device_irq_level ^= KVM_ARM_DEV_PMU;
+ return update;
}
/**
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index fa5c93c7a135..5d5c579d4579 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -724,6 +724,7 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
{
unsigned long *mask = vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask;
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+ unsigned long reg = p->regval;
int i;
/* We never expect to get here with a read! */
@@ -731,27 +732,23 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
return undef_access(vcpu, p, r);
/*
- * If we're only handling architected PPIs and the guest writes to the
- * enable for the non-architected PPIs, we just return as there's
- * nothing to do at all. We don't even allocate the storage for them in
- * this case.
+ * As we're only handling architected PPIs, the guest writes to the
+ * enable for the non-architected PPIs just return as there's
+ * nothing to do at all. We don't even allocate the storage for them.
*/
- if (VGIC_V5_NR_PRIVATE_IRQS == 64 && p->Op2 % 2)
+ if (p->Op2 % 2)
return true;
/*
- * Merge the raw guest write into out bitmap at an offset of either 0 or
- * 64, then and it with our PPI mask.
+ * Merge the raw guest write into out bitmap, anded with our PPI mask.
*/
- bitmap_write(cpu_if->vgic_ppi_enabler, p->regval, 64 * (p->Op2 % 2), 64);
- bitmap_and(cpu_if->vgic_ppi_enabler, cpu_if->vgic_ppi_enabler, mask,
- VGIC_V5_NR_PRIVATE_IRQS);
+ bitmap_and(cpu_if->vgic_ppi_enabler, &reg, mask, VGIC_V5_NR_PRIVATE_IRQS);
/*
* Sync the change in enable states to the vgic_irqs. We consider all
* PPIs as we don't expose many to the guest.
*/
- for_each_set_bit(i, mask, VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
@@ -4212,6 +4209,7 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(AT_S1E0W, handle_at_s1e01),
SYS_INSN(AT_S1E1RP, handle_at_s1e01),
SYS_INSN(AT_S1E1WP, handle_at_s1e01),
+ SYS_INSN(AT_S1E1A, handle_at_s1e01),
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 933983bb2005..907057881b26 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -271,18 +271,12 @@ int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
return ret;
}
-static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
+static void vgic_setup_private_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ u32 type)
{
- struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
+ irq->intid = irq - &vcpu->arch.vgic_cpu.private_irqs[0];
- INIT_LIST_HEAD(&irq->ap_list);
- raw_spin_lock_init(&irq->irq_lock);
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- refcount_set(&irq->refcount, 0);
-
- irq->intid = i;
- if (vgic_irq_is_sgi(i)) {
+ if (vgic_irq_is_sgi(irq->intid)) {
/* SGIs */
irq->enabled = 1;
irq->config = VGIC_CONFIG_EDGE;
@@ -303,18 +297,11 @@ static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
}
}
-static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
+static void vgic_v5_setup_private_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
- struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
- u32 intid = vgic_v5_make_ppi(i);
-
- INIT_LIST_HEAD(&irq->ap_list);
- raw_spin_lock_init(&irq->irq_lock);
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- refcount_set(&irq->refcount, 0);
+ int i = irq - &vcpu->arch.vgic_cpu.private_irqs[0];
- irq->intid = intid;
+ irq->intid = vgic_v5_make_ppi(i);
/* The only Edge architected PPI is the SW_PPI */
if (i == GICV5_ARCH_PPI_SW_PPI)
@@ -323,7 +310,7 @@ static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
irq->config = VGIC_CONFIG_LEVEL;
/* Register the GICv5-specific PPI ops */
- vgic_v5_set_ppi_ops(vcpu, intid);
+ vgic_v5_set_ppi_ops(vcpu, irq->intid);
}
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
@@ -349,15 +336,19 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
if (!vgic_cpu->private_irqs)
return -ENOMEM;
- /*
- * Enable and configure all SGIs to be edge-triggered and
- * configure all PPIs as level-triggered.
- */
for (i = 0; i < num_private_irqs; i++) {
+ struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
+
+ INIT_LIST_HEAD(&irq->ap_list);
+ raw_spin_lock_init(&irq->irq_lock);
+ irq->vcpu = NULL;
+ irq->target_vcpu = vcpu;
+ refcount_set(&irq->refcount, 0);
+
if (vgic_is_v5(vcpu->kvm))
- vgic_v5_allocate_private_irq(vcpu, i, type);
+ vgic_v5_setup_private_irq(vcpu, irq);
else
- vgic_allocate_private_irq(vcpu, i, type);
+ vgic_setup_private_irq(vcpu, irq, type);
}
return 0;
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index b9b86e3a6c86..19a1094536e6 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -20,9 +20,15 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
int level, bool line_status)
{
unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS;
+ int ret;
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
+
+ ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+
return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL);
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 1e3706ac3b8e..4477f870c7b3 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -27,7 +27,7 @@ static struct kvm_device_ops kvm_arm_vgic_its_ops;
static int vgic_its_save_tables_v0(struct vgic_its *its);
static int vgic_its_restore_tables_v0(struct vgic_its *its);
-static int vgic_its_commit_v0(struct vgic_its *its);
+static void vgic_its_commit_v0(struct vgic_its *its);
static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
struct kvm_vcpu *filter_vcpu, bool needs_inv);
@@ -168,7 +168,7 @@ struct vgic_its_abi {
int ite_esz;
int (*save_tables)(struct vgic_its *its);
int (*restore_tables)(struct vgic_its *its);
- int (*commit)(struct vgic_its *its);
+ void (*commit)(struct vgic_its *its);
};
#define ABI_0_ESZ 8
@@ -192,13 +192,13 @@ inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
return &its_table_abi_versions[its->abi_rev];
}
-static int vgic_its_set_abi(struct vgic_its *its, u32 rev)
+static void vgic_its_set_abi(struct vgic_its *its, u32 rev)
{
const struct vgic_its_abi *abi;
its->abi_rev = rev;
abi = vgic_its_get_abi(its);
- return abi->commit(its);
+ abi->commit(its);
}
/*
@@ -472,7 +472,8 @@ static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm,
if (rev >= NR_ITS_ABIS)
return -EINVAL;
- return vgic_its_set_abi(its, rev);
+ vgic_its_set_abi(its, rev);
+ return 0;
}
static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
@@ -1890,14 +1891,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
its->baser_coll_table = INITIAL_BASER_VALUE |
((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT);
dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE;
-
dev->private = its;
- ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1);
-
+ vgic_its_set_abi(its, NR_ITS_ABIS - 1);
mutex_unlock(&dev->kvm->arch.config_lock);
-
- return ret;
+ return 0;
}
static void vgic_its_destroy(struct kvm_device *kvm_dev)
@@ -2612,7 +2610,7 @@ static int vgic_its_restore_tables_v0(struct vgic_its *its)
return ret;
}
-static int vgic_its_commit_v0(struct vgic_its *its)
+static void vgic_its_commit_v0(struct vgic_its *its)
{
const struct vgic_its_abi *abi;
@@ -2625,7 +2623,6 @@ static int vgic_its_commit_v0(struct vgic_its *its)
its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5)
<< GITS_BASER_ENTRY_SIZE_SHIFT);
- return 0;
}
static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its)
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index a96c77dccf35..90be99443df3 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -730,18 +730,15 @@ static int vgic_v5_get_userspace_ppis(struct kvm_device *dev,
guard(mutex)(&dev->kvm->arch.config_lock);
/*
- * We either support 64 or 128 PPIs. In the former case, we need to
- * return 0s for the second 64 bits as we have no storage backing those.
+ * We only support 64 PPIs, so, we need to return 0s for the
+ * second 64 bits as we have no storage backing those.
*/
ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 0, 64), uaddr);
if (ret)
return ret;
uaddr++;
- if (VGIC_V5_NR_PRIVATE_IRQS == 128)
- ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 64, 128), uaddr);
- else
- ret = put_user(0, uaddr);
+ ret = put_user(0, uaddr);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index fdd39ea7f83e..d4789ff3e740 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -10,7 +10,7 @@
#include "vgic.h"
-static struct vgic_v5_ppi_caps ppi_caps;
+#define ppi_caps kvm_vgic_global_state.vgic_v5_ppi_caps
/*
* Not all PPIs are guaranteed to be implemented for GICv5. Deterermine which
@@ -18,20 +18,17 @@ static struct vgic_v5_ppi_caps ppi_caps;
*/
static void vgic_v5_get_implemented_ppis(void)
{
- if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
- return;
-
/*
* If we have KVM, we have EL2, which means that we have support for the
* EL1 and EL2 Physical & Virtual timers.
*/
- __assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1);
- __assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1);
- __assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1);
- __assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1);
+ __set_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask);
+ __set_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask);
+ __set_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask);
+ __set_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask);
/* The SW_PPI should be available */
- __assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1);
+ __set_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask);
/* The PMUIRQ is available if we have the PMU */
__assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3());
@@ -146,9 +143,7 @@ int vgic_v5_init(struct kvm *kvm)
/* We only allow userspace to drive the SW_PPI, if it is implemented. */
bitmap_zero(kvm->arch.vgic.gicv5_vm.userspace_ppis,
VGIC_V5_NR_PRIVATE_IRQS);
- __assign_bit(GICV5_ARCH_PPI_SW_PPI,
- kvm->arch.vgic.gicv5_vm.userspace_ppis,
- VGIC_V5_NR_PRIVATE_IRQS);
+ __set_bit(GICV5_ARCH_PPI_SW_PPI, kvm->arch.vgic.gicv5_vm.userspace_ppis);
bitmap_and(kvm->arch.vgic.gicv5_vm.userspace_ppis,
kvm->arch.vgic.gicv5_vm.userspace_ppis,
ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
@@ -197,7 +192,7 @@ int vgic_v5_finalize_ppi_state(struct kvm *kvm)
/* Expose PPIs with an owner or the SW_PPI, only */
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
if (irq->owner || i == GICV5_ARCH_PPI_SW_PPI) {
- __assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, 1);
+ __set_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask);
__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr,
irq->config == VGIC_CONFIG_LEVEL);
}
@@ -243,9 +238,9 @@ static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu)
/*
* For GICv5, the PPIs are mostly directly managed by the hardware. We (the
- * hypervisor) handle the pending, active, enable state save/restore, but don't
- * need the PPIs to be queued on a per-VCPU AP list. Therefore, sanity check the
- * state, unlock, and return.
+ * hypervisor) handle the pending, active, enable state save/restore, but
+ * don't need the PPIs to be queued on a per-VCPU AP list. Therefore,
+ * unlock, kick the vcpu and return.
*/
bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags)
@@ -255,12 +250,7 @@ bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
lockdep_assert_held(&irq->irq_lock);
- if (WARN_ON_ONCE(!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, irq->intid)))
- goto out_unlock_fail;
-
vcpu = irq->target_vcpu;
- if (WARN_ON_ONCE(!vcpu))
- goto out_unlock_fail;
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
@@ -269,11 +259,6 @@ bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
kvm_vcpu_kick(vcpu);
return true;
-
-out_unlock_fail:
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-
- return false;
}
/*
@@ -287,10 +272,10 @@ void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi)
lockdep_assert_held(&irq->irq_lock);
ppi = vgic_v5_get_hwirq_id(irq->intid);
- __assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
+ assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
}
-static struct irq_ops vgic_v5_ppi_irq_ops = {
+static const struct irq_ops vgic_v5_ppi_irq_ops = {
.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
.set_direct_injection = vgic_v5_set_ppi_dvi,
};
@@ -316,7 +301,7 @@ static void vgic_v5_sync_ppi_priorities(struct kvm_vcpu *vcpu)
* those actually exposed to the guest by first iterating over the mask
* of exposed PPIs.
*/
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
int pri_idx, pri_reg, pri_bit;
@@ -358,7 +343,7 @@ bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu)
if (!priority_mask)
return false;
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
bool has_pending = false;
struct vgic_irq *irq;
@@ -391,8 +376,7 @@ void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu)
activer = host_data_ptr(vgic_v5_ppi_state)->activer_exit;
pendr = host_data_ptr(vgic_v5_ppi_state)->pendr;
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask,
- VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
@@ -429,8 +413,7 @@ void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu)
* ICC_PPI_PENDRx_EL1, however.
*/
bitmap_zero(pendr, VGIC_V5_NR_PRIVATE_IRQS);
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask,
- VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 1e9fe8764584..5a4768d8cd4f 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -106,24 +106,23 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
{
+ enum kvm_device_type type;
+
if (WARN_ON(!vcpu))
return NULL;
- if (vgic_is_v5(vcpu->kvm)) {
- u32 int_num, hwirq_id;
-
- if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid))
- return NULL;
-
- hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid);
- int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS);
+ type = vcpu->kvm->arch.vgic.vgic_model;
- return &vcpu->arch.vgic_cpu.private_irqs[int_num];
- }
+ if (__irq_is_sgi(type, intid) || __irq_is_ppi(type, intid)) {
+ switch (type) {
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ intid = vgic_v5_get_hwirq_id(intid);
+ intid = array_index_nospec(intid, VGIC_V5_NR_PRIVATE_IRQS);
+ break;
+ default:
+ intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
+ }
- /* SGIs and PPIs */
- if (intid < VGIC_NR_PRIVATE_IRQS) {
- intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
return &vcpu->arch.vgic_cpu.private_irqs[intid];
}
@@ -534,11 +533,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
{
struct vgic_irq *irq;
unsigned long flags;
- int ret;
- ret = vgic_lazy_init(kvm);
- if (ret)
- return ret;
+ if (unlikely(!vgic_initialized(kvm)))
+ return 0;
if (!vcpu && irq_is_private(kvm, intid))
return -EINVAL;
@@ -573,7 +570,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
}
void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
- struct irq_ops *ops)
+ const struct irq_ops *ops)
{
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 9d941241c8a2..f45f7e3ec4d6 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -378,6 +378,9 @@ void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v5_restore_state(struct kvm_vcpu *vcpu);
void vgic_v5_save_state(struct kvm_vcpu *vcpu);
+#define for_each_visible_v5_ppi(__i, __k) \
+ for_each_set_bit(__i, (__k)->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS)
+
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c
index c1af07083cef..e9d1795235a6 100644
--- a/drivers/irqchip/irq-gic-v5.c
+++ b/drivers/irqchip/irq-gic-v5.c
@@ -208,17 +208,13 @@ static void gicv5_hwirq_eoi(u32 hwirq_id, u8 hwirq_type)
FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, hwirq_type);
gic_insn(cddi, CDDI);
-
- gic_insn(0, CDEOI);
}
static void gicv5_ppi_irq_eoi(struct irq_data *d)
{
/* Skip deactivate for forwarded PPI interrupts */
- if (irqd_is_forwarded_to_vcpu(d)) {
- gic_insn(0, CDEOI);
+ if (irqd_is_forwarded_to_vcpu(d))
return;
- }
gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_PPI);
}
@@ -969,6 +965,13 @@ static void __exception_irq_entry gicv5_handle_irq(struct pt_regs *regs)
*/
isb();
+ /*
+ * Ensure that we can receive the next interrupts in the event that we
+ * have a long running handler or directly enter a guest by doing the
+ * priority drop immediately.
+ */
+ gic_insn(0, CDEOI);
+
hwirq = FIELD_GET(GICV5_HWIRQ_INTID, ia);
handle_irq_per_domain(hwirq);
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index bf8cc9589bd0..15a4f97f8105 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -66,11 +66,6 @@ struct arch_timer_context {
*/
bool loaded;
- /* Output level of the timer IRQ */
- struct {
- bool level;
- } irq;
-
/* Who am I? */
enum kvm_arch_timers timer_id;
@@ -104,7 +99,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_timer_sync_nested(struct kvm_vcpu *vcpu);
void kvm_timer_sync_user(struct kvm_vcpu *vcpu);
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
-void kvm_timer_update_run(struct kvm_vcpu *vcpu);
+bool kvm_timer_update_run(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
void kvm_timer_init_vm(struct kvm *kvm);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 0a36a3d5c894..b5e5942204fc 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -32,7 +32,6 @@ struct kvm_pmu {
struct kvm_pmc pmc[KVM_ARMV8_PMU_MAX_COUNTERS];
int irq_num;
bool created;
- bool irq_level;
};
struct arm_pmu_entry {
@@ -54,7 +53,7 @@ void kvm_pmu_reprogram_counter_mask(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu);
-void kvm_pmu_update_run(struct kvm_vcpu *vcpu);
+bool kvm_pmu_update_run(struct kvm_vcpu *vcpu);
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
@@ -131,7 +130,7 @@ static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
return false;
}
-static inline void kvm_pmu_update_run(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_pmu_update_run(struct kvm_vcpu *vcpu) { return false; }
static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 1388dc6028a9..fe49fb56dc3c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -177,6 +177,11 @@ struct vgic_global {
bool has_gcie_v3_compat;
u32 ich_vtr_el2;
+
+ /* GICv5 PPI capabilities */
+ struct {
+ DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+ } vgic_v5_ppi_caps;
};
extern struct vgic_global kvm_vgic_global_state;
@@ -200,7 +205,7 @@ struct vgic_irq;
*/
struct irq_ops {
/* Per interrupt flags for special-cased interrupts */
- unsigned long flags;
+ unsigned long (*get_flags)(void);
#define VGIC_IRQ_SW_RESAMPLE BIT(0) /* Clear the active state for resampling */
@@ -266,7 +271,7 @@ struct vgic_irq {
u8 priority;
u8 group; /* 0 == group 0, 1 == group 1 */
- struct irq_ops *ops;
+ const struct irq_ops *ops;
void *owner; /* Opaque pointer to reserve an interrupt
for in-kernel devices. */
@@ -274,7 +279,8 @@ struct vgic_irq {
static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq)
{
- return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE);
+ return irq->ops && irq->ops->get_flags &&
+ (irq->ops->get_flags() & VGIC_IRQ_SW_RESAMPLE);
}
struct vgic_register_region;
@@ -492,11 +498,6 @@ struct vgic_v5_cpu_if {
struct gicv5_vpe gicv5_vpe;
};
-/* What PPI capabilities does a GICv5 host have */
-struct vgic_v5_ppi_caps {
- DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
-};
-
struct vgic_cpu {
/* CPU vif control registers for world switch */
union {
@@ -557,7 +558,7 @@ void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
unsigned int intid, bool level, void *owner);
void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
- struct irq_ops *ops);
+ const struct irq_ops *ops);
void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
u32 vintid);
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic.c b/tools/testing/selftests/kvm/arm64/no-vgic.c
index 25b2e3222f68..ab57902ce429 100644
--- a/tools/testing/selftests/kvm/arm64/no-vgic.c
+++ b/tools/testing/selftests/kvm/arm64/no-vgic.c
@@ -159,6 +159,7 @@ static void guest_code_gicv5(void)
check_gicv5_gic_op(CDAFF);
check_gicv5_gic_op(CDDI);
check_gicv5_gic_op(CDDIS);
+ check_gicv5_gic_op(CDEN);
check_gicv5_gic_op(CDEOI);
check_gicv5_gic_op(CDHM);
check_gicv5_gic_op(CDPEND);
diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c
index d785b660d847..96cfd6bb32f6 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_v5.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c
@@ -20,8 +20,6 @@ struct vm_gic {
u32 gic_dev_type;
};
-static u64 max_phys_size;
-
#define GUEST_CMD_IRQ_CDIA 10
#define GUEST_CMD_IRQ_DIEOI 11
#define GUEST_CMD_IS_AWAKE 12
@@ -131,6 +129,8 @@ static void test_vgic_v5_ppis(u32 gic_dev_type)
while (1) {
ret = run_vcpu(vcpus[0]);
+ if (ret)
+ break;
switch (get_ucall(vcpus[0], &uc)) {
case UCALL_SYNC:
@@ -146,7 +146,7 @@ static void test_vgic_v5_ppis(u32 gic_dev_type)
irq = FIELD_PREP(KVM_ARM_IRQ_NUM_MASK, 3);
irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
- _kvm_irq_line(v.vm, irq, level);
+ kvm_irq_line(v.vm, irq, level);
} else if (uc.args[1] == GUEST_CMD_IS_AWAKE) {
pr_info("Guest skipping WFI due to pending IRQ\n");
} else if (uc.args[1] == GUEST_CMD_IRQ_CDIA) {
@@ -208,13 +208,9 @@ void run_tests(u32 gic_dev_type)
int main(int ac, char **av)
{
int ret;
- int pa_bits;
test_disable_default_vgic();
- pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
- max_phys_size = 1ULL << pa_bits;
-
ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V5);
if (ret) {
pr_info("No GICv5 support; Not running GIC_v5 tests.\n");