| author | Paolo Bonzini <pbonzini@redhat.com> | 2026-02-09 18:18:19 +0100 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2026-02-09 18:18:19 +0100 |
| commit | 549006326978fbf66d9db2953cb1e4fb5487da65 | |
| tree | dbf6a9a1c54c785dd3a2349a6f248bbfc863c426 | /arch |
| parent | c14f646638ddf647e080d4755e9a008dc9db03e7 | |
| parent | 6316366129d2885fae07c2774f4b7ae0a45fb55d | |
Merge tag 'kvmarm-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 7.0
- Add support for FEAT_IDST, allowing accesses to ID registers that are
not implemented to be reported as a normal trap rather than as an UNDEF
exception.
- Add sanitisation of the VTCR_EL2 register, fixing a number of
UXN/PXN/XN bugs in the process.
- Full handling of RESx bits instead of only RES0, resulting in
SCTLR_EL2 being added to the list of sanitised registers (a short
illustrative sketch follows this list).
- More pKVM fixes for features that are not supposed to be exposed to
guests.
- Make sure that MTE being disabled on the pKVM host doesn't give it
the ability to attack the hypervisor.
- Allow pKVM's host stage-2 mappings to use the Force Write Back
version of the memory attributes by using the "pass-through"
encoding (see the second sketch after this list).
- Fix trapping of ICC_DIR_EL1 on GICv5 hosts emulating GICv3 for the
guest.
- Preliminary work for guest GICv5 support.
- A bunch of debugfs fixes, removing pointless custom iterators stored
in guest data structures.
- A small set of FPSIMD cleanups.
- Selftest fixes addressing the incorrect alignment of page
allocations.
- Other assorted low-impact fixes, including spelling fixes.
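
As a rough illustration of the RESx handling mentioned above, here is a minimal sketch, not the kernel's actual helpers: it shows how a pair of per-register RES0/RES1 masks (mirroring the `struct resx` introduced in the diff below) sanitises a guest-written value by clearing RES0 bits and forcing RES1 bits to one. All names other than `struct resx` are illustrative.

```c
#include <stdint.h>
#include <stdio.h>

/* Mirrors the res0/res1 pair carried per sanitised register. */
struct resx {
	uint64_t res0;	/* bits that must read as zero */
	uint64_t res1;	/* bits that must read as one  */
};

/* Hypothetical helper: clear RES0 bits, force RES1 bits to one. */
static uint64_t sanitise_reg(uint64_t val, struct resx m)
{
	return (val & ~m.res0) | m.res1;
}

int main(void)
{
	/* Example masks: bits [15:8] are RES0, bit 0 is RES1. */
	struct resx m = { .res0 = 0xff00, .res1 = 0x0001 };

	/* 0xabcc -> 0xcd: RES0 bits dropped, RES1 bit set. */
	printf("%#llx\n", (unsigned long long)sanitise_reg(0xabcc, m));
	return 0;
}
```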
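For the FWB "pass-through" item, a second minimal sketch. The MemAttr values are taken from the asm/memory.h hunk in the diff below; the helper name is hypothetical. With FEAT_S2FWB, the 7 ("AS_S1") encoding lets the stage-1 attributes determine the final memory type instead of forcing Normal Write-Back; without FWB, the Normal (0xf) encoding achieves the equivalent result through the architectural attribute-combining rules.

```c
/* Stage-2 MemAttr encodings, as defined in the asm/memory.h hunk below. */
#define MT_S2_NORMAL		0xf	/* FWB=0: combined with stage-1 */
#define MT_S2_AS_S1		MT_S2_NORMAL
#define MT_S2_FWB_NORMAL	6	/* FWB=1: forces Normal Write-Back */
#define MT_S2_FWB_AS_S1		7	/* FWB=1: stage-1 attributes win */

/*
 * Hypothetical helper: pick the "stage-1 wins" encoding for a host
 * stage-2 mapping, depending on whether FEAT_S2FWB is in use.
 */
static unsigned int s2_memattr_as_s1(int has_fwb)
{
	return has_fwb ? MT_S2_FWB_AS_S1 : MT_S2_AS_S1;
}
```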
Diffstat (limited to 'arch')
58 files changed, 1302 insertions, 890 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 93173f0a09c7..fcfb62ec4bae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1680,7 +1680,6 @@ config MITIGATE_SPECTRE_BRANCH_HISTORY config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" depends on !KCSAN - select ARM64_PAN help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved @@ -1859,36 +1858,6 @@ config ARM64_HW_AFDBM to work on pre-ARMv8.1 hardware and the performance impact is minimal. If unsure, say Y. -config ARM64_PAN - bool "Enable support for Privileged Access Never (PAN)" - default y - help - Privileged Access Never (PAN; part of the ARMv8.1 Extensions) - prevents the kernel or hypervisor from accessing user-space (EL0) - memory directly. - - Choosing this option will cause any unprotected (not using - copy_to_user et al) memory access to fail with a permission fault. - - The feature is detected at runtime, and will remain as a 'nop' - instruction if the cpu does not implement the feature. - -config ARM64_LSE_ATOMICS - bool - default ARM64_USE_LSE_ATOMICS - -config ARM64_USE_LSE_ATOMICS - bool "Atomic instructions" - default y - help - As part of the Large System Extensions, ARMv8.1 introduces new - atomic instructions that are designed specifically to scale in - very large systems. - - Say Y here to make use of these instructions for the in-kernel - atomic routines. This incurs a small overhead on CPUs that do - not support these instructions. - endmenu # "ARMv8.1 architectural features" menu "ARMv8.2 architectural features" @@ -2125,7 +2094,6 @@ config ARM64_MTE depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI depends on AS_HAS_ARMV8_5 # Required for tag checking in the uaccess routines - select ARM64_PAN select ARCH_HAS_SUBPAGE_FAULTS select ARCH_USES_HIGH_VMA_FLAGS select ARCH_USES_PG_ARCH_2 @@ -2157,7 +2125,6 @@ menu "ARMv8.7 architectural features" config ARM64_EPAN bool "Enable support for Enhanced Privileged Access Never (EPAN)" default y - depends on ARM64_PAN help Enhanced Privileged Access Never (EPAN) allows Privileged Access Never to be used with Execute-only mappings. 
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 2c8029472ad4..177c691914f8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -19,8 +19,6 @@ cpucap_is_possible(const unsigned int cap) "cap must be < ARM64_NCAPS"); switch (cap) { - case ARM64_HAS_PAN: - return IS_ENABLED(CONFIG_ARM64_PAN); case ARM64_HAS_EPAN: return IS_ENABLED(CONFIG_ARM64_EPAN); case ARM64_SVE: diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index cacd20df1786..85f4c1615472 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -83,9 +83,19 @@ /* Enable GCS if supported */ mrs_s x1, SYS_ID_AA64PFR1_EL1 ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lset_hcrx_\@ + cbz x1, .Lskip_gcs_hcrx_\@ orr x0, x0, #HCRX_EL2_GCSEn +.Lskip_gcs_hcrx_\@: + /* Enable LS64, LS64_V if supported */ + mrs_s x1, SYS_ID_AA64ISAR1_EL1 + ubfx x1, x1, #ID_AA64ISAR1_EL1_LS64_SHIFT, #4 + cbz x1, .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_EnALS + cmp x1, #ID_AA64ISAR1_EL1_LS64_LS64_V + b.lt .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_EnASR + .Lset_hcrx_\@: msr_s SYS_HCRX_EL2, x0 .Lskip_hcrx_\@: @@ -225,7 +235,6 @@ ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \ ICH_HFGRTR_EL2_ICC_PCR_EL1 | \ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \ - ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \ ICH_HFGRTR_EL2_ICC_CR0_EL1 | \ ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \ ICH_HFGRTR_EL2_ICC_APR_EL1) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 4975a92cbd17..7e86d400864e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -124,6 +124,7 @@ #define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +#define ESR_ELx_FSC_EXCL_ATOMIC (0x35) #define ESR_ELx_FSC_ADDRSZ (0x00) /* @@ -488,6 +489,13 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) (esr == ESR_ELx_FSC_ACCESS_L(0)); } +static inline bool esr_fsc_is_excl_atomic_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return esr == ESR_ELx_FSC_EXCL_ATOMIC; +} + static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) { esr &= ESR_ELx_FSC; diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 1f63814ae6c4..72ea4bda79f3 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -179,6 +179,7 @@ #define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR) #define KERNEL_HWCAP_MTE_STORE_ONLY __khwcap3_feature(MTE_STORE_ONLY) #define KERNEL_HWCAP_LSFE __khwcap3_feature(LSFE) +#define KERNEL_HWCAP_LS64 __khwcap3_feature(LS64) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index e1d30ba99d01..f463a654a2bb 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -671,7 +671,6 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, enum aarch64_insn_register Rn, enum aarch64_insn_register Rd, u8 lsb); -#ifdef CONFIG_ARM64_LSE_ATOMICS u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, enum aarch64_insn_register address, enum aarch64_insn_register value, @@ -683,28 +682,6 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, enum aarch64_insn_register value, enum aarch64_insn_size_type size, enum aarch64_insn_mem_order_type order); -#else -static inline -u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum 
aarch64_insn_register value, - enum aarch64_insn_size_type size, - enum aarch64_insn_mem_atomic_op op, - enum aarch64_insn_mem_order_type order) -{ - return AARCH64_BREAK_FAULT; -} - -static inline -u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size, - enum aarch64_insn_mem_order_type order) -{ - return AARCH64_BREAK_FAULT; -} -#endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e500600e4b9b..3f9233b5a130 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -101,7 +101,7 @@ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) -#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) @@ -124,37 +124,7 @@ #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK) -/* VTCR_EL2 Registers bits */ -#define VTCR_EL2_DS TCR_EL2_DS -#define VTCR_EL2_RES1 (1U << 31) -#define VTCR_EL2_HD (1 << 22) -#define VTCR_EL2_HA (1 << 21) -#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT -#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK -#define VTCR_EL2_TG0_MASK TCR_TG0_MASK -#define VTCR_EL2_TG0_4K TCR_TG0_4K -#define VTCR_EL2_TG0_16K TCR_TG0_16K -#define VTCR_EL2_TG0_64K TCR_TG0_64K -#define VTCR_EL2_SH0_MASK TCR_SH0_MASK -#define VTCR_EL2_SH0_INNER TCR_SH0_INNER -#define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK -#define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA -#define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK -#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA -#define VTCR_EL2_SL0_SHIFT 6 -#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT) -#define VTCR_EL2_T0SZ_MASK 0x3f -#define VTCR_EL2_VS_SHIFT 19 -#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT) -#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT) - -#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x) - /* - * We configure the Stage-2 page tables to always restrict the IPA space to be - * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are - * not known to exist and will break with this configuration. - * * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables @@ -162,9 +132,6 @@ * */ -#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1) - /* * VTCR_EL2:SL0 indicates the entry level for Stage2 translation. * Interestingly, it depends on the page size. 
@@ -196,30 +163,35 @@ */ #ifdef CONFIG_ARM64_64K_PAGES -#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K +#define VTCR_EL2_TGRAN 64K #define VTCR_EL2_TGRAN_SL0_BASE 3UL #elif defined(CONFIG_ARM64_16K_PAGES) -#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K +#define VTCR_EL2_TGRAN 16K #define VTCR_EL2_TGRAN_SL0_BASE 3UL #else /* 4K */ -#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K +#define VTCR_EL2_TGRAN 4K #define VTCR_EL2_TGRAN_SL0_BASE 2UL #endif #define VTCR_EL2_LVLS_TO_SL0(levels) \ - ((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT) + FIELD_PREP(VTCR_EL2_SL0, (VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels)))) #define VTCR_EL2_SL0_TO_LVLS(sl0) \ ((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE) #define VTCR_EL2_LVLS(vtcr) \ - VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT) + VTCR_EL2_SL0_TO_LVLS(FIELD_GET(VTCR_EL2_SL0, (vtcr))) + +#define VTCR_EL2_FLAGS (SYS_FIELD_PREP_ENUM(VTCR_EL2, SH0, INNER) | \ + SYS_FIELD_PREP_ENUM(VTCR_EL2, ORGN0, WBWA) | \ + SYS_FIELD_PREP_ENUM(VTCR_EL2, IRGN0, WBWA) | \ + SYS_FIELD_PREP_ENUM(VTCR_EL2, TG0, VTCR_EL2_TGRAN) | \ + VTCR_EL2_RES1) -#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN) -#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK)) +#define VTCR_EL2_IPA(vtcr) (64 - FIELD_GET(VTCR_EL2_T0SZ, (vtcr))) /* * ARM VMSAv8-64 defines an algorithm for finding the translation table @@ -344,6 +316,8 @@ #define PAR_TO_HPFAR(par) \ (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) +#define FAR_TO_FIPA_OFFSET(far) ((far) & GENMASK_ULL(11, 0)) + #define ECN(x) { ESR_ELx_EC_##x, #x } #define kvm_arm_exception_class \ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ce516d8187b1..a1ad12c72ebf 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -300,8 +300,6 @@ void kvm_get_kimage_voffset(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_final_ctr_el0(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); -void kvm_pan_patch_el2_entry(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, int nr_inst); void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 55d34192a8de..5bf3d7e1d92c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -45,8 +45,10 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_sync(struct kvm_vcpu *vcpu, u64 esr); int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr); int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr); +int kvm_inject_dabt_excl_atomic(struct kvm_vcpu *vcpu, u64 addr); void kvm_inject_size_fault(struct kvm_vcpu *vcpu); static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr) @@ -678,6 +680,12 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu) if (kvm_has_sctlr2(kvm)) vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En; + + if (kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_EnALS; + + if (kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_EnASR; } } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac7f970c7883..5d5a3bbdb95e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ 
b/arch/arm64/include/asm/kvm_host.h @@ -201,7 +201,7 @@ struct kvm_s2_mmu { * host to parse the guest S2. * This either contains: * - the virtual VTTBR programmed by the guest hypervisor with - * CnP cleared + * CnP cleared * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid * * We also cache the full VTCR which gets used for TLB invalidation, @@ -373,9 +373,6 @@ struct kvm_arch { /* Maximum number of counters for the guest */ u8 nr_pmu_counters; - /* Iterator for idreg debugfs */ - u8 idreg_debugfs_iter; - /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; struct maple_tree smccc_filter; @@ -495,7 +492,6 @@ enum vcpu_sysreg { DBGVCR32_EL2, /* Debug Vector Catch Register */ /* EL2 registers */ - SCTLR_EL2, /* System Control Register (EL2) */ ACTLR_EL2, /* Auxiliary Control Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ @@ -526,6 +522,7 @@ enum vcpu_sysreg { /* Anything from this can be RES0/RES1 sanitised */ MARKER(__SANITISED_REG_START__), + SCTLR_EL2, /* System Control Register (EL2) */ TCR2_EL2, /* Extended Translation Control Register (EL2) */ SCTLR2_EL2, /* System Control Register 2 (EL2) */ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ @@ -626,18 +623,45 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; +struct resx { + u64 res0; + u64 res1; +}; + struct kvm_sysreg_masks { - struct { - u64 res0; - u64 res1; - } mask[NR_SYS_REGS - __SANITISED_REG_START__]; + struct resx mask[NR_SYS_REGS - __SANITISED_REG_START__]; }; +static inline struct resx __kvm_get_sysreg_resx(struct kvm_arch *arch, + enum vcpu_sysreg sr) +{ + struct kvm_sysreg_masks *masks; + + masks = arch->sysreg_masks; + if (likely(masks && + sr >= __SANITISED_REG_START__ && sr < NR_SYS_REGS)) + return masks->mask[sr - __SANITISED_REG_START__]; + + return (struct resx){}; +} + +#define kvm_get_sysreg_resx(k, sr) __kvm_get_sysreg_resx(&(k)->arch, (sr)) + +static inline void __kvm_set_sysreg_resx(struct kvm_arch *arch, + enum vcpu_sysreg sr, struct resx resx) +{ + arch->sysreg_masks->mask[sr - __SANITISED_REG_START__] = resx; +} + +#define kvm_set_sysreg_resx(k, sr, resx) \ + __kvm_set_sysreg_resx(&(k)->arch, (sr), (resx)) + struct fgt_masks { const char *str; u64 mask; u64 nmask; u64 res0; + u64 res1; }; extern struct fgt_masks hfgrtr_masks; @@ -710,11 +734,11 @@ struct cpu_sve_state { struct kvm_host_data { #define KVM_HOST_DATA_FLAG_HAS_SPE 0 #define KVM_HOST_DATA_FLAG_HAS_TRBE 1 -#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 -#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 -#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6 -#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7 -#define KVM_HOST_DATA_FLAG_HAS_BRBE 8 +#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 2 +#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 3 +#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 4 +#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 5 +#define KVM_HOST_DATA_FLAG_HAS_BRBE 6 unsigned long flags; struct kvm_cpu_context host_ctxt; @@ -1606,7 +1630,7 @@ static inline bool kvm_arch_has_irq_bypass(void) } void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); -void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); +struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg); void check_feature_map(void); void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu); @@ -1655,4 +1679,6 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg p; \ }) +long 
kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 2dc5e6e742bb..d968aca0461a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -103,6 +103,7 @@ alternative_cb_end void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_layout(void); +u32 kvm_hyp_va_bits(void); void kvm_apply_hyp_relocations(void); #define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset) @@ -185,7 +186,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -int __init kvm_mmu_init(u32 *hyp_va_bits); +int __init kvm_mmu_init(u32 hyp_va_bits); static inline void *__kvm_vector_slot2addr(void *base, enum arm64_hyp_spectre_vector slot) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index c0ad262a8289..c201168f2857 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -87,15 +87,9 @@ typedef u64 kvm_pte_t; #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) -#define __KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) -#define __KVM_PTE_LEAF_ATTR_HI_S1_UXN BIT(54) -#define __KVM_PTE_LEAF_ATTR_HI_S1_PXN BIT(53) - -#define KVM_PTE_LEAF_ATTR_HI_S1_XN \ - ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? \ - (__KVM_PTE_LEAF_ATTR_HI_S1_UXN | \ - __KVM_PTE_LEAF_ATTR_HI_S1_PXN) : \ - __KVM_PTE_LEAF_ATTR_HI_S1_XN; }) +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) +#define KVM_PTE_LEAF_ATTR_HI_S1_UXN BIT(54) +#define KVM_PTE_LEAF_ATTR_HI_S1_PXN BIT(53) #define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53) @@ -237,13 +231,12 @@ struct kvm_pgtable_mm_ops { /** * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags. - * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have - * ARM64_HAS_STAGE2_FWB. * @KVM_PGTABLE_S2_IDMAP: Only use identity mappings. + * @KVM_PGTABLE_S2_AS_S1: Final memory attributes are that of Stage-1. */ enum kvm_pgtable_stage2_flags { - KVM_PGTABLE_S2_NOFWB = BIT(0), - KVM_PGTABLE_S2_IDMAP = BIT(1), + KVM_PGTABLE_S2_IDMAP = BIT(0), + KVM_PGTABLE_S2_AS_S1 = BIT(1), }; /** diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 0aecd4ac5f45..757076ad4ec9 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -9,6 +9,7 @@ #include <linux/arm_ffa.h> #include <linux/memblock.h> #include <linux/scatterlist.h> +#include <asm/kvm_host.h> #include <asm/kvm_pgtable.h> /* Maximum number of VMs that can co-exist under pKVM. */ @@ -23,10 +24,12 @@ void pkvm_destroy_hyp_vm(struct kvm *kvm); int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); /* - * This functions as an allow-list of protected VM capabilities. - * Features not explicitly allowed by this function are denied. + * Check whether the specific capability is allowed in pKVM. + * + * Certain features are allowed only for non-protected VMs in pKVM, which is why + * this takes the VM (kvm) as a parameter. 
*/ -static inline bool kvm_pvm_ext_allowed(long ext) +static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext) { switch (ext) { case KVM_CAP_IRQCHIP: @@ -42,11 +45,32 @@ static inline bool kvm_pvm_ext_allowed(long ext) case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: return true; - default: + case KVM_CAP_ARM_MTE: return false; + default: + return !kvm || !kvm_vm_is_protected(kvm); } } +/* + * Check whether the KVM VM IOCTL is allowed in pKVM. + * + * Certain features are allowed only for non-protected VMs in pKVM, which is why + * this takes the VM (kvm) as a parameter. + */ +static inline bool kvm_pkvm_ioctl_allowed(struct kvm *kvm, unsigned int ioctl) +{ + long ext; + int r; + + r = kvm_get_cap_for_kvm_ioctl(ioctl, &ext); + + if (WARN_ON_ONCE(r < 0)) + return false; + + return kvm_pkvm_ext_allowed(kvm, ext); +} + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 3129a5819d0e..1e77c45bb0a8 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -4,8 +4,6 @@ #include <asm/atomic_ll_sc.h> -#ifdef CONFIG_ARM64_LSE_ATOMICS - #define __LSE_PREAMBLE ".arch_extension lse\n" #include <linux/compiler_types.h> @@ -27,11 +25,4 @@ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) -#else /* CONFIG_ARM64_LSE_ATOMICS */ - -#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__) - -#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc - -#endif /* CONFIG_ARM64_LSE_ATOMICS */ #endif /* __ASM_LSE_H */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 9d54b2ea49d6..a2b7a33966ff 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -175,19 +175,24 @@ #define MT_DEVICE_nGnRE 4 /* - * Memory types for Stage-2 translation + * Memory types for Stage-2 translation when HCR_EL2.FWB=0. See R_HMNDG, + * R_TNHFM, R_GQFSF and I_MCQKW for the details on how these attributes get + * combined with Stage-1. */ #define MT_S2_NORMAL 0xf #define MT_S2_NORMAL_NC 0x5 #define MT_S2_DEVICE_nGnRE 0x1 +#define MT_S2_AS_S1 MT_S2_NORMAL /* - * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001 - * Stage-2 enforces Normal-WB and Device-nGnRE + * Memory types for Stage-2 translation when HCR_EL2.FWB=1. Stage-2 enforces + * Normal-WB and Device-nGnRE, unless we actively say that S1 wins. See + * R_VRJSW and R_RHWZM for details. 
*/ #define MT_S2_FWB_NORMAL 6 #define MT_S2_FWB_NORMAL_NC 5 #define MT_S2_FWB_DEVICE_nGnRE 1 +#define MT_S2_FWB_AS_S1 7 #ifdef CONFIG_ARM64_4K_PAGES #define IOREMAP_MAX_ORDER (PUD_SHIFT) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 161e8660eddd..d27e8872fe3c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -109,10 +109,10 @@ static inline bool __pure lpa2_is_enabled(void) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT) -#define PAGE_S2_MEMATTR(attr, has_fwb) \ +#define PAGE_S2_MEMATTR(attr) \ ({ \ u64 __val; \ - if (has_fwb) \ + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) \ __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \ else \ __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 106b15eb232a..f4436ecc630c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -504,7 +504,6 @@ #define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0) #define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5) -#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) #define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) #define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) @@ -517,7 +516,6 @@ #define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1) #define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2) #define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) -#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) @@ -561,7 +559,6 @@ #define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) #define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) -#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) #define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) #define SYS_ICH_LR0_EL2 __SYS__LR0_EL2(0) @@ -838,12 +835,6 @@ #define SCTLR_ELx_A (BIT(1)) #define SCTLR_ELx_M (BIT(0)) -/* SCTLR_EL2 specific flags. */ -#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ - (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ - (BIT(29))) - -#define SCTLR_EL2_BT (BIT(36)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE #else @@ -989,26 +980,6 @@ #define ICH_LR_PRIORITY_SHIFT 48 #define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) -/* ICH_VMCR_EL2 bit definitions */ -#define ICH_VMCR_ACK_CTL_SHIFT 2 -#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) -#define ICH_VMCR_FIQ_EN_SHIFT 3 -#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) -#define ICH_VMCR_CBPR_SHIFT 4 -#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) -#define ICH_VMCR_EOIM_SHIFT 9 -#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) -#define ICH_VMCR_BPR1_SHIFT 18 -#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) -#define ICH_VMCR_BPR0_SHIFT 21 -#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) -#define ICH_VMCR_PMR_SHIFT 24 -#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) -#define ICH_VMCR_ENG0_SHIFT 0 -#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) -#define ICH_VMCR_ENG1_SHIFT 1 -#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) - /* * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). 
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6490930deef8..9810106a3f66 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -124,14 +124,12 @@ static inline bool uaccess_ttbr0_enable(void) static inline void __uaccess_disable_hw_pan(void) { - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, - CONFIG_ARM64_PAN)); + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN)); } static inline void __uaccess_enable_hw_pan(void) { - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, - CONFIG_ARM64_PAN)); + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN)); } static inline void uaccess_disable_privileged(void) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 575564ecdb0b..06f83ca8de56 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -146,5 +146,6 @@ #define HWCAP3_MTE_FAR (1UL << 0) #define HWCAP3_MTE_STORE_ONLY (1UL << 1) #define HWCAP3_LSFE (1UL << 2) +#define HWCAP3_LS64 (1UL << 3) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c840a93b9ef9..35a3ffeb4efa 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -240,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LS64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), @@ -2164,7 +2165,6 @@ static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int sco return cpu_supports_bbml2_noabort(); } -#ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { /* @@ -2176,7 +2176,6 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); set_pstate_pan(1); } -#endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_RAS_EXTN static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) @@ -2260,6 +2259,16 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_E0PD */ +static void cpu_enable_ls64(struct arm64_cpu_capabilities const *cap) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnALS, SCTLR_EL1_EnALS); +} + +static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, 0); +} + #ifdef CONFIG_ARM64_PSEUDO_NMI static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) @@ -2326,16 +2335,16 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry, BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF); BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY); - if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) && - !is_midr_in_range_list(has_vgic_v3)) - return false; - if (!is_hyp_mode_available()) return false; if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY)) return true; + if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) && + !is_midr_in_range_list(has_vgic_v3)) + return false; + if (is_kernel_in_hyp_mode()) res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2); else @@ -2541,7 +2550,6 @@ static const struct 
arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF) }, -#ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, @@ -2550,7 +2558,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, IMP) }, -#endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_EPAN { .desc = "Enhanced Privileged Access Never", @@ -2560,7 +2567,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, PAN3) }, #endif /* CONFIG_ARM64_EPAN */ -#ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, @@ -2568,7 +2574,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP) }, -#endif /* CONFIG_ARM64_LSE_ATOMICS */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, @@ -3148,6 +3153,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP) }, + { + .desc = "LS64", + .capability = ARM64_HAS_LS64, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_ls64, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64) + }, + { + .desc = "LS64_V", + .capability = ARM64_HAS_LS64_V, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_ls64_v, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V) + }, {}, }; @@ -3267,6 +3288,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64, CAP_HWCAP, KERNEL_HWCAP_LS64), HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT), HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX), HWCAP_CAP(ID_AA64ISAR3_EL1, LSFE, IMP, CAP_HWCAP, KERNEL_HWCAP_LSFE), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c44e6d94f5de..6149bc91251d 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_PACA] = "paca", [KERNEL_HWCAP_PACG] = "pacg", [KERNEL_HWCAP_GCS] = "gcs", + [KERNEL_HWCAP_LS64] = "ls64", [KERNEL_HWCAP_DCPODP] = "dcpodp", [KERNEL_HWCAP_SVE2] = "sve2", [KERNEL_HWCAP_SVEAES] = "sveaes", diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ca04b338cb0d..87a822e5c4ca 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -299,7 +299,7 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) isb 0: - init_el2_hcr HCR_HOST_NVHE_FLAGS + init_el2_hcr HCR_HOST_NVHE_FLAGS | HCR_ATA init_el2_state /* Hypervisor stub */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 211f0e2e55e2..85bc629270bd 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -86,7 +86,6 @@ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_get_kimage_voffset); KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0); -KVM_NVHE_ALIAS(kvm_pan_patch_el2_entry); KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter); 
KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable); KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 80a580e019c5..b3801f532b10 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -887,6 +887,7 @@ static u8 spectre_bhb_loop_affected(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), {}, }; static const struct midr_range spectre_bhb_k24_list[] = { diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 99a07972068d..600f250753b4 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1056,10 +1056,14 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) ctxt->timer_id = timerid; - if (timerid == TIMER_VTIMER) - ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; - else - ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + if (!kvm_vm_is_protected(vcpu->kvm)) { + if (timerid == TIMER_VTIMER) + ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; + else + ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + } else { + ctxt->offset.vm_offset = NULL; + } hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); @@ -1083,7 +1087,8 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) timer_context_init(vcpu, i); /* Synchronize offsets across timers of a VM if not already provided */ - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { + if (!vcpu_is_protected(vcpu) && + !test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); timer_set_offset(vcpu_ptimer(vcpu), 0); } @@ -1687,6 +1692,9 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, if (offset->reserved) return -EINVAL; + if (kvm_vm_is_protected(kvm)) + return -EINVAL; + mutex_lock(&kvm->lock); if (!kvm_trylock_all_vcpus(kvm)) { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 620a465248d1..0f40d3da0550 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -40,6 +40,7 @@ #include <asm/kvm_pkvm.h> #include <asm/kvm_ptrauth.h> #include <asm/sections.h> +#include <asm/stacktrace/nvhe.h> #include <kvm/arm_hypercalls.h> #include <kvm/arm_pmu.h> @@ -58,6 +59,51 @@ enum kvm_wfx_trap_policy { static enum kvm_wfx_trap_policy kvm_wfi_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; +/* + * Tracks KVM IOCTLs and their associated KVM capabilities. + */ +struct kvm_ioctl_cap_map { + unsigned int ioctl; + long ext; +}; + +/* Make KVM_CAP_NR_VCPUS the reference for features we always supported */ +#define KVM_CAP_ARM_BASIC KVM_CAP_NR_VCPUS + +/* + * Sorted by ioctl to allow for potential binary search, + * though linear scan is sufficient for this size. 
+ */ +static const struct kvm_ioctl_cap_map vm_ioctl_caps[] = { + { KVM_CREATE_IRQCHIP, KVM_CAP_IRQCHIP }, + { KVM_ARM_SET_DEVICE_ADDR, KVM_CAP_ARM_SET_DEVICE_ADDR }, + { KVM_ARM_MTE_COPY_TAGS, KVM_CAP_ARM_MTE }, + { KVM_SET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL }, + { KVM_GET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL }, + { KVM_HAS_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL }, + { KVM_ARM_SET_COUNTER_OFFSET, KVM_CAP_COUNTER_OFFSET }, + { KVM_ARM_GET_REG_WRITABLE_MASKS, KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES }, + { KVM_ARM_PREFERRED_TARGET, KVM_CAP_ARM_BASIC }, +}; + +/* + * Set *ext to the capability. + * Return 0 if found, or -EINVAL if no IOCTL matches. + */ +long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vm_ioctl_caps); i++) { + if (vm_ioctl_caps[i].ioctl == ioctl) { + *ext = vm_ioctl_caps[i].ext; + return 0; + } + } + + return -EINVAL; +} + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base); @@ -87,7 +133,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (cap->flags) return -EINVAL; - if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap)) + if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, cap->cap)) return -EINVAL; switch (cap->cap) { @@ -303,7 +349,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext)) + if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, ext)) return 0; switch (ext) { @@ -1894,6 +1940,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) void __user *argp = (void __user *)arg; struct kvm_device_attr attr; + if (is_protected_kvm_enabled() && !kvm_pkvm_ioctl_allowed(kvm, ioctl)) + return -EINVAL; + switch (ioctl) { case KVM_CREATE_IRQCHIP: { int ret; @@ -2045,6 +2094,12 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; else params->hcr_el2 = HCR_HOST_NVHE_FLAGS; + + if (system_supports_mte()) + params->hcr_el2 |= HCR_ATA; + else + params->hcr_el2 |= HCR_TID5; + if (cpus_have_final_cap(ARM64_KVM_HVHE)) params->hcr_el2 |= HCR_E2H; params->vttbr = params->vtcr = 0; @@ -2569,7 +2624,7 @@ static void pkvm_hyp_init_ptrauth(void) /* Inits Hyp-mode on all online CPUs */ static int __init init_hyp_mode(void) { - u32 hyp_va_bits; + u32 hyp_va_bits = kvm_hyp_va_bits(); int cpu; int err = -ENOMEM; @@ -2583,7 +2638,7 @@ static int __init init_hyp_mode(void) /* * Allocate Hyp PGD and setup Hyp identity mapping */ - err = kvm_mmu_init(&hyp_va_bits); + err = kvm_mmu_init(hyp_va_bits); if (err) goto out_err; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 808d26bed182..885bd5bb2f41 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1704,7 +1704,6 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) } } -#ifdef CONFIG_ARM64_LSE_ATOMICS static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) { u64 tmp = old; @@ -1729,12 +1728,6 @@ static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) return ret; } -#else -static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) -{ - return -EINVAL; -} -#endif static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new) { diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 24bb3f36e9d5..d9f553cbf9df 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -16,14 +16,18 @@ */ struct reg_bits_to_feat_map { union { - u64 bits; - u64 *res0p; + 
u64 bits; + struct fgt_masks *masks; }; #define NEVER_FGU BIT(0) /* Can trap, but never UNDEF */ #define CALL_FUNC BIT(1) /* Needs to evaluate tons of crap */ -#define FIXED_VALUE BIT(2) /* RAZ/WI or RAO/WI in KVM */ -#define RES0_POINTER BIT(3) /* Pointer to RES0 value instead of bits */ +#define FORCE_RESx BIT(2) /* Unconditional RESx */ +#define MASKS_POINTER BIT(3) /* Pointer to fgt_masks struct instead of bits */ +#define AS_RES1 BIT(4) /* RES1 when not supported */ +#define REQUIRES_E2H1 BIT(5) /* Add HCR_EL2.E2H RES1 as a pre-condition */ +#define RES1_WHEN_E2H0 BIT(6) /* RES1 when E2H=0 and not supported */ +#define RES1_WHEN_E2H1 BIT(7) /* RES1 when E2H=1 and not supported */ unsigned long flags; @@ -36,7 +40,6 @@ struct reg_bits_to_feat_map { s8 lo_lim; }; bool (*match)(struct kvm *); - bool (*fval)(struct kvm *, u64 *); }; }; @@ -69,18 +72,17 @@ struct reg_feat_map_desc { .lo_lim = id ##_## fld ##_## lim \ } -#define __NEEDS_FEAT_2(m, f, w, fun, dummy) \ +#define __NEEDS_FEAT_1(m, f, w, fun) \ { \ .w = (m), \ .flags = (f) | CALL_FUNC, \ - .fval = (fun), \ + .match = (fun), \ } -#define __NEEDS_FEAT_1(m, f, w, fun) \ +#define __NEEDS_FEAT_0(m, f, w, ...) \ { \ .w = (m), \ - .flags = (f) | CALL_FUNC, \ - .match = (fun), \ + .flags = (f), \ } #define __NEEDS_FEAT_FLAG(m, f, w, ...) \ @@ -89,11 +91,8 @@ struct reg_feat_map_desc { #define NEEDS_FEAT_FLAG(m, f, ...) \ __NEEDS_FEAT_FLAG(m, f, bits, __VA_ARGS__) -#define NEEDS_FEAT_FIXED(m, ...) \ - __NEEDS_FEAT_FLAG(m, FIXED_VALUE, bits, __VA_ARGS__, 0) - -#define NEEDS_FEAT_RES0(p, ...) \ - __NEEDS_FEAT_FLAG(p, RES0_POINTER, res0p, __VA_ARGS__) +#define NEEDS_FEAT_MASKS(p, ...) \ + __NEEDS_FEAT_FLAG(p, MASKS_POINTER, masks, __VA_ARGS__) /* * Declare the dependency between a set of bits and a set of features, @@ -101,27 +100,32 @@ struct reg_feat_map_desc { */ #define NEEDS_FEAT(m, ...) NEEDS_FEAT_FLAG(m, 0, __VA_ARGS__) +/* Declare fixed RESx bits */ +#define FORCE_RES0(m) NEEDS_FEAT_FLAG(m, FORCE_RESx) +#define FORCE_RES1(m) NEEDS_FEAT_FLAG(m, FORCE_RESx | AS_RES1) + /* - * Declare the dependency between a non-FGT register, a set of - * feature, and the set of individual bits it contains. This generates - * a struct reg_feat_map_desc. + * Declare the dependency between a non-FGT register, a set of features, + * and the set of individual bits it contains. This generates a struct + * reg_feat_map_desc. */ #define DECLARE_FEAT_MAP(n, r, m, f) \ struct reg_feat_map_desc n = { \ .name = #r, \ - .feat_map = NEEDS_FEAT(~r##_RES0, f), \ + .feat_map = NEEDS_FEAT(~(r##_RES0 | \ + r##_RES1), f), \ .bit_feat_map = m, \ .bit_feat_map_sz = ARRAY_SIZE(m), \ } /* * Specialised version of the above for FGT registers that have their - * RES0 masks described as struct fgt_masks. + * RESx masks described as struct fgt_masks. 
*/ #define DECLARE_FEAT_MAP_FGT(n, msk, m, f) \ struct reg_feat_map_desc n = { \ .name = #msk, \ - .feat_map = NEEDS_FEAT_RES0(&msk.res0, f),\ + .feat_map = NEEDS_FEAT_MASKS(&msk, f), \ .bit_feat_map = m, \ .bit_feat_map_sz = ARRAY_SIZE(m), \ } @@ -140,6 +144,7 @@ struct reg_feat_map_desc { #define FEAT_AA64EL1 ID_AA64PFR0_EL1, EL1, IMP #define FEAT_AA64EL2 ID_AA64PFR0_EL1, EL2, IMP #define FEAT_AA64EL3 ID_AA64PFR0_EL1, EL3, IMP +#define FEAT_SEL2 ID_AA64PFR0_EL1, SEL2, IMP #define FEAT_AIE ID_AA64MMFR3_EL1, AIE, IMP #define FEAT_S2POE ID_AA64MMFR3_EL1, S2POE, IMP #define FEAT_S1POE ID_AA64MMFR3_EL1, S1POE, IMP @@ -182,7 +187,6 @@ struct reg_feat_map_desc { #define FEAT_RME ID_AA64PFR0_EL1, RME, IMP #define FEAT_MPAM ID_AA64PFR0_EL1, MPAM, 1 #define FEAT_S2FWB ID_AA64MMFR2_EL1, FWB, IMP -#define FEAT_TME ID_AA64ISAR0_EL1, TME, IMP #define FEAT_TWED ID_AA64MMFR1_EL1, TWED, IMP #define FEAT_E2H0 ID_AA64MMFR4_EL1, E2H0, IMP #define FEAT_SRMASK ID_AA64MMFR4_EL1, SRMASK, IMP @@ -201,6 +205,8 @@ struct reg_feat_map_desc { #define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP #define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP #define FEAT_HAFT ID_AA64MMFR1_EL1, HAFDBS, HAFT +#define FEAT_HDBSS ID_AA64MMFR1_EL1, HAFDBS, HDBSS +#define FEAT_HPDS2 ID_AA64MMFR1_EL1, HPDS, HPDS2 #define FEAT_BTI ID_AA64PFR1_EL1, BT, IMP #define FEAT_ExS ID_AA64MMFR0_EL1, EXS, IMP #define FEAT_IESB ID_AA64MMFR2_EL1, IESB, IMP @@ -218,6 +224,7 @@ struct reg_feat_map_desc { #define FEAT_FGT2 ID_AA64MMFR0_EL1, FGT, FGT2 #define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP #define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP +#define FEAT_S2PIE ID_AA64MMFR3_EL1, S2PIE, IMP static bool not_feat_aa64el3(struct kvm *kvm) { @@ -305,21 +312,6 @@ static bool feat_trbe_mpam(struct kvm *kvm) (read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_MPAM)); } -static bool feat_asid2_e2h1(struct kvm *kvm) -{ - return kvm_has_feat(kvm, FEAT_ASID2) && !kvm_has_feat(kvm, FEAT_E2H0); -} - -static bool feat_d128_e2h1(struct kvm *kvm) -{ - return kvm_has_feat(kvm, FEAT_D128) && !kvm_has_feat(kvm, FEAT_E2H0); -} - -static bool feat_mec_e2h1(struct kvm *kvm) -{ - return kvm_has_feat(kvm, FEAT_MEC) && !kvm_has_feat(kvm, FEAT_E2H0); -} - static bool feat_ebep_pmuv3_ss(struct kvm *kvm) { return kvm_has_feat(kvm, FEAT_EBEP) || kvm_has_feat(kvm, FEAT_PMUv3_SS); @@ -361,29 +353,26 @@ static bool feat_pmuv3p9(struct kvm *kvm) return check_pmu_revision(kvm, V3P9); } -static bool compute_hcr_rw(struct kvm *kvm, u64 *bits) -{ - /* This is purely academic: AArch32 and NV are mutually exclusive */ - if (bits) { - if (kvm_has_feat(kvm, FEAT_AA32EL1)) - *bits &= ~HCR_EL2_RW; - else - *bits |= HCR_EL2_RW; - } +#define has_feat_s2tgran(k, s) \ + ((kvm_has_feat_enum(kvm, ID_AA64MMFR0_EL1, TGRAN##s##_2, TGRAN##s) && \ + kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN##s, IMP)) || \ + kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN##s##_2, IMP)) - return true; +static bool feat_lpa2(struct kvm *kvm) +{ + return ((kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT) || + !kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN4, IMP)) && + (kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT) || + !kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN16, IMP)) && + (kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN4_2, 52_BIT) || + !has_feat_s2tgran(kvm, 4)) && + (kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN16_2, 52_BIT) || + !has_feat_s2tgran(kvm, 16))); } -static bool compute_hcr_e2h(struct kvm *kvm, u64 *bits) +static bool feat_vmid16(struct kvm *kvm) { - if (bits) { - if (kvm_has_feat(kvm, FEAT_E2H0)) - *bits &= ~HCR_EL2_E2H; - else - *bits |= 
HCR_EL2_E2H; - } - - return true; + return kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16); } static const struct reg_bits_to_feat_map hfgrtr_feat_map[] = { @@ -939,7 +928,7 @@ static const DECLARE_FEAT_MAP(hcrx_desc, __HCRX_EL2, static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT(HCR_EL2_TID0, FEAT_AA32EL0), - NEEDS_FEAT_FIXED(HCR_EL2_RW, compute_hcr_rw), + NEEDS_FEAT_FLAG(HCR_EL2_RW, AS_RES1, FEAT_AA32EL1), NEEDS_FEAT(HCR_EL2_HCD, not_feat_aa64el3), NEEDS_FEAT(HCR_EL2_AMO | HCR_EL2_BSU | @@ -949,7 +938,6 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = { HCR_EL2_FMO | HCR_EL2_ID | HCR_EL2_IMO | - HCR_EL2_MIOCNCE | HCR_EL2_PTW | HCR_EL2_SWIO | HCR_EL2_TACR | @@ -1001,11 +989,12 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT(HCR_EL2_FIEN, feat_rasv1p1), NEEDS_FEAT(HCR_EL2_GPF, FEAT_RME), NEEDS_FEAT(HCR_EL2_FWB, FEAT_S2FWB), - NEEDS_FEAT(HCR_EL2_TME, FEAT_TME), NEEDS_FEAT(HCR_EL2_TWEDEL | HCR_EL2_TWEDEn, FEAT_TWED), - NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h), + NEEDS_FEAT_FLAG(HCR_EL2_E2H, RES1_WHEN_E2H1 | FORCE_RESx), + FORCE_RES0(HCR_EL2_RES0), + FORCE_RES1(HCR_EL2_RES1), }; static const DECLARE_FEAT_MAP(hcr_desc, HCR_EL2, @@ -1026,21 +1015,23 @@ static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { SCTLR2_EL1_CPTM | SCTLR2_EL1_CPTM0, FEAT_CPA2), + FORCE_RES0(SCTLR2_EL1_RES0), + FORCE_RES1(SCTLR2_EL1_RES1), }; static const DECLARE_FEAT_MAP(sctlr2_desc, SCTLR2_EL1, sctlr2_feat_map, FEAT_SCTLR2); static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { - NEEDS_FEAT(TCR2_EL2_FNG1 | - TCR2_EL2_FNG0 | - TCR2_EL2_A2, - feat_asid2_e2h1), - NEEDS_FEAT(TCR2_EL2_DisCH1 | - TCR2_EL2_DisCH0 | - TCR2_EL2_D128, - feat_d128_e2h1), - NEEDS_FEAT(TCR2_EL2_AMEC1, feat_mec_e2h1), + NEEDS_FEAT_FLAG(TCR2_EL2_FNG1 | + TCR2_EL2_FNG0 | + TCR2_EL2_A2, + REQUIRES_E2H1, FEAT_ASID2), + NEEDS_FEAT_FLAG(TCR2_EL2_DisCH1 | + TCR2_EL2_DisCH0 | + TCR2_EL2_D128, + REQUIRES_E2H1, FEAT_D128), + NEEDS_FEAT_FLAG(TCR2_EL2_AMEC1, REQUIRES_E2H1, FEAT_MEC), NEEDS_FEAT(TCR2_EL2_AMEC0, FEAT_MEC), NEEDS_FEAT(TCR2_EL2_HAFT, FEAT_HAFT), NEEDS_FEAT(TCR2_EL2_PTTWI | @@ -1051,33 +1042,36 @@ static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { TCR2_EL2_E0POE, FEAT_S1POE), NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE), + FORCE_RES0(TCR2_EL2_RES0), + FORCE_RES1(TCR2_EL2_RES1), }; static const DECLARE_FEAT_MAP(tcr2_el2_desc, TCR2_EL2, tcr2_el2_feat_map, FEAT_TCR2); static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { - NEEDS_FEAT(SCTLR_EL1_CP15BEN | - SCTLR_EL1_ITD | - SCTLR_EL1_SED, - FEAT_AA32EL0), + NEEDS_FEAT(SCTLR_EL1_CP15BEN, FEAT_AA32EL0), + NEEDS_FEAT_FLAG(SCTLR_EL1_ITD | + SCTLR_EL1_SED, + AS_RES1, FEAT_AA32EL0), NEEDS_FEAT(SCTLR_EL1_BT0 | SCTLR_EL1_BT1, FEAT_BTI), NEEDS_FEAT(SCTLR_EL1_CMOW, FEAT_CMOW), - NEEDS_FEAT(SCTLR_EL1_TSCXT, feat_csv2_2_csv2_1p2), - NEEDS_FEAT(SCTLR_EL1_EIS | - SCTLR_EL1_EOS, - FEAT_ExS), + NEEDS_FEAT_FLAG(SCTLR_EL1_TSCXT, + AS_RES1, feat_csv2_2_csv2_1p2), + NEEDS_FEAT_FLAG(SCTLR_EL1_EIS | + SCTLR_EL1_EOS, + AS_RES1, FEAT_ExS), NEEDS_FEAT(SCTLR_EL1_EnFPM, FEAT_FPMR), NEEDS_FEAT(SCTLR_EL1_IESB, FEAT_IESB), NEEDS_FEAT(SCTLR_EL1_EnALS, FEAT_LS64), NEEDS_FEAT(SCTLR_EL1_EnAS0, FEAT_LS64_ACCDATA), NEEDS_FEAT(SCTLR_EL1_EnASR, FEAT_LS64_V), NEEDS_FEAT(SCTLR_EL1_nAA, FEAT_LSE2), - NEEDS_FEAT(SCTLR_EL1_LSMAOE | - SCTLR_EL1_nTLSMD, - FEAT_LSMAOC), + NEEDS_FEAT_FLAG(SCTLR_EL1_LSMAOE | + SCTLR_EL1_nTLSMD, + AS_RES1, FEAT_LSMAOC), NEEDS_FEAT(SCTLR_EL1_EE, FEAT_MixedEnd), NEEDS_FEAT(SCTLR_EL1_E0E, feat_mixedendel0), 
NEEDS_FEAT(SCTLR_EL1_MSCEn, FEAT_MOPS), @@ -1093,7 +1087,8 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { NEEDS_FEAT(SCTLR_EL1_NMI | SCTLR_EL1_SPINTMASK, FEAT_NMI), - NEEDS_FEAT(SCTLR_EL1_SPAN, FEAT_PAN), + NEEDS_FEAT_FLAG(SCTLR_EL1_SPAN, + AS_RES1, FEAT_PAN), NEEDS_FEAT(SCTLR_EL1_EPAN, FEAT_PAN3), NEEDS_FEAT(SCTLR_EL1_EnDA | SCTLR_EL1_EnDB | @@ -1104,17 +1099,10 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { NEEDS_FEAT(SCTLR_EL1_EnRCTX, FEAT_SPECRES), NEEDS_FEAT(SCTLR_EL1_DSSBS, FEAT_SSBS), NEEDS_FEAT(SCTLR_EL1_TIDCP, FEAT_TIDCP1), - NEEDS_FEAT(SCTLR_EL1_TME0 | - SCTLR_EL1_TME | - SCTLR_EL1_TMT0 | - SCTLR_EL1_TMT, - FEAT_TME), NEEDS_FEAT(SCTLR_EL1_TWEDEL | SCTLR_EL1_TWEDEn, FEAT_TWED), NEEDS_FEAT(SCTLR_EL1_UCI | - SCTLR_EL1_EE | - SCTLR_EL1_E0E | SCTLR_EL1_WXN | SCTLR_EL1_nTWE | SCTLR_EL1_nTWI | @@ -1128,11 +1116,91 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { SCTLR_EL1_A | SCTLR_EL1_M, FEAT_AA64EL1), + FORCE_RES0(SCTLR_EL1_RES0), + FORCE_RES1(SCTLR_EL1_RES1), }; static const DECLARE_FEAT_MAP(sctlr_el1_desc, SCTLR_EL1, sctlr_el1_feat_map, FEAT_AA64EL1); +static const struct reg_bits_to_feat_map sctlr_el2_feat_map[] = { + NEEDS_FEAT_FLAG(SCTLR_EL2_CP15BEN, + RES1_WHEN_E2H0 | REQUIRES_E2H1, + FEAT_AA32EL0), + NEEDS_FEAT_FLAG(SCTLR_EL2_ITD | + SCTLR_EL2_SED, + RES1_WHEN_E2H1 | REQUIRES_E2H1, + FEAT_AA32EL0), + NEEDS_FEAT_FLAG(SCTLR_EL2_BT0, REQUIRES_E2H1, FEAT_BTI), + NEEDS_FEAT(SCTLR_EL2_BT, FEAT_BTI), + NEEDS_FEAT_FLAG(SCTLR_EL2_CMOW, REQUIRES_E2H1, FEAT_CMOW), + NEEDS_FEAT_FLAG(SCTLR_EL2_TSCXT, + RES1_WHEN_E2H1 | REQUIRES_E2H1, + feat_csv2_2_csv2_1p2), + NEEDS_FEAT_FLAG(SCTLR_EL2_EIS | + SCTLR_EL2_EOS, + AS_RES1, FEAT_ExS), + NEEDS_FEAT(SCTLR_EL2_EnFPM, FEAT_FPMR), + NEEDS_FEAT(SCTLR_EL2_IESB, FEAT_IESB), + NEEDS_FEAT_FLAG(SCTLR_EL2_EnALS, REQUIRES_E2H1, FEAT_LS64), + NEEDS_FEAT_FLAG(SCTLR_EL2_EnAS0, REQUIRES_E2H1, FEAT_LS64_ACCDATA), + NEEDS_FEAT_FLAG(SCTLR_EL2_EnASR, REQUIRES_E2H1, FEAT_LS64_V), + NEEDS_FEAT(SCTLR_EL2_nAA, FEAT_LSE2), + NEEDS_FEAT_FLAG(SCTLR_EL2_LSMAOE | + SCTLR_EL2_nTLSMD, + AS_RES1 | REQUIRES_E2H1, FEAT_LSMAOC), + NEEDS_FEAT(SCTLR_EL2_EE, FEAT_MixedEnd), + NEEDS_FEAT_FLAG(SCTLR_EL2_E0E, REQUIRES_E2H1, feat_mixedendel0), + NEEDS_FEAT_FLAG(SCTLR_EL2_MSCEn, REQUIRES_E2H1, FEAT_MOPS), + NEEDS_FEAT_FLAG(SCTLR_EL2_ATA0 | + SCTLR_EL2_TCF0, + REQUIRES_E2H1, FEAT_MTE2), + NEEDS_FEAT(SCTLR_EL2_ATA | + SCTLR_EL2_TCF, + FEAT_MTE2), + NEEDS_FEAT(SCTLR_EL2_ITFSB, feat_mte_async), + NEEDS_FEAT_FLAG(SCTLR_EL2_TCSO0, REQUIRES_E2H1, FEAT_MTE_STORE_ONLY), + NEEDS_FEAT(SCTLR_EL2_TCSO, + FEAT_MTE_STORE_ONLY), + NEEDS_FEAT(SCTLR_EL2_NMI | + SCTLR_EL2_SPINTMASK, + FEAT_NMI), + NEEDS_FEAT_FLAG(SCTLR_EL2_SPAN, AS_RES1 | REQUIRES_E2H1, FEAT_PAN), + NEEDS_FEAT_FLAG(SCTLR_EL2_EPAN, REQUIRES_E2H1, FEAT_PAN3), + NEEDS_FEAT(SCTLR_EL2_EnDA | + SCTLR_EL2_EnDB | + SCTLR_EL2_EnIA | + SCTLR_EL2_EnIB, + feat_pauth), + NEEDS_FEAT_FLAG(SCTLR_EL2_EnTP2, REQUIRES_E2H1, FEAT_SME), + NEEDS_FEAT(SCTLR_EL2_EnRCTX, FEAT_SPECRES), + NEEDS_FEAT(SCTLR_EL2_DSSBS, FEAT_SSBS), + NEEDS_FEAT_FLAG(SCTLR_EL2_TIDCP, REQUIRES_E2H1, FEAT_TIDCP1), + NEEDS_FEAT_FLAG(SCTLR_EL2_TWEDEL | + SCTLR_EL2_TWEDEn, + REQUIRES_E2H1, FEAT_TWED), + NEEDS_FEAT_FLAG(SCTLR_EL2_nTWE | + SCTLR_EL2_nTWI, + AS_RES1 | REQUIRES_E2H1, FEAT_AA64EL2), + NEEDS_FEAT_FLAG(SCTLR_EL2_UCI | + SCTLR_EL2_UCT | + SCTLR_EL2_DZE | + SCTLR_EL2_SA0, + REQUIRES_E2H1, FEAT_AA64EL2), + NEEDS_FEAT(SCTLR_EL2_WXN | + SCTLR_EL2_I | + SCTLR_EL2_SA | + SCTLR_EL2_C | + SCTLR_EL2_A | + SCTLR_EL2_M, + 
FEAT_AA64EL2), + FORCE_RES0(SCTLR_EL2_RES0), + FORCE_RES1(SCTLR_EL2_RES1), +}; + +static const DECLARE_FEAT_MAP(sctlr_el2_desc, SCTLR_EL2, + sctlr_el2_feat_map, FEAT_AA64EL2); + static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9), NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock), @@ -1162,27 +1230,75 @@ static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { MDCR_EL2_TDE | MDCR_EL2_TDRA, FEAT_AA64EL1), + FORCE_RES0(MDCR_EL2_RES0), + FORCE_RES1(MDCR_EL2_RES1), }; static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2, mdcr_el2_feat_map, FEAT_AA64EL2); +static const struct reg_bits_to_feat_map vtcr_el2_feat_map[] = { + NEEDS_FEAT(VTCR_EL2_HDBSS, FEAT_HDBSS), + NEEDS_FEAT(VTCR_EL2_HAFT, FEAT_HAFT), + NEEDS_FEAT(VTCR_EL2_TL0 | + VTCR_EL2_TL1 | + VTCR_EL2_AssuredOnly | + VTCR_EL2_GCSH, + FEAT_THE), + NEEDS_FEAT(VTCR_EL2_D128, FEAT_D128), + NEEDS_FEAT(VTCR_EL2_S2POE, FEAT_S2POE), + NEEDS_FEAT(VTCR_EL2_S2PIE, FEAT_S2PIE), + NEEDS_FEAT(VTCR_EL2_SL2 | + VTCR_EL2_DS, + feat_lpa2), + NEEDS_FEAT(VTCR_EL2_NSA | + VTCR_EL2_NSW, + FEAT_SEL2), + NEEDS_FEAT(VTCR_EL2_HWU62 | + VTCR_EL2_HWU61 | + VTCR_EL2_HWU60 | + VTCR_EL2_HWU59, + FEAT_HPDS2), + NEEDS_FEAT(VTCR_EL2_HD, ID_AA64MMFR1_EL1, HAFDBS, DBM), + NEEDS_FEAT(VTCR_EL2_HA, ID_AA64MMFR1_EL1, HAFDBS, AF), + NEEDS_FEAT(VTCR_EL2_VS, feat_vmid16), + NEEDS_FEAT(VTCR_EL2_PS | + VTCR_EL2_TG0 | + VTCR_EL2_SH0 | + VTCR_EL2_ORGN0 | + VTCR_EL2_IRGN0 | + VTCR_EL2_SL0 | + VTCR_EL2_T0SZ, + FEAT_AA64EL1), + FORCE_RES0(VTCR_EL2_RES0), + FORCE_RES1(VTCR_EL2_RES1), +}; + +static const DECLARE_FEAT_MAP(vtcr_el2_desc, VTCR_EL2, + vtcr_el2_feat_map, FEAT_AA64EL2); + static void __init check_feat_map(const struct reg_bits_to_feat_map *map, - int map_size, u64 res0, const char *str) + int map_size, u64 resx, const char *str) { u64 mask = 0; + /* + * Don't account for FORCE_RESx that are architectural, and + * therefore part of the resx parameter. Other FORCE_RESx bits + * are implementation choices, and therefore accounted for. + */ for (int i = 0; i < map_size; i++) - mask |= map[i].bits; + if (!((map[i].flags & FORCE_RESx) && (map[i].bits & resx))) + mask |= map[i].bits; - if (mask != ~res0) + if (mask != ~resx) kvm_err("Undefined %s behaviour, bits %016llx\n", - str, mask ^ ~res0); + str, mask ^ ~resx); } static u64 reg_feat_map_bits(const struct reg_bits_to_feat_map *map) { - return map->flags & RES0_POINTER ? ~(*map->res0p) : map->bits; + return map->flags & MASKS_POINTER ? 
(map->masks->mask | map->masks->nmask) : map->bits; } static void __init check_reg_desc(const struct reg_feat_map_desc *r) @@ -1209,7 +1325,9 @@ void __init check_feature_map(void) check_reg_desc(&sctlr2_desc); check_reg_desc(&tcr2_el2_desc); check_reg_desc(&sctlr_el1_desc); + check_reg_desc(&sctlr_el2_desc); check_reg_desc(&mdcr_el2_desc); + check_reg_desc(&vtcr_el2_desc); } static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map) @@ -1226,14 +1344,14 @@ static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map } } -static u64 __compute_fixed_bits(struct kvm *kvm, - const struct reg_bits_to_feat_map *map, - int map_size, - u64 *fixed_bits, - unsigned long require, - unsigned long exclude) +static struct resx compute_resx_bits(struct kvm *kvm, + const struct reg_bits_to_feat_map *map, + int map_size, + unsigned long require, + unsigned long exclude) { - u64 val = 0; + bool e2h0 = kvm_has_feat(kvm, FEAT_E2H0); + struct resx resx = {}; for (int i = 0; i < map_size; i++) { bool match; @@ -1244,60 +1362,72 @@ static u64 __compute_fixed_bits(struct kvm *kvm, if (map[i].flags & exclude) continue; - if (map[i].flags & CALL_FUNC) - match = (map[i].flags & FIXED_VALUE) ? - map[i].fval(kvm, fixed_bits) : - map[i].match(kvm); + if (map[i].flags & FORCE_RESx) + match = false; + else if (map[i].flags & CALL_FUNC) + match = map[i].match(kvm); else match = idreg_feat_match(kvm, &map[i]); - if (!match || (map[i].flags & FIXED_VALUE)) - val |= reg_feat_map_bits(&map[i]); - } + if (map[i].flags & REQUIRES_E2H1) + match &= !e2h0; - return val; -} + if (!match) { + u64 bits = reg_feat_map_bits(&map[i]); -static u64 compute_res0_bits(struct kvm *kvm, - const struct reg_bits_to_feat_map *map, - int map_size, - unsigned long require, - unsigned long exclude) -{ - return __compute_fixed_bits(kvm, map, map_size, NULL, - require, exclude | FIXED_VALUE); -} + if ((map[i].flags & AS_RES1) || + (e2h0 && (map[i].flags & RES1_WHEN_E2H0)) || + (!e2h0 && (map[i].flags & RES1_WHEN_E2H1))) + resx.res1 |= bits; + else + resx.res0 |= bits; + } + } -static u64 compute_reg_res0_bits(struct kvm *kvm, - const struct reg_feat_map_desc *r, - unsigned long require, unsigned long exclude) + return resx; +} +static struct resx compute_reg_resx_bits(struct kvm *kvm, + const struct reg_feat_map_desc *r, + unsigned long require, + unsigned long exclude) { - u64 res0; + struct resx resx; - res0 = compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + resx = compute_resx_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, require, exclude); + if (r->feat_map.flags & MASKS_POINTER) { + resx.res0 |= r->feat_map.masks->res0; + resx.res1 |= r->feat_map.masks->res1; + } + /* - * If computing FGUs, don't take RES0 or register existence - * into account -- we're not computing bits for the register - * itself. + * If the register itself was not valid, all the non-RESx bits are + * now considered RES0 (this matches the behaviour of registers such + * as SCTLR2 and TCR2). Weed out any potential (though unlikely) + * overlap with RES1 bits coming from the previous computation. 
*/ - if (!(exclude & NEVER_FGU)) { - res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude); - res0 |= ~reg_feat_map_bits(&r->feat_map); - } + resx.res0 |= compute_resx_bits(kvm, &r->feat_map, 1, require, exclude).res0; + resx.res1 &= ~resx.res0; - return res0; + return resx; } -static u64 compute_reg_fixed_bits(struct kvm *kvm, - const struct reg_feat_map_desc *r, - u64 *fixed_bits, unsigned long require, - unsigned long exclude) +static u64 compute_fgu_bits(struct kvm *kvm, const struct reg_feat_map_desc *r) { - return __compute_fixed_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, - fixed_bits, require | FIXED_VALUE, exclude); + struct resx resx; + + /* + * If computing FGUs, we collect the unsupported feature bits as + * RESx bits, but don't take the actual RESx bits or register + * existence into account -- we're not computing bits for the + * register itself. + */ + resx = compute_resx_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + 0, NEVER_FGU); + + return resx.res0 | resx.res1; } void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) @@ -1306,40 +1436,29 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) switch (fgt) { case HFGRTR_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgrtr_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hfgwtr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgrtr_desc); + val |= compute_fgu_bits(kvm, &hfgwtr_desc); break; case HFGITR_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgitr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgitr_desc); break; case HDFGRTR_GROUP: - val |= compute_reg_res0_bits(kvm, &hdfgrtr_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hdfgwtr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hdfgrtr_desc); + val |= compute_fgu_bits(kvm, &hdfgwtr_desc); break; case HAFGRTR_GROUP: - val |= compute_reg_res0_bits(kvm, &hafgrtr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hafgrtr_desc); break; case HFGRTR2_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgrtr2_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hfgwtr2_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgrtr2_desc); + val |= compute_fgu_bits(kvm, &hfgwtr2_desc); break; case HFGITR2_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgitr2_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgitr2_desc); break; case HDFGRTR2_GROUP: - val |= compute_reg_res0_bits(kvm, &hdfgrtr2_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hdfgwtr2_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hdfgrtr2_desc); + val |= compute_fgu_bits(kvm, &hdfgwtr2_desc); break; default: BUG(); @@ -1348,87 +1467,77 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) kvm->arch.fgu[fgt] = val; } -void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1) +struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg) { - u64 fixed = 0, mask; + struct resx resx; switch (reg) { case HFGRTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgrtr_desc, 0, 0); - *res1 = HFGRTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgrtr_desc, 0, 0); break; case HFGWTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgwtr_desc, 0, 0); - *res1 = HFGWTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgwtr_desc, 0, 0); break; case HFGITR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgitr_desc, 0, 0); - *res1 = HFGITR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgitr_desc, 0, 0); break; case HDFGRTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgrtr_desc, 0, 0); - *res1 = 
HDFGRTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgrtr_desc, 0, 0); break; case HDFGWTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgwtr_desc, 0, 0); - *res1 = HDFGWTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgwtr_desc, 0, 0); break; case HAFGRTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hafgrtr_desc, 0, 0); - *res1 = HAFGRTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hafgrtr_desc, 0, 0); break; case HFGRTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgrtr2_desc, 0, 0); - *res1 = HFGRTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgrtr2_desc, 0, 0); break; case HFGWTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgwtr2_desc, 0, 0); - *res1 = HFGWTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgwtr2_desc, 0, 0); break; case HFGITR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgitr2_desc, 0, 0); - *res1 = HFGITR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgitr2_desc, 0, 0); break; case HDFGRTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgrtr2_desc, 0, 0); - *res1 = HDFGRTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgrtr2_desc, 0, 0); break; case HDFGWTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgwtr2_desc, 0, 0); - *res1 = HDFGWTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgwtr2_desc, 0, 0); break; case HCRX_EL2: - *res0 = compute_reg_res0_bits(kvm, &hcrx_desc, 0, 0); - *res1 = __HCRX_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hcrx_desc, 0, 0); + resx.res1 |= __HCRX_EL2_RES1; break; case HCR_EL2: - mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0); - *res0 = compute_reg_res0_bits(kvm, &hcr_desc, 0, 0); - *res0 |= (mask & ~fixed); - *res1 = HCR_EL2_RES1 | (mask & fixed); + resx = compute_reg_resx_bits(kvm, &hcr_desc, 0, 0); break; case SCTLR2_EL1: case SCTLR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &sctlr2_desc, 0, 0); - *res1 = SCTLR2_EL1_RES1; + resx = compute_reg_resx_bits(kvm, &sctlr2_desc, 0, 0); break; case TCR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &tcr2_el2_desc, 0, 0); - *res1 = TCR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &tcr2_el2_desc, 0, 0); break; case SCTLR_EL1: - *res0 = compute_reg_res0_bits(kvm, &sctlr_el1_desc, 0, 0); - *res1 = SCTLR_EL1_RES1; + resx = compute_reg_resx_bits(kvm, &sctlr_el1_desc, 0, 0); + break; + case SCTLR_EL2: + resx = compute_reg_resx_bits(kvm, &sctlr_el2_desc, 0, 0); break; case MDCR_EL2: - *res0 = compute_reg_res0_bits(kvm, &mdcr_el2_desc, 0, 0); - *res1 = MDCR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &mdcr_el2_desc, 0, 0); + break; + case VTCR_EL2: + resx = compute_reg_resx_bits(kvm, &vtcr_el2_desc, 0, 0); break; default: WARN_ON_ONCE(1); - *res0 = *res1 = 0; + resx = (typeof(resx)){}; break; } + + return resx; } static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 834f13fb1fb7..22d497554c94 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -70,6 +70,7 @@ enum cgt_group_id { CGT_HCR_ENSCXT, CGT_HCR_TTLBIS, CGT_HCR_TTLBOS, + CGT_HCR_TID5, CGT_MDCR_TPMCR, CGT_MDCR_TPM, @@ -308,6 +309,12 @@ static const struct trap_bits coarse_trap_bits[] = { .mask = HCR_TTLBOS, .behaviour = BEHAVE_FORWARD_RW, }, + [CGT_HCR_TID5] = { + .index = HCR_EL2, + .value = HCR_TID5, + .mask = HCR_TID5, + .behaviour = BEHAVE_FORWARD_RW, + }, [CGT_MDCR_TPMCR] = { .index = MDCR_EL2, .value = MDCR_EL2_TPMCR, @@ -665,6 +672,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_CCSIDR2_EL1, 
CGT_HCR_TID2_TID4), SR_TRAP(SYS_CLIDR_EL1, CGT_HCR_TID2_TID4), SR_TRAP(SYS_CSSELR_EL1, CGT_HCR_TID2_TID4), + SR_TRAP(SYS_GMID_EL1, CGT_HCR_TID5), SR_RANGE_TRAP(SYS_ID_PFR0_EL1, sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3), SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC), @@ -1166,6 +1174,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_DBGWCRn_EL1(12), CGT_MDCR_TDE_TDA), SR_TRAP(SYS_DBGWCRn_EL1(13), CGT_MDCR_TDE_TDA), SR_TRAP(SYS_DBGWCRn_EL1(14), CGT_MDCR_TDE_TDA), + SR_TRAP(SYS_DBGWCRn_EL1(15), CGT_MDCR_TDE_TDA), SR_TRAP(SYS_DBGCLAIMSET_EL1, CGT_MDCR_TDE_TDA), SR_TRAP(SYS_DBGCLAIMCLR_EL1, CGT_MDCR_TDE_TDA), SR_TRAP(SYS_DBGAUTHSTATUS_EL1, CGT_MDCR_TDE_TDA), @@ -2105,23 +2114,24 @@ static u32 encoding_next(u32 encoding) } #define FGT_MASKS(__n, __m) \ - struct fgt_masks __n = { .str = #__m, .res0 = __m, } - -FGT_MASKS(hfgrtr_masks, HFGRTR_EL2_RES0); -FGT_MASKS(hfgwtr_masks, HFGWTR_EL2_RES0); -FGT_MASKS(hfgitr_masks, HFGITR_EL2_RES0); -FGT_MASKS(hdfgrtr_masks, HDFGRTR_EL2_RES0); -FGT_MASKS(hdfgwtr_masks, HDFGWTR_EL2_RES0); -FGT_MASKS(hafgrtr_masks, HAFGRTR_EL2_RES0); -FGT_MASKS(hfgrtr2_masks, HFGRTR2_EL2_RES0); -FGT_MASKS(hfgwtr2_masks, HFGWTR2_EL2_RES0); -FGT_MASKS(hfgitr2_masks, HFGITR2_EL2_RES0); -FGT_MASKS(hdfgrtr2_masks, HDFGRTR2_EL2_RES0); -FGT_MASKS(hdfgwtr2_masks, HDFGWTR2_EL2_RES0); + struct fgt_masks __n = { .str = #__m, .res0 = __m ## _RES0, .res1 = __m ## _RES1 } + +FGT_MASKS(hfgrtr_masks, HFGRTR_EL2); +FGT_MASKS(hfgwtr_masks, HFGWTR_EL2); +FGT_MASKS(hfgitr_masks, HFGITR_EL2); +FGT_MASKS(hdfgrtr_masks, HDFGRTR_EL2); +FGT_MASKS(hdfgwtr_masks, HDFGWTR_EL2); +FGT_MASKS(hafgrtr_masks, HAFGRTR_EL2); +FGT_MASKS(hfgrtr2_masks, HFGRTR2_EL2); +FGT_MASKS(hfgwtr2_masks, HFGWTR2_EL2); +FGT_MASKS(hfgitr2_masks, HFGITR2_EL2); +FGT_MASKS(hdfgrtr2_masks, HDFGRTR2_EL2); +FGT_MASKS(hdfgwtr2_masks, HDFGWTR2_EL2); static __init bool aggregate_fgt(union trap_config tc) { struct fgt_masks *rmasks, *wmasks; + u64 rresx, wresx; switch (tc.fgt) { case HFGRTR_GROUP: @@ -2154,24 +2164,27 @@ static __init bool aggregate_fgt(union trap_config tc) break; } + rresx = rmasks->res0 | rmasks->res1; + if (wmasks) + wresx = wmasks->res0 | wmasks->res1; + /* * A bit can be reserved in either the R or W register, but * not both. 
*/ - if ((BIT(tc.bit) & rmasks->res0) && - (!wmasks || (BIT(tc.bit) & wmasks->res0))) + if ((BIT(tc.bit) & rresx) && (!wmasks || (BIT(tc.bit) & wresx))) return false; if (tc.pol) - rmasks->mask |= BIT(tc.bit) & ~rmasks->res0; + rmasks->mask |= BIT(tc.bit) & ~rresx; else - rmasks->nmask |= BIT(tc.bit) & ~rmasks->res0; + rmasks->nmask |= BIT(tc.bit) & ~rresx; if (wmasks) { if (tc.pol) - wmasks->mask |= BIT(tc.bit) & ~wmasks->res0; + wmasks->mask |= BIT(tc.bit) & ~wresx; else - wmasks->nmask |= BIT(tc.bit) & ~wmasks->res0; + wmasks->nmask |= BIT(tc.bit) & ~wresx; } return true; @@ -2180,7 +2193,6 @@ static __init bool aggregate_fgt(union trap_config tc) static __init int check_fgt_masks(struct fgt_masks *masks) { unsigned long duplicate = masks->mask & masks->nmask; - u64 res0 = masks->res0; int ret = 0; if (duplicate) { @@ -2194,10 +2206,14 @@ static __init int check_fgt_masks(struct fgt_masks *masks) ret = -EINVAL; } - masks->res0 = ~(masks->mask | masks->nmask); - if (masks->res0 != res0) - kvm_info("Implicit %s = %016llx, expecting %016llx\n", - masks->str, masks->res0, res0); + if ((masks->res0 | masks->res1 | masks->mask | masks->nmask) != GENMASK(63, 0) || + (masks->res0 & masks->res1) || (masks->res0 & masks->mask) || + (masks->res0 & masks->nmask) || (masks->res1 & masks->mask) || + (masks->res1 & masks->nmask) || (masks->mask & masks->nmask)) { + kvm_info("Inconsistent masks for %s (%016llx, %016llx, %016llx, %016llx)\n", + masks->str, masks->res0, masks->res1, masks->mask, masks->nmask); + masks->res0 = ~(masks->res1 | masks->mask | masks->nmask); + } return ret; } @@ -2269,9 +2285,6 @@ int __init populate_nv_trap_config(void) kvm_info("nv: %ld coarse grained trap handlers\n", ARRAY_SIZE(encoding_to_cgt)); - if (!cpus_have_final_cap(ARM64_HAS_FGT)) - goto check_mcb; - for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) { const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i]; union trap_config tc; @@ -2291,6 +2304,15 @@ int __init populate_nv_trap_config(void) } tc.val |= fgt->tc.val; + + if (!aggregate_fgt(tc)) { + ret = -EINVAL; + print_nv_trap_error(fgt, "FGT bit is reserved", ret); + } + + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + continue; + prev = xa_store(&sr_forward_xa, enc, xa_mk_value(tc.val), GFP_KERNEL); @@ -2298,11 +2320,6 @@ int __init populate_nv_trap_config(void) ret = xa_err(prev); print_nv_trap_error(fgt, "Failed FGT insertion", ret); } - - if (!aggregate_fgt(tc)) { - ret = -EINVAL; - print_nv_trap_error(fgt, "FGT bit is reserved", ret); - } } } @@ -2318,7 +2335,6 @@ int __init populate_nv_trap_config(void) kvm_info("nv: %ld fine grained trap handlers\n", ARRAY_SIZE(encoding_to_fgt)); -check_mcb: for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) { const enum cgt_group_id *cgids; @@ -2420,15 +2436,7 @@ static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu, static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr) { - struct kvm_sysreg_masks *masks; - - /* Only handle the VNCR-backed regs for now */ - if (sr < __VNCR_START__) - return 0; - - masks = kvm->arch.sysreg_masks; - - return masks->mask[sr - __VNCR_START__].res0; + return kvm_get_sysreg_resx(kvm, sr).res0; } static bool check_fgt_bit(struct kvm_vcpu *vcpu, enum vcpu_sysreg sr, @@ -2581,6 +2589,19 @@ local: params = esr_sys64_to_params(esr); /* + * This implements the pseudocode UnimplementedIDRegister() + * helper for the purpose of dealing with FEAT_IDST. 
+ */ + if (in_feat_id_space(¶ms)) { + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, IDS, IMP)) + kvm_inject_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + else + kvm_inject_undefined(vcpu); + + return true; + } + + /* * Check for the IMPDEF range, as per DDI0487 J.a, * D18.3.2 Reserved encodings for IMPLEMENTATION * DEFINED registers. diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index d1ccddf9e87d..11a10d8f5beb 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -126,9 +126,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) add x1, x1, #VCPU_CONTEXT - alternative_cb ARM64_ALWAYS_SYSTEM, kvm_pan_patch_el2_entry - nop - alternative_cb_end + ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN) // Store the guest regs x2 and x3 stp x2, x3, [x1, #CPU_XREG_OFFSET(2)] diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index afecbdd3c1e9..2597e8bda867 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -59,10 +59,8 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to * it will cause an exception. */ - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) write_sysreg(1 << 30, fpexc32_el2); - isb(); - } } static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) @@ -495,7 +493,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) /* * When the guest owns the FP regs, we know that guest+hyp traps for * any FPSIMD/SVE/SME features exposed to the guest have been disabled - * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd() + * by either __activate_cptr_traps() or kvm_hyp_handle_fpsimd() * prior to __guest_entry(). As __guest_entry() guarantees a context * synchronization event, we don't need an ISB here to avoid taking * traps for anything that was exposed to the guest. diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index f731cc4c3f28..94161ea1cd60 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -792,7 +792,7 @@ static void do_ffa_version(struct arm_smccc_1_2_regs *res, .a0 = FFA_VERSION, .a1 = ffa_req_version, }, res); - if (res->a0 == FFA_RET_NOT_SUPPORTED) + if ((s32)res->a0 == FFA_RET_NOT_SUPPORTED) goto unlock; hyp_ffa_version = ffa_req_version; @@ -943,7 +943,7 @@ int hyp_ffa_init(void *pages) .a0 = FFA_VERSION, .a1 = FFA_VERSION_1_2, }, &res); - if (res.a0 == FFA_RET_NOT_SUPPORTED) + if ((s32)res.a0 == FFA_RET_NOT_SUPPORTED) return 0; /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index aada42522e7b..0d42eedc7167 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -260,11 +260,6 @@ reset: msr sctlr_el2, x5 isb -alternative_if ARM64_KVM_PROTECTED_MODE - mov_q x5, HCR_HOST_NVHE_FLAGS - msr_hcr_el2 x5 -alternative_else_nop_endif - /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8ffbbce5e2ed..e7790097db93 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -690,6 +690,69 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) kvm_skip_host_instr(); } +/* + * Inject an Undefined Instruction exception into the host. 
+ * + * This is open-coded to allow control over PSTATE construction without + * complicating the generic exception entry helpers. + */ +static void inject_undef64(void) +{ + u64 spsr_mask, vbar, sctlr, old_spsr, new_spsr, esr, offset; + + spsr_mask = PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT | PSR_DIT_BIT | PSR_PAN_BIT; + + vbar = read_sysreg_el1(SYS_VBAR); + sctlr = read_sysreg_el1(SYS_SCTLR); + old_spsr = read_sysreg_el2(SYS_SPSR); + + new_spsr = old_spsr & spsr_mask; + new_spsr |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT; + new_spsr |= PSR_MODE_EL1h; + + if (!(sctlr & SCTLR_EL1_SPAN)) + new_spsr |= PSR_PAN_BIT; + + if (sctlr & SCTLR_ELx_DSSBS) + new_spsr |= PSR_SSBS_BIT; + + if (system_supports_mte()) + new_spsr |= PSR_TCO_BIT; + + esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT) | ESR_ELx_IL; + offset = CURRENT_EL_SP_ELx_VECTOR + except_type_sync; + + write_sysreg_el1(esr, SYS_ESR); + write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); + write_sysreg_el1(old_spsr, SYS_SPSR); + write_sysreg_el2(vbar + offset, SYS_ELR); + write_sysreg_el2(new_spsr, SYS_SPSR); +} + +static bool handle_host_mte(u64 esr) +{ + switch (esr_sys64_to_sysreg(esr)) { + case SYS_RGSR_EL1: + case SYS_GCR_EL1: + case SYS_TFSR_EL1: + case SYS_TFSRE0_EL1: + /* If we're here for any reason other than MTE, it's a bug. */ + if (read_sysreg(HCR_EL2) & HCR_ATA) + return false; + break; + case SYS_GMID_EL1: + /* If we're here for any reason other than MTE, it's a bug. */ + if (!(read_sysreg(HCR_EL2) & HCR_TID5)) + return false; + break; + default: + return false; + } + + inject_undef64(); + return true; +} + void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); @@ -705,6 +768,10 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_DABT_LOW: handle_host_mem_abort(host_ctxt); break; + case ESR_ELx_EC_SYS64: + if (handle_host_mte(esr)) + break; + fallthrough; default: BUG(); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 49db32f3ddf7..38f66a56a766 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -19,7 +19,7 @@ #include <nvhe/mem_protect.h> #include <nvhe/mm.h> -#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) +#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP) struct host_mmu host_mmu; @@ -324,6 +324,8 @@ int __pkvm_prot_finalize(void) params->vttbr = kvm_get_vttbr(mmu); params->vtcr = mmu->vtcr; params->hcr_el2 |= HCR_VM; + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) + params->hcr_el2 |= HCR_FWB; /* * The CMO below not only cleans the updated params to the diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 12b2acfbcfd1..8e29d7734a15 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -82,7 +82,7 @@ static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu) if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP)) val &= ~(HCR_AMVOFFEN); - if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) { + if (!kvm_has_mte(kvm)) { val |= HCR_TID5; val &= ~(HCR_DCT | HCR_ATA); } @@ -117,8 +117,8 @@ static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu) if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP)) val |= MDCR_EL2_TTRF; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP)) - val |= MDCR_EL2_E2TB_MASK; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP)) + val &= ~MDCR_EL2_E2TB_MASK; /* Trap Debug Communications Channel registers */ if (!kvm_has_feat(kvm, 
ID_AA64MMFR0_EL1, FGT, IMP)) @@ -339,9 +339,6 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc /* Preserve the vgic model so that GICv3 emulation works */ hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model; - if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) - set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); - /* No restrictions for non-protected VMs. */ if (!kvm_vm_is_protected(kvm)) { hyp_vm->kvm.arch.flags = host_arch_flags; @@ -356,20 +353,23 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc return; } + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_MTE)) + kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_MTE_ENABLED); + bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES); set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PMU_V3)) set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_ADDRESS)) set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_GENERIC)) set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) { + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_SVE)) { set_bit(KVM_ARM_VCPU_SVE, allowed_features); kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE); } diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 3108b5185c20..06d28621722e 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -134,7 +134,7 @@ static const struct pvm_ftr_bits pvmid_aa64mmfr2[] = { MAX_FEAT(ID_AA64MMFR2_EL1, UAO, IMP), MAX_FEAT(ID_AA64MMFR2_EL1, IESB, IMP), MAX_FEAT(ID_AA64MMFR2_EL1, AT, IMP), - MAX_FEAT_ENUM(ID_AA64MMFR2_EL1, IDS, 0x18), + MAX_FEAT(ID_AA64MMFR2_EL1, IDS, IMP), MAX_FEAT(ID_AA64MMFR2_EL1, TTL, IMP), MAX_FEAT(ID_AA64MMFR2_EL1, BBM, 2), MAX_FEAT(ID_AA64MMFR2_EL1, E0PD, IMP), @@ -243,16 +243,15 @@ static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id) } } -/* - * Inject an unknown/undefined exception to an AArch64 guest while most of its - * sysregs are live. - */ -static void inject_undef64(struct kvm_vcpu *vcpu) +static void inject_sync64(struct kvm_vcpu *vcpu, u64 esr) { - u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + + /* + * Make sure we have the latest update to VBAR_EL1, as pKVM + * handles traps very early, before sysregs are resync'ed + */ __vcpu_assign_sys_reg(vcpu, VBAR_EL1, read_sysreg_el1(SYS_VBAR)); kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); @@ -265,6 +264,15 @@ static void inject_undef64(struct kvm_vcpu *vcpu) write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); } +/* + * Inject an unknown/undefined exception to an AArch64 guest while most of its + * sysregs are live. 
+ */ +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + inject_sync64(vcpu, (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT)); +} + static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { @@ -339,6 +347,18 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu, return true; } +static bool pvm_idst_access(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, IDS, IMP)) + inject_sync64(vcpu, kvm_vcpu_get_esr(vcpu)); + else + inject_undef64(vcpu); + + return false; +} + /* Mark the specified system register as an AArch32 feature id register. */ #define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 } @@ -469,6 +489,9 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), + { SYS_DESC(SYS_CCSIDR2_EL1), .access = pvm_idst_access }, + { SYS_DESC(SYS_GMID_EL1), .access = pvm_idst_access }, + { SYS_DESC(SYS_SMIDR_EL1), .access = pvm_idst_access }, HOST_HANDLED(SYS_AIDR_EL1), HOST_HANDLED(SYS_CSSELR_EL1), HOST_HANDLED(SYS_CTR_EL0), diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 9abc0a6cf448..0e4ddd28ef5d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -342,6 +342,9 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) if (!(prot & KVM_PGTABLE_PROT_R)) return -EINVAL; + if (!cpus_have_final_cap(ARM64_KVM_HVHE)) + prot &= ~KVM_PGTABLE_PROT_UX; + if (prot & KVM_PGTABLE_PROT_X) { if (prot & KVM_PGTABLE_PROT_W) return -EINVAL; @@ -351,8 +354,16 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) if (system_supports_bti_kernel()) attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP; + } + + if (cpus_have_final_cap(ARM64_KVM_HVHE)) { + if (!(prot & KVM_PGTABLE_PROT_PX)) + attr |= KVM_PTE_LEAF_ATTR_HI_S1_PXN; + if (!(prot & KVM_PGTABLE_PROT_UX)) + attr |= KVM_PTE_LEAF_ATTR_HI_S1_UXN; } else { - attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; + if (!(prot & KVM_PGTABLE_PROT_PX)) + attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; } attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); @@ -373,8 +384,15 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) if (!kvm_pte_valid(pte)) return prot; - if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) - prot |= KVM_PGTABLE_PROT_X; + if (cpus_have_final_cap(ARM64_KVM_HVHE)) { + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_PXN)) + prot |= KVM_PGTABLE_PROT_PX; + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_UXN)) + prot |= KVM_PGTABLE_PROT_UX; + } else { + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) + prot |= KVM_PGTABLE_PROT_PX; + } ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte); if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO) @@ -583,8 +601,8 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) u64 vtcr = VTCR_EL2_FLAGS; s8 lvls; - vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; - vtcr |= VTCR_EL2_T0SZ(phys_shift); + vtcr |= FIELD_PREP(VTCR_EL2_PS, kvm_get_parange(mmfr0)); + vtcr |= FIELD_PREP(VTCR_EL2_T0SZ, (UL(64) - phys_shift)); /* * Use a minimum 2 level page table to prevent splitting * host PMD huge pages at stage2. @@ -624,21 +642,11 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_DS; /* Set the vmid bits */ - vtcr |= (get_vmid_bits(mmfr1) == 16) ? - VTCR_EL2_VS_16BIT : - VTCR_EL2_VS_8BIT; + vtcr |= (get_vmid_bits(mmfr1) == 16) ? 
VTCR_EL2_VS : 0; return vtcr; } -static bool stage2_has_fwb(struct kvm_pgtable *pgt) -{ - if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) - return false; - - return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); -} - void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, size_t size) { @@ -659,7 +667,17 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, } } -#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) +#define KVM_S2_MEMATTR(pgt, attr) \ + ({ \ + kvm_pte_t __attr; \ + \ + if ((pgt)->flags & KVM_PGTABLE_S2_AS_S1) \ + __attr = PAGE_S2_MEMATTR(AS_S1); \ + else \ + __attr = PAGE_S2_MEMATTR(attr); \ + \ + __attr; \ + }) static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr) { @@ -868,7 +886,7 @@ static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt) * system supporting FWB as the optimization is entirely * pointless when the unmap walker needs to perform CMOs. */ - return system_supports_tlb_range() && stage2_has_fwb(pgt); + return system_supports_tlb_range() && cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); } static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, @@ -1148,7 +1166,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, if (mm_ops->page_count(childp) != 1) return 0; } else if (stage2_pte_cacheable(pgt, ctx->old)) { - need_flush = !stage2_has_fwb(pgt); + need_flush = !cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); } /* @@ -1379,7 +1397,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) .arg = pgt, }; - if (stage2_has_fwb(pgt)) + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) return 0; return kvm_pgtable_walk(pgt, addr, size, &walker); diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 5fd99763b54d..f0605836821e 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -44,7 +44,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) /* Build the full address */ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); /* If not for GICV, move on */ if (fault_ipa < vgic->vgic_cpu_base || diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 0b670a033fd8..c4d2f1feea8b 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -569,11 +569,11 @@ static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, continue; /* Group-0 interrupt, but Group-0 disabled? */ - if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_EL2_VENG0_MASK)) continue; /* Group-1 interrupt, but Group-1 disabled? */ - if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_EL2_VENG1_MASK)) continue; /* Not the highest priority? 
*/ @@ -646,19 +646,19 @@ static int __vgic_v3_get_highest_active_priority(void) static unsigned int __vgic_v3_get_bpr0(u32 vmcr) { - return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + return FIELD_GET(ICH_VMCR_EL2_VBPR0, vmcr); } static unsigned int __vgic_v3_get_bpr1(u32 vmcr) { unsigned int bpr; - if (vmcr & ICH_VMCR_CBPR_MASK) { + if (vmcr & ICH_VMCR_EL2_VCBPR_MASK) { bpr = __vgic_v3_get_bpr0(vmcr); if (bpr < 7) bpr++; } else { - bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + bpr = FIELD_GET(ICH_VMCR_EL2_VBPR1, vmcr); } return bpr; @@ -758,7 +758,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) if (grp != !!(lr_val & ICH_LR_GROUP)) goto spurious; - pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + pmr = FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr); lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; if (pmr <= lr_prio) goto spurious; @@ -806,7 +806,7 @@ static int ___vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) int lr; /* EOImode == 0, nothing to be done here */ - if (!(vmcr & ICH_VMCR_EOIM_MASK)) + if (!(vmcr & ICH_VMCR_EL2_VEOIM_MASK)) return 1; /* No deactivate to be performed on an LPI */ @@ -849,7 +849,7 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) } /* EOImode == 1 and not an LPI, nothing to be done here */ - if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI)) + if ((vmcr & ICH_VMCR_EL2_VEOIM_MASK) && !(vid >= VGIC_MIN_LPI)) return; lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; @@ -865,22 +865,19 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { - vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); + vcpu_set_reg(vcpu, rt, FIELD_GET(ICH_VMCR_EL2_VENG0, vmcr)); } static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { - vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); + vcpu_set_reg(vcpu, rt, FIELD_GET(ICH_VMCR_EL2_VENG1, vmcr)); } static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); - if (val & 1) - vmcr |= ICH_VMCR_ENG0_MASK; - else - vmcr &= ~ICH_VMCR_ENG0_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VENG0, &vmcr, val & 1); __vgic_v3_write_vmcr(vmcr); } @@ -889,10 +886,7 @@ static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); - if (val & 1) - vmcr |= ICH_VMCR_ENG1_MASK; - else - vmcr &= ~ICH_VMCR_ENG1_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VENG1, &vmcr, val & 1); __vgic_v3_write_vmcr(vmcr); } @@ -916,10 +910,7 @@ static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) if (val < bpr_min) val = bpr_min; - val <<= ICH_VMCR_BPR0_SHIFT; - val &= ICH_VMCR_BPR0_MASK; - vmcr &= ~ICH_VMCR_BPR0_MASK; - vmcr |= val; + FIELD_MODIFY(ICH_VMCR_EL2_VBPR0, &vmcr, val); __vgic_v3_write_vmcr(vmcr); } @@ -929,17 +920,14 @@ static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min(); - if (vmcr & ICH_VMCR_CBPR_MASK) + if (FIELD_GET(ICH_VMCR_EL2_VCBPR, val)) return; /* Enforce BPR limiting */ if (val < bpr_min) val = bpr_min; - val <<= ICH_VMCR_BPR1_SHIFT; - val &= ICH_VMCR_BPR1_MASK; - vmcr &= ~ICH_VMCR_BPR1_MASK; - vmcr |= val; + FIELD_MODIFY(ICH_VMCR_EL2_VBPR1, &vmcr, val); __vgic_v3_write_vmcr(vmcr); } @@ -1029,19 +1017,14 @@ spurious: static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { - vmcr &= 
ICH_VMCR_PMR_MASK; - vmcr >>= ICH_VMCR_PMR_SHIFT; - vcpu_set_reg(vcpu, rt, vmcr); + vcpu_set_reg(vcpu, rt, FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr)); } static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); - val <<= ICH_VMCR_PMR_SHIFT; - val &= ICH_VMCR_PMR_MASK; - vmcr &= ~ICH_VMCR_PMR_MASK; - vmcr |= val; + FIELD_MODIFY(ICH_VMCR_EL2_VPMR, &vmcr, val); write_gicreg(vmcr, ICH_VMCR_EL2); } @@ -1064,9 +1047,11 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ - val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT; + val |= FIELD_PREP(ICC_CTLR_EL1_EOImode_MASK, + FIELD_GET(ICH_VMCR_EL2_VEOIM, vmcr)); /* CBPR */ - val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + val |= FIELD_PREP(ICC_CTLR_EL1_CBPR_MASK, + FIELD_GET(ICH_VMCR_EL2_VCBPR, vmcr)); vcpu_set_reg(vcpu, rt, val); } @@ -1075,15 +1060,11 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); - if (val & ICC_CTLR_EL1_CBPR_MASK) - vmcr |= ICH_VMCR_CBPR_MASK; - else - vmcr &= ~ICH_VMCR_CBPR_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VCBPR, &vmcr, + FIELD_GET(ICC_CTLR_EL1_CBPR_MASK, val)); - if (val & ICC_CTLR_EL1_EOImode_MASK) - vmcr |= ICH_VMCR_EOIM_MASK; - else - vmcr &= ~ICH_VMCR_EOIM_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VEOIM, &vmcr, + FIELD_GET(ICC_CTLR_EL1_EOImode_MASK, val)); write_gicreg(vmcr, ICH_VMCR_EL2); } diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index f28c6cf4fe1b..b254d442e54e 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -205,7 +205,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) /* * When running a normal EL1 guest, we only load a new vcpu - * after a context switch, which imvolves a DSB, so all + * after a context switch, which involves a DSB, so all * speculative EL1&0 walks will have already completed. * If running NV, the vcpu may transition between vEL1 and * vEL2 without a context switch, so make sure we complete diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index dfcd66c65517..89982bd3345f 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -162,12 +162,16 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); } +void kvm_inject_sync(struct kvm_vcpu *vcpu, u64 esr) +{ + pend_sync_exception(vcpu); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); +} + static void inject_undef64(struct kvm_vcpu *vcpu) { u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - pend_sync_exception(vcpu); - /* * Build an unknown exception, depending on the instruction * set. 
@@ -175,7 +179,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) if (kvm_vcpu_trap_il_is32bit(vcpu)) esr |= ESR_ELx_IL; - vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); + kvm_inject_sync(vcpu, esr); } #define DFSR_FSC_EXTABT_LPAE 0x10 @@ -253,12 +257,46 @@ int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) return 1; } +static int kvm_inject_nested_excl_atomic(struct kvm_vcpu *vcpu, u64 addr) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_DABT_LOW) | + FIELD_PREP(ESR_ELx_FSC, ESR_ELx_FSC_EXCL_ATOMIC) | + ESR_ELx_IL; + + vcpu_write_sys_reg(vcpu, addr, FAR_EL2); + return kvm_inject_nested_sync(vcpu, esr); +} + +/** + * kvm_inject_dabt_excl_atomic - inject a data abort for unsupported exclusive + * or atomic access + * @vcpu: The VCPU to receive the data abort + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +int kvm_inject_dabt_excl_atomic(struct kvm_vcpu *vcpu, u64 addr) +{ + u64 esr; + + if (is_nested_ctxt(vcpu) && (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_VM)) + return kvm_inject_nested_excl_atomic(vcpu, addr); + + __kvm_inject_sea(vcpu, false, addr); + esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu)); + esr &= ~ESR_ELx_FSC; + esr |= ESR_ELx_FSC_EXCL_ATOMIC; + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); + return 1; +} + void kvm_inject_size_fault(struct kvm_vcpu *vcpu) { unsigned long addr, esr; addr = kvm_vcpu_get_fault_ipa(vcpu); - addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + addr |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr); diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 54f9358c9e0e..e2285ed8c91d 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -159,6 +159,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) bool is_write; int len; u8 data_buf[8]; + u64 esr; + + esr = kvm_vcpu_get_esr(vcpu); /* * No valid syndrome? Ask userspace for help if it has @@ -168,7 +171,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) * though, so directly deliver an exception to the guest. */ if (!kvm_vcpu_dabt_isvalid(vcpu)) { - trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), + trace_kvm_mmio_nisv(*vcpu_pc(vcpu), esr, kvm_vcpu_get_hfar(vcpu), fault_ipa); if (vcpu_is_protected(vcpu)) @@ -186,6 +189,28 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) } /* + * When (DFSC == 0b00xxxx || DFSC == 0b10101x) && DFSC != 0b0000xx + * ESR_EL2[12:11] describe the Load/Store Type. This allows us to + * punt the LD64B/ST64B/ST64BV/ST64BV0 instructions to userspace, + * which will have to provide a full emulation of these 4 + * instructions. No, we don't expect this do be fast. + * + * We rely on traps being set if the corresponding features are not + * enabled, so if we get here, userspace has promised us to handle + * it already. + */ + switch (kvm_vcpu_trap_get_fault(vcpu)) { + case 0b000100 ... 0b001111: + case 0b101010 ... 0b101011: + if (FIELD_GET(GENMASK(12, 11), esr)) { + run->exit_reason = KVM_EXIT_ARM_LDST64B; + run->arm_nisv.esr_iss = esr & ~(u64)ESR_ELx_FSC; + run->arm_nisv.fault_ipa = fault_ipa; + return 0; + } + } + + /* * Prepare MMIO operation. First decode the syndrome data we get * from the CPU. Then try if some in-kernel emulation feels * responsible, otherwise let user space do its magic. 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2caa97f87890..8c5d259810b2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1843,6 +1843,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return ret; } + /* + * Guest performs atomic/exclusive operations on memory with unsupported + * attributes (e.g. ld64b/st64b on normal memory when no FEAT_LS64WB) + * and trigger the exception here. Since the memslot is valid, inject + * the fault back to the guest. + */ + if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) { + kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + if (nested) adjust_nested_fault_perms(nested, &prot, &writable); @@ -2068,7 +2079,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Falls between the IPA range and the PARange? */ if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); return kvm_inject_sea(vcpu, is_iabt, fault_ipa); } @@ -2080,7 +2091,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Check the stage-2 fault is trans. fault or write fault */ if (!esr_fsc_is_translation_fault(esr) && !esr_fsc_is_permission_fault(esr) && - !esr_fsc_is_access_flag_fault(esr)) { + !esr_fsc_is_access_flag_fault(esr) && + !esr_fsc_is_excl_atomic_fault(esr)) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -2173,7 +2185,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * faulting VA. This is always 12 bits, irrespective * of the page size. */ - ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); ret = io_mem_abort(vcpu, ipa); goto out_unlock; } @@ -2282,11 +2294,9 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { .virt_to_phys = kvm_host_pa, }; -int __init kvm_mmu_init(u32 *hyp_va_bits) +int __init kvm_mmu_init(u32 hyp_va_bits) { int err; - u32 idmap_bits; - u32 kernel_bits; hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); @@ -2300,25 +2310,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - /* - * The ID map is always configured for 48 bits of translation, which - * may be fewer than the number of VA bits used by the regular kernel - * stage 1, when VA_BITS=52. - * - * At EL2, there is only one TTBR register, and we can't switch between - * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom - * line: we need to use the extended range with *both* our translation - * tables. - * - * So use the maximum of the idmap VA bits and the regular kernel stage - * 1 VA bits to assure that the hypervisor can both ID map its code page - * and map any kernel memory. 
- */ - idmap_bits = IDMAP_VA_BITS; - kernel_bits = vabits_actual; - *hyp_va_bits = max(idmap_bits, kernel_bits); - - kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits); + kvm_debug("Using %u-bit virtual addresses at EL2\n", hyp_va_bits); kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", kern_hyp_va(PAGE_OFFSET), @@ -2343,7 +2335,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) goto out; } - err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops); + err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits, &kvm_hyp_mm_ops); if (err) goto out_free_pgtable; @@ -2352,7 +2344,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) goto out_destroy_pgtable; io_map_base = hyp_idmap_start; - __hyp_va_bits = *hyp_va_bits; + __hyp_va_bits = hyp_va_bits; return 0; out_destroy_pgtable: diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index cdeeb8f09e72..eeea5e692370 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -377,7 +377,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) { wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK; - switch (vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { case VTCR_EL2_TG0_4K: wi->pgshift = 12; break; case VTCR_EL2_TG0_16K: @@ -513,7 +513,7 @@ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr) lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock); - switch (vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { case VTCR_EL2_TG0_4K: ttl = (TLBI_TTL_TG_4K << 2); break; @@ -530,7 +530,7 @@ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr) again: /* Iteratively compute the block sizes for a particular granule size */ - switch (vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { case VTCR_EL2_TG0_4K: if (sz < SZ_4K) sz = SZ_4K; else if (sz < SZ_2M) sz = SZ_2M; @@ -593,7 +593,7 @@ unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val) if (!max_size) { /* Compute the maximum extent of the invalidation */ - switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, mmu->tlb_vtcr)) { case VTCR_EL2_TG0_4K: max_size = SZ_1G; break; @@ -1101,6 +1101,9 @@ void kvm_nested_s2_wp(struct kvm *kvm) lockdep_assert_held_write(&kvm->mmu_lock); + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1117,6 +1120,9 @@ void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block) lockdep_assert_held_write(&kvm->mmu_lock); + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1133,6 +1139,9 @@ void kvm_nested_s2_flush(struct kvm *kvm) lockdep_assert_held_write(&kvm->mmu_lock); + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1145,6 +1154,9 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) { int i; + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1505,11 +1517,6 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) u64 orig_val = val; switch (reg) { - case SYS_ID_AA64ISAR0_EL1: - /* Support everything but TME */ - val &= ~ID_AA64ISAR0_EL1_TME; - break; - case SYS_ID_AA64ISAR1_EL1: /* Support everything but LS64 and Spec Invalidation */ val &= 
~(ID_AA64ISAR1_EL1_LS64 | @@ -1669,36 +1676,28 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr, u64 v) { - struct kvm_sysreg_masks *masks; + struct resx resx; - masks = vcpu->kvm->arch.sysreg_masks; - - if (masks) { - sr -= __SANITISED_REG_START__; - - v &= ~masks->mask[sr].res0; - v |= masks->mask[sr].res1; - } + resx = kvm_get_sysreg_resx(vcpu->kvm, sr); + v &= ~resx.res0; + v |= resx.res1; return v; } -static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) +static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, struct resx resx) { - int i = sr - __SANITISED_REG_START__; - BUILD_BUG_ON(!__builtin_constant_p(sr)); BUILD_BUG_ON(sr < __SANITISED_REG_START__); BUILD_BUG_ON(sr >= NR_SYS_REGS); - kvm->arch.sysreg_masks->mask[i].res0 = res0; - kvm->arch.sysreg_masks->mask[i].res1 = res1; + kvm_set_sysreg_resx(kvm, sr, resx); } int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - u64 res0, res1; + struct resx resx; lockdep_assert_held(&kvm->arch.config_lock); @@ -1711,111 +1710,116 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) return -ENOMEM; /* VTTBR_EL2 */ - res0 = res1 = 0; + resx = (typeof(resx)){}; if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16)) - res0 |= GENMASK(63, 56); + resx.res0 |= GENMASK(63, 56); if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, CnP, IMP)) - res0 |= VTTBR_CNP_BIT; - set_sysreg_masks(kvm, VTTBR_EL2, res0, res1); + resx.res0 |= VTTBR_CNP_BIT; + set_sysreg_masks(kvm, VTTBR_EL2, resx); /* VTCR_EL2 */ - res0 = GENMASK(63, 32) | GENMASK(30, 20); - res1 = BIT(31); - set_sysreg_masks(kvm, VTCR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, VTCR_EL2); + set_sysreg_masks(kvm, VTCR_EL2, resx); /* VMPIDR_EL2 */ - res0 = GENMASK(63, 40) | GENMASK(30, 24); - res1 = BIT(31); - set_sysreg_masks(kvm, VMPIDR_EL2, res0, res1); + resx.res0 = GENMASK(63, 40) | GENMASK(30, 24); + resx.res1 = BIT(31); + set_sysreg_masks(kvm, VMPIDR_EL2, resx); /* HCR_EL2 */ - get_reg_fixed_bits(kvm, HCR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HCR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HCR_EL2); + set_sysreg_masks(kvm, HCR_EL2, resx); /* HCRX_EL2 */ - get_reg_fixed_bits(kvm, HCRX_EL2, &res0, &res1); - set_sysreg_masks(kvm, HCRX_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HCRX_EL2); + set_sysreg_masks(kvm, HCRX_EL2, resx); /* HFG[RW]TR_EL2 */ - get_reg_fixed_bits(kvm, HFGRTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGRTR_EL2, res0, res1); - get_reg_fixed_bits(kvm, HFGWTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGWTR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGRTR_EL2); + set_sysreg_masks(kvm, HFGRTR_EL2, resx); + resx = get_reg_fixed_bits(kvm, HFGWTR_EL2); + set_sysreg_masks(kvm, HFGWTR_EL2, resx); /* HDFG[RW]TR_EL2 */ - get_reg_fixed_bits(kvm, HDFGRTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGRTR_EL2, res0, res1); - get_reg_fixed_bits(kvm, HDFGWTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGWTR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HDFGRTR_EL2); + set_sysreg_masks(kvm, HDFGRTR_EL2, resx); + resx = get_reg_fixed_bits(kvm, HDFGWTR_EL2); + set_sysreg_masks(kvm, HDFGWTR_EL2, resx); /* HFGITR_EL2 */ - get_reg_fixed_bits(kvm, HFGITR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGITR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGITR_EL2); + set_sysreg_masks(kvm, HFGITR_EL2, resx); /* HAFGRTR_EL2 - not a lot to see here */ - get_reg_fixed_bits(kvm, HAFGRTR_EL2, &res0, 
&res1); - set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HAFGRTR_EL2); + set_sysreg_masks(kvm, HAFGRTR_EL2, resx); /* HFG[RW]TR2_EL2 */ - get_reg_fixed_bits(kvm, HFGRTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGRTR2_EL2, res0, res1); - get_reg_fixed_bits(kvm, HFGWTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGWTR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGRTR2_EL2); + set_sysreg_masks(kvm, HFGRTR2_EL2, resx); + resx = get_reg_fixed_bits(kvm, HFGWTR2_EL2); + set_sysreg_masks(kvm, HFGWTR2_EL2, resx); /* HDFG[RW]TR2_EL2 */ - get_reg_fixed_bits(kvm, HDFGRTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGRTR2_EL2, res0, res1); - get_reg_fixed_bits(kvm, HDFGWTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGWTR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HDFGRTR2_EL2); + set_sysreg_masks(kvm, HDFGRTR2_EL2, resx); + resx = get_reg_fixed_bits(kvm, HDFGWTR2_EL2); + set_sysreg_masks(kvm, HDFGWTR2_EL2, resx); /* HFGITR2_EL2 */ - get_reg_fixed_bits(kvm, HFGITR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGITR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGITR2_EL2); + set_sysreg_masks(kvm, HFGITR2_EL2, resx); /* TCR2_EL2 */ - get_reg_fixed_bits(kvm, TCR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, TCR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, TCR2_EL2); + set_sysreg_masks(kvm, TCR2_EL2, resx); /* SCTLR_EL1 */ - get_reg_fixed_bits(kvm, SCTLR_EL1, &res0, &res1); - set_sysreg_masks(kvm, SCTLR_EL1, res0, res1); + resx = get_reg_fixed_bits(kvm, SCTLR_EL1); + set_sysreg_masks(kvm, SCTLR_EL1, resx); + + /* SCTLR_EL2 */ + resx = get_reg_fixed_bits(kvm, SCTLR_EL2); + set_sysreg_masks(kvm, SCTLR_EL2, resx); /* SCTLR2_ELx */ - get_reg_fixed_bits(kvm, SCTLR2_EL1, &res0, &res1); - set_sysreg_masks(kvm, SCTLR2_EL1, res0, res1); - get_reg_fixed_bits(kvm, SCTLR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, SCTLR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, SCTLR2_EL1); + set_sysreg_masks(kvm, SCTLR2_EL1, resx); + resx = get_reg_fixed_bits(kvm, SCTLR2_EL2); + set_sysreg_masks(kvm, SCTLR2_EL2, resx); /* MDCR_EL2 */ - get_reg_fixed_bits(kvm, MDCR_EL2, &res0, &res1); - set_sysreg_masks(kvm, MDCR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, MDCR_EL2); + set_sysreg_masks(kvm, MDCR_EL2, resx); /* CNTHCTL_EL2 */ - res0 = GENMASK(63, 20); - res1 = 0; + resx.res0 = GENMASK(63, 20); + resx.res1 = 0; if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RME, IMP)) - res0 |= CNTHCTL_CNTPMASK | CNTHCTL_CNTVMASK; + resx.res0 |= CNTHCTL_CNTPMASK | CNTHCTL_CNTVMASK; if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, CNTPOFF)) { - res0 |= CNTHCTL_ECV; + resx.res0 |= CNTHCTL_ECV; if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, IMP)) - res0 |= (CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT | - CNTHCTL_EL1NVPCT | CNTHCTL_EL1NVVCT); + resx.res0 |= (CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT | + CNTHCTL_EL1NVPCT | CNTHCTL_EL1NVVCT); } if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP)) - res0 |= GENMASK(11, 8); - set_sysreg_masks(kvm, CNTHCTL_EL2, res0, res1); + resx.res0 |= GENMASK(11, 8); + set_sysreg_masks(kvm, CNTHCTL_EL2, resx); /* ICH_HCR_EL2 */ - res0 = ICH_HCR_EL2_RES0; - res1 = ICH_HCR_EL2_RES1; + resx.res0 = ICH_HCR_EL2_RES0; + resx.res1 = ICH_HCR_EL2_RES1; if (!(kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_TDS)) - res0 |= ICH_HCR_EL2_TDIR; + resx.res0 |= ICH_HCR_EL2_TDIR; /* No GICv4 is presented to the guest */ - res0 |= ICH_HCR_EL2_DVIM | ICH_HCR_EL2_vSGIEOICount; - set_sysreg_masks(kvm, ICH_HCR_EL2, res0, res1); + resx.res0 |= ICH_HCR_EL2_DVIM | 
ICH_HCR_EL2_vSGIEOICount; + set_sysreg_masks(kvm, ICH_HCR_EL2, resx); /* VNCR_EL2 */ - set_sysreg_masks(kvm, VNCR_EL2, VNCR_EL2_RES0, VNCR_EL2_RES1); + resx.res0 = VNCR_EL2_RES0; + resx.res1 = VNCR_EL2_RES1; + set_sysreg_masks(kvm, VNCR_EL2, resx); out: for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 88a57ca36d96..a7cd0badc20c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3414,8 +3414,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1, .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 }, - { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, - { SYS_DESC(SYS_SMIDR_EL1), undef_access }, IMPLEMENTATION_ID(AIDR_EL1, GENMASK_ULL(63, 0)), { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, ID_FILTERED(CTR_EL0, ctr_el0, @@ -4995,7 +4993,7 @@ static bool emulate_sys_reg(struct kvm_vcpu *vcpu, return false; } -static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) +static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, loff_t pos) { unsigned long i, idreg_idx = 0; @@ -5005,10 +5003,8 @@ static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) if (!is_vm_ftr_id_reg(reg_to_encoding(r))) continue; - if (idreg_idx == pos) + if (idreg_idx++ == pos) return r; - - idreg_idx++; } return NULL; @@ -5017,23 +5013,11 @@ static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) static void *idregs_debug_start(struct seq_file *s, loff_t *pos) { struct kvm *kvm = s->private; - u8 *iter; - - mutex_lock(&kvm->arch.config_lock); - iter = &kvm->arch.idreg_debugfs_iter; - if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags) && - *iter == (u8)~0) { - *iter = *pos; - if (!idregs_debug_find(kvm, *iter)) - iter = NULL; - } else { - iter = ERR_PTR(-EBUSY); - } - - mutex_unlock(&kvm->arch.config_lock); + if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) + return NULL; - return iter; + return (void *)idregs_debug_find(kvm, *pos); } static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) @@ -5042,37 +5026,19 @@ static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) (*pos)++; - if (idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter + 1)) { - kvm->arch.idreg_debugfs_iter++; - - return &kvm->arch.idreg_debugfs_iter; - } - - return NULL; + return (void *)idregs_debug_find(kvm, *pos); } static void idregs_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = s->private; - - if (IS_ERR(v)) - return; - - mutex_lock(&kvm->arch.config_lock); - - kvm->arch.idreg_debugfs_iter = ~0; - - mutex_unlock(&kvm->arch.config_lock); } static int idregs_debug_show(struct seq_file *s, void *v) { - const struct sys_reg_desc *desc; + const struct sys_reg_desc *desc = v; struct kvm *kvm = s->private; - desc = idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter); - - if (!desc->name) + if (!desc) return 0; seq_printf(s, "%20s:\t%016llx\n", @@ -5090,12 +5056,78 @@ static const struct seq_operations idregs_debug_sops = { DEFINE_SEQ_ATTRIBUTE(idregs_debug); -void kvm_sys_regs_create_debugfs(struct kvm *kvm) +static const struct sys_reg_desc *sr_resx_find(struct kvm *kvm, loff_t pos) +{ + unsigned long i, sr_idx = 0; + + for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) { + const struct sys_reg_desc *r = &sys_reg_descs[i]; + + if (r->reg < 
__SANITISED_REG_START__) + continue; + + if (sr_idx++ == pos) + return r; + } + + return NULL; +} + +static void *sr_resx_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = s->private; + + if (!kvm->arch.sysreg_masks) + return NULL; + + return (void *)sr_resx_find(kvm, *pos); +} + +static void *sr_resx_next(struct seq_file *s, void *v, loff_t *pos) { - kvm->arch.idreg_debugfs_iter = ~0; + struct kvm *kvm = s->private; + + (*pos)++; + + return (void *)sr_resx_find(kvm, *pos); +} + +static void sr_resx_stop(struct seq_file *s, void *v) +{ +} +static int sr_resx_show(struct seq_file *s, void *v) +{ + const struct sys_reg_desc *desc = v; + struct kvm *kvm = s->private; + struct resx resx; + + if (!desc) + return 0; + + resx = kvm_get_sysreg_resx(kvm, desc->reg); + + seq_printf(s, "%20s:\tRES0:%016llx\tRES1:%016llx\n", + desc->name, resx.res0, resx.res1); + + return 0; +} + +static const struct seq_operations sr_resx_sops = { + .start = sr_resx_start, + .next = sr_resx_next, + .stop = sr_resx_stop, + .show = sr_resx_show, +}; + +DEFINE_SEQ_ATTRIBUTE(sr_resx); + +void kvm_sys_regs_create_debugfs(struct kvm *kvm) +{ debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm, &idregs_debug_fops); + debugfs_create_file("resx", 0444, kvm->debugfs_dentry, kvm, + &sr_resx_fops); } static void reset_vm_ftr_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *reg) @@ -5581,6 +5613,8 @@ static void vcpu_set_hcr(struct kvm_vcpu *vcpu) if (kvm_has_mte(vcpu->kvm)) vcpu->arch.hcr_el2 |= HCR_ATA; + else + vcpu->arch.hcr_el2 |= HCR_TID5; /* * In the absence of FGT, we cannot independently trap TLBI diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index b3f904472fac..2a983664220c 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -49,6 +49,16 @@ struct sys_reg_params { .Op2 = ((esr) >> 17) & 0x7, \ .is_write = !((esr) & 1) }) +/* + * The Feature ID space is defined as the System register space in AArch64 + * with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, op2=={0-7}. + */ +static inline bool in_feat_id_space(struct sys_reg_params *p) +{ + return (p->Op0 == 3 && !(p->Op1 & 0b100) && p->Op1 != 2 && + p->CRn == 0 && !(p->CRm & 0b1000)); +} + struct sys_reg_desc { /* Sysreg string for debug */ const char *name; diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index bf888d150dc7..2346f9435a71 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -47,8 +47,30 @@ static void init_hyp_physvirt_offset(void) } /* + * Calculate the actual VA size used by the hypervisor + */ +__init u32 kvm_hyp_va_bits(void) +{ + /* + * The ID map is always configured for 48 bits of translation, which may + * be different from the number of VA bits used by the regular kernel + * stage 1. + * + * At EL2, there is only one TTBR register, and we can't switch between + * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom + * line: we need to use the extended range with *both* our translation + * tables. + * + * So use the maximum of the idmap VA bits and the regular kernel stage + * 1 VA bits as the hypervisor VA size to assure that the hypervisor can + * both ID map its code page and map any kernel memory. + */ + return max(IDMAP_VA_BITS, vabits_actual); +} + +/* * We want to generate a hyp VA with the following format (with V == - * vabits_actual): + * hypervisor VA bits): * * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 
0 * --------------------------------------------------------- @@ -61,10 +83,11 @@ __init void kvm_compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; + u32 hyp_va_bits = kvm_hyp_va_bits(); /* Where is my RAM region? */ - hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); - hyp_va_msb ^= BIT(vabits_actual - 1); + hyp_va_msb = idmap_addr & BIT(hyp_va_bits - 1); + hyp_va_msb ^= BIT(hyp_va_bits - 1); tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); @@ -72,9 +95,9 @@ __init void kvm_compute_layout(void) va_mask = GENMASK_ULL(tag_lsb - 1, 0); tag_val = hyp_va_msb; - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) { + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (hyp_va_bits - 1)) { /* We have some free bits to insert a random tag. */ - tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); + tag_val |= get_random_long() & GENMASK_ULL(hyp_va_bits - 2, tag_lsb); } tag_val >>= tag_lsb; @@ -296,31 +319,3 @@ void kvm_compute_final_ctr_el0(struct alt_instr *alt, generate_mov_q(read_sanitised_ftr_reg(SYS_CTR_EL0), origptr, updptr, nr_inst); } - -void kvm_pan_patch_el2_entry(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, int nr_inst) -{ - /* - * If we're running at EL1 without hVHE, then SCTLR_EL2.SPAN means - * nothing to us (it is RES1), and we don't need to set PSTATE.PAN - * to anything useful. - */ - if (!is_kernel_in_hyp_mode() && !cpus_have_cap(ARM64_KVM_HVHE)) - return; - - /* - * Leap of faith: at this point, we must be running VHE one way or - * another, and FEAT_PAN is required to be implemented. If KVM - * explodes at runtime because your system does not abide by this - * requirement, call your favourite HW vendor, they have screwed up. - * - * We don't expect hVHE to access any userspace mapping, so always - * set PSTATE.PAN on enty. Same thing if we have PAN enabled on an - * EL2 kernel. Only force it to 0 if we have not configured PAN in - * the kernel (and you know this is really silly). - */ - if (cpus_have_cap(ARM64_KVM_HVHE) || IS_ENABLED(CONFIG_ARM64_PAN)) - *updptr = cpu_to_le32(ENCODE_PSTATE(1, PAN)); - else - *updptr = cpu_to_le32(ENCODE_PSTATE(0, PAN)); -} diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bb92853d1fd3..2c6776a1779b 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -25,11 +25,9 @@ struct vgic_state_iter { int nr_cpus; int nr_spis; - int nr_lpis; int dist_id; int vcpu_id; unsigned long intid; - int lpi_idx; }; static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) @@ -45,13 +43,15 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) * Let the xarray drive the iterator after the last SPI, as the iterator * has exhausted the sequentially-allocated INTID space. 
*/ - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) && - iter->nr_lpis) { - if (iter->lpi_idx < iter->nr_lpis) - xa_find_after(&dist->lpi_xa, &iter->intid, - VGIC_LPI_MAX_INTID, - LPI_XA_MARK_DEBUG_ITER); - iter->lpi_idx++; + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->intid == VGIC_LPI_MAX_INTID + 1) + return; + + rcu_read_lock(); + if (!xa_find_after(&dist->lpi_xa, &iter->intid, + VGIC_LPI_MAX_INTID, XA_PRESENT)) + iter->intid = VGIC_LPI_MAX_INTID + 1; + rcu_read_unlock(); return; } @@ -61,44 +61,21 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) iter->intid = 0; } -static int iter_mark_lpis(struct kvm *kvm) +static int vgic_count_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid, flags; struct vgic_irq *irq; + unsigned long intid; int nr_lpis = 0; - xa_lock_irqsave(&dist->lpi_xa, flags); - - xa_for_each(&dist->lpi_xa, intid, irq) { - if (!vgic_try_get_irq_ref(irq)) - continue; - - __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + rcu_read_lock(); + xa_for_each(&dist->lpi_xa, intid, irq) nr_lpis++; - } - - xa_unlock_irqrestore(&dist->lpi_xa, flags); + rcu_read_unlock(); return nr_lpis; } -static void iter_unmark_lpis(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid, flags; - struct vgic_irq *irq; - - xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { - xa_lock_irqsave(&dist->lpi_xa, flags); - __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); - xa_unlock_irqrestore(&dist->lpi_xa, flags); - - /* vgic_put_irq() expects to be called outside of the xa_lock */ - vgic_put_irq(kvm, irq); - } -} - static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, loff_t pos) { @@ -108,8 +85,6 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, iter->nr_cpus = nr_cpus; iter->nr_spis = kvm->arch.vgic.nr_spis; - if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - iter->nr_lpis = iter_mark_lpis(kvm); /* Fast forward to the right position if needed */ while (pos--) @@ -121,7 +96,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && - (!iter->nr_lpis || iter->lpi_idx > iter->nr_lpis); + iter->intid > VGIC_LPI_MAX_INTID; } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) @@ -129,72 +104,56 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) struct kvm *kvm = s->private; struct vgic_state_iter *iter; - mutex_lock(&kvm->arch.config_lock); - iter = kvm->arch.vgic.iter; - if (iter) { - iter = ERR_PTR(-EBUSY); - goto out; - } - iter = kmalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) { - iter = ERR_PTR(-ENOMEM); - goto out; - } + if (!iter) + return ERR_PTR(-ENOMEM); iter_init(kvm, iter, *pos); - kvm->arch.vgic.iter = iter; - if (end_of_vgic(iter)) + if (end_of_vgic(iter)) { + kfree(iter); iter = NULL; -out: - mutex_unlock(&kvm->arch.config_lock); + } + return iter; } static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) { struct kvm *kvm = s->private; - struct vgic_state_iter *iter = kvm->arch.vgic.iter; + struct vgic_state_iter *iter = v; ++*pos; iter_next(kvm, iter); - if (end_of_vgic(iter)) + if (end_of_vgic(iter)) { + kfree(iter); iter = NULL; + } return iter; } static void vgic_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = s->private; - struct vgic_state_iter *iter; + struct vgic_state_iter *iter = v; 
- /* - * If the seq file wasn't properly opened, there's nothing to clearn - * up. - */ - if (IS_ERR(v)) + if (IS_ERR_OR_NULL(v)) return; - mutex_lock(&kvm->arch.config_lock); - iter = kvm->arch.vgic.iter; - iter_unmark_lpis(kvm); kfree(iter); - kvm->arch.vgic.iter = NULL; - mutex_unlock(&kvm->arch.config_lock); } static void print_dist_state(struct seq_file *s, struct vgic_dist *dist, struct vgic_state_iter *iter) { bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; + struct kvm *kvm = s->private; seq_printf(s, "Distributor\n"); seq_printf(s, "===========\n"); seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); if (v3) - seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis); + seq_printf(s, "nr_lpis:\t%d\n", vgic_count_lpis(kvm)); seq_printf(s, "enabled:\t%d\n", dist->enabled); seq_printf(s, "\n"); @@ -291,16 +250,13 @@ static int vgic_debug_show(struct seq_file *s, void *v) if (iter->vcpu_id < iter->nr_cpus) vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); - /* - * Expect this to succeed, as iter_mark_lpis() takes a reference on - * every LPI to be visited. - */ if (iter->intid < VGIC_NR_PRIVATE_IRQS) irq = vgic_get_vcpu_irq(vcpu, iter->intid); else irq = vgic_get_irq(kvm, iter->intid); - if (WARN_ON_ONCE(!irq)) - return -EINVAL; + + if (!irq) + return 0; raw_spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index dc9f9db31026..86c149537493 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -140,6 +140,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) goto out_unlock; } + kvm->arch.vgic.in_kernel = true; + kvm->arch.vgic.vgic_model = type; + kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; + kvm_for_each_vcpu(i, vcpu, kvm) { ret = vgic_allocate_private_irqs_locked(vcpu, type); if (ret) @@ -156,10 +160,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) goto out_unlock; } - kvm->arch.vgic.in_kernel = true; - kvm->arch.vgic.vgic_model = type; - kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index 61b44f3f2bf1..5c69fa615823 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -57,7 +57,7 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx) * as the L1 guest is in charge of provisioning the interrupts via its own * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR * page. 
This means that the flow described above does work (there is no - * state to rebuild in the L0 hypervisor), and that most things happed on L2 + * state to rebuild in the L0 hypervisor), and that most things happen on L2 * load/put: * * - on L2 load: move the in-memory L1 vGIC configuration into a shadow, @@ -202,16 +202,16 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend) reg |= ICH_MISR_EL2_NP; - if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_ENG0_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_EL2_VENG0_MASK)) reg |= ICH_MISR_EL2_VGrp0E; - if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_ENG0_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_EL2_VENG0_MASK)) reg |= ICH_MISR_EL2_VGrp0D; - if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_ENG1_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_EL2_VENG1_MASK)) reg |= ICH_MISR_EL2_VGrp1E; - if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_ENG1_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_EL2_VENG1_MASK)) reg |= ICH_MISR_EL2_VGrp1D; return reg; diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 1d6dd1b545bd..386ddf69a9c5 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -41,9 +41,9 @@ void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, if (!als->nr_sgi) cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount; - cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ? + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG0_MASK) ? ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE; - cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ? + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG1_MASK) ? ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE; /* @@ -215,7 +215,7 @@ void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val) * We only deal with DIR when EOIMode==1, and only for SGI, * PPI or SPI. */ - if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) || + if (!(cpuif->vgic_vmcr & ICH_VMCR_EL2_VEOIM_MASK) || val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS) return; @@ -408,25 +408,23 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) u32 vmcr; if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { - vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) & - ICH_VMCR_ACK_CTL_MASK; - vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) & - ICH_VMCR_FIQ_EN_MASK; + vmcr = FIELD_PREP(ICH_VMCR_EL2_VAckCtl, vmcrp->ackctl); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VFIQEn, vmcrp->fiqen); } else { /* * When emulating GICv3 on GICv3 with SRE=1 on the * VFIQEn bit is RES1 and the VAckCtl bit is RES0. 
*/ - vmcr = ICH_VMCR_FIQ_EN_MASK; + vmcr = ICH_VMCR_EL2_VFIQEn_MASK; } - vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; - vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; - vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; - vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; - vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; - vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; - vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VCBPR, vmcrp->cbpr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VEOIM, vmcrp->eoim); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR1, vmcrp->abpr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR0, vmcrp->bpr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VPMR, vmcrp->pmr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG0, vmcrp->grpen0); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG1, vmcrp->grpen1); cpu_if->vgic_vmcr = vmcr; } @@ -440,10 +438,8 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcr = cpu_if->vgic_vmcr; if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { - vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >> - ICH_VMCR_ACK_CTL_SHIFT; - vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >> - ICH_VMCR_FIQ_EN_SHIFT; + vmcrp->ackctl = FIELD_GET(ICH_VMCR_EL2_VAckCtl, vmcr); + vmcrp->fiqen = FIELD_GET(ICH_VMCR_EL2_VFIQEn, vmcr); } else { /* * When emulating GICv3 on GICv3 with SRE=1 on the @@ -453,13 +449,13 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcrp->ackctl = 0; } - vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; - vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT; - vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; - vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; - vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; - vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; - vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; + vmcrp->cbpr = FIELD_GET(ICH_VMCR_EL2_VCBPR, vmcr); + vmcrp->eoim = FIELD_GET(ICH_VMCR_EL2_VEOIM, vmcr); + vmcrp->abpr = FIELD_GET(ICH_VMCR_EL2_VBPR1, vmcr); + vmcrp->bpr = FIELD_GET(ICH_VMCR_EL2_VBPR0, vmcr); + vmcrp->pmr = FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr); + vmcrp->grpen0 = FIELD_GET(ICH_VMCR_EL2_VENG0, vmcr); + vmcrp->grpen1 = FIELD_GET(ICH_VMCR_EL2_VENG1, vmcr); } #define INITIAL_PENDBASER_VALUE \ @@ -880,6 +876,20 @@ void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt, *updptr = cpu_to_le32(insn); } +void vgic_v3_enable_cpuif_traps(void) +{ + u64 traps = vgic_ich_hcr_trap_bits(); + + if (traps) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", + (traps & ICH_HCR_EL2_TALL0) ? "G0" : "", + (traps & ICH_HCR_EL2_TALL1) ? "G1" : "", + (traps & ICH_HCR_EL2_TC) ? "C" : "", + (traps & ICH_HCR_EL2_TDIR) ? 
"D" : ""); + static_branch_enable(&vgic_v3_cpuif_trap); + } +} + /** * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller * @info: pointer to the GIC description @@ -891,7 +901,6 @@ int vgic_v3_probe(const struct gic_kvm_info *info) { u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); bool has_v2; - u64 traps; int ret; has_v2 = ich_vtr_el2 >> 63; @@ -955,15 +964,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS; } - traps = vgic_ich_hcr_trap_bits(); - if (traps) { - kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", - (traps & ICH_HCR_EL2_TALL0) ? "G0" : "", - (traps & ICH_HCR_EL2_TALL1) ? "G1" : "", - (traps & ICH_HCR_EL2_TC) ? "C" : "", - (traps & ICH_HCR_EL2_TDIR) ? "D" : ""); - static_branch_enable(&vgic_v3_cpuif_trap); - } + vgic_v3_enable_cpuif_traps(); kvm_vgic_global_state.vctrl_base = NULL; kvm_vgic_global_state.type = VGIC_V3; diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index 2d3811f4e117..331651087e2c 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -48,5 +48,7 @@ int vgic_v5_probe(const struct gic_kvm_info *info) static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); kvm_info("GCIE legacy system register CPU interface\n"); + vgic_v3_enable_cpuif_traps(); + return 0; } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 5f0fc96b4dc2..c9b3bb07e483 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -324,6 +324,7 @@ void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als); void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_reset(struct kvm_vcpu *vcpu); +void vgic_v3_enable_cpuif_traps(void); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 4e298baddc2e..cc5b40917d0d 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -611,7 +611,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } -#ifdef CONFIG_ARM64_LSE_ATOMICS static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type, u32 insn) { @@ -755,7 +754,6 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, value); } -#endif u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b6eb7a465ad2..5ce82edc508e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -776,7 +776,6 @@ static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } -#ifdef CONFIG_ARM64_LSE_ATOMICS static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; @@ -843,12 +842,6 @@ static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } -#else -static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) -{ - return -EINVAL; -} -#endif static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 0fac75f01534..50a0c695f340 100644 --- 
a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -46,6 +46,8 @@ HAS_HCX HAS_LDAPR HAS_LPA2 HAS_LSE_ATOMICS +HAS_LS64 +HAS_LS64_V HAS_MOPS HAS_NESTED_VIRT HAS_BBML2_NOABORT diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 8921b51866d6..9d1c21108057 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1856,10 +1856,7 @@ UnsignedEnum 31:28 RDM 0b0000 NI 0b0001 IMP EndEnum -UnsignedEnum 27:24 TME - 0b0000 NI - 0b0001 IMP -EndEnum +Res0 27:24 UnsignedEnum 23:20 ATOMIC 0b0000 NI 0b0010 IMP @@ -2098,18 +2095,18 @@ UnsignedEnum 47:44 EXS 0b0000 NI 0b0001 IMP EndEnum -Enum 43:40 TGRAN4_2 +UnsignedEnum 43:40 TGRAN4_2 0b0000 TGRAN4 0b0001 NI 0b0010 IMP 0b0011 52_BIT EndEnum -Enum 39:36 TGRAN64_2 +UnsignedEnum 39:36 TGRAN64_2 0b0000 TGRAN64 0b0001 NI 0b0010 IMP EndEnum -Enum 35:32 TGRAN16_2 +UnsignedEnum 35:32 TGRAN16_2 0b0000 TGRAN16 0b0001 NI 0b0010 IMP @@ -2256,9 +2253,10 @@ UnsignedEnum 43:40 FWB 0b0000 NI 0b0001 IMP EndEnum -Enum 39:36 IDS - 0b0000 0x0 - 0b0001 0x18 +UnsignedEnum 39:36 IDS + 0b0000 NI + 0b0001 IMP + 0b0010 EL3 EndEnum UnsignedEnum 35:32 AT 0b0000 NI @@ -2432,10 +2430,7 @@ Field 57 EPAN Field 56 EnALS Field 55 EnAS0 Field 54 EnASR -Field 53 TME -Field 52 TME0 -Field 51 TMT -Field 50 TMT0 +Res0 53:50 Field 49:46 TWEDEL Field 45 TWEDEn Field 44 DSSBS @@ -3749,6 +3744,75 @@ UnsignedEnum 2:0 F8S1 EndEnum EndSysreg +Sysreg SCTLR_EL2 3 4 1 0 0 +Field 63 TIDCP +Field 62 SPINTMASK +Field 61 NMI +Field 60 EnTP2 +Field 59 TCSO +Field 58 TCSO0 +Field 57 EPAN +Field 56 EnALS +Field 55 EnAS0 +Field 54 EnASR +Res0 53:50 +Field 49:46 TWEDEL +Field 45 TWEDEn +Field 44 DSSBS +Field 43 ATA +Field 42 ATA0 +Enum 41:40 TCF + 0b00 NONE + 0b01 SYNC + 0b10 ASYNC + 0b11 ASYMM +EndEnum +Enum 39:38 TCF0 + 0b00 NONE + 0b01 SYNC + 0b10 ASYNC + 0b11 ASYMM +EndEnum +Field 37 ITFSB +Field 36 BT +Field 35 BT0 +Field 34 EnFPM +Field 33 MSCEn +Field 32 CMOW +Field 31 EnIA +Field 30 EnIB +Field 29 LSMAOE +Field 28 nTLSMD +Field 27 EnDA +Field 26 UCI +Field 25 EE +Field 24 E0E +Field 23 SPAN +Field 22 EIS +Field 21 IESB +Field 20 TSCXT +Field 19 WXN +Field 18 nTWE +Res0 17 +Field 16 nTWI +Field 15 UCT +Field 14 DZE +Field 13 EnDB +Field 12 I +Field 11 EOS +Field 10 EnRCTX +Res0 9 +Field 8 SED +Field 7 ITD +Field 6 nAA +Field 5 CP15BEN +Field 4 SA0 +Field 3 SA +Field 2 C +Field 1 A +Field 0 M +EndSysreg + Sysreg HCR_EL2 3 4 1 1 0 Field 63:60 TWEDEL Field 59 TWEDEn @@ -3771,8 +3835,7 @@ Field 43 NV1 Field 42 NV Field 41 API Field 40 APK -Field 39 TME -Field 38 MIOCNCE +Res0 39:38 Field 37 TEA Field 36 TERR Field 35 TLOR @@ -4400,6 +4463,63 @@ Field 56:12 BADDR Res0 11:0 EndSysreg +Sysreg VTCR_EL2 3 4 2 1 2 +Res0 63:46 +Field 45 HDBSS +Field 44 HAFT +Res0 43:42 +Field 41 TL0 +Field 40 GCSH +Res0 39 +Field 38 D128 +Field 37 S2POE +Field 36 S2PIE +Field 35 TL1 +Field 34 AssuredOnly +Field 33 SL2 +Field 32 DS +Res1 31 +Field 30 NSA +Field 29 NSW +Field 28 HWU62 +Field 27 HWU61 +Field 26 HWU60 +Field 25 HWU59 +Res0 24:23 +Field 22 HD +Field 21 HA +Res0 20 +Enum 19 VS + 0b0 8BIT + 0b1 16BIT +EndEnum +Field 18:16 PS +Enum 15:14 TG0 + 0b00 4K + 0b01 64K + 0b10 16K +EndEnum +Enum 13:12 SH0 + 0b00 NONE + 0b01 OUTER + 0b11 INNER +EndEnum +Enum 11:10 ORGN0 + 0b00 NC + 0b01 WBWA + 0b10 WT + 0b11 WBnWA +EndEnum +Enum 9:8 IRGN0 + 0b00 NC + 0b01 WBWA + 0b10 WT + 0b11 WBnWA +EndEnum +Field 7:6 SL0 +Field 5:0 T0SZ +EndSysreg + Sysreg GCSCR_EL2 3 4 2 5 0 Fields GCSCR_ELx EndSysreg @@ -4579,7 +4699,7 @@ Field 7 ICC_IAFFIDR_EL1 Field 6 ICC_ICSR_EL1 Field 5 ICC_PCR_EL1 Field 4 ICC_HPPIR_EL1 
-Field 3 ICC_HAPR_EL1 +Res1 3 Field 2 ICC_CR0_EL1 Field 1 ICC_IDRn_EL1 Field 0 ICC_APR_EL1
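
The debugfs rework in the sys_regs.c and vgic-debug.c hunks above replaces iterator state stored in guest data structures (and the config_lock dance around it) with stateless seq_file callbacks that re-derive the current element from *pos on every call. The following is a minimal sketch of that pattern, not the kernel's actual code: the table, the demo_* names and the file name are made up, only the seq_file/debugfs interfaces (struct seq_operations, DEFINE_SEQ_ATTRIBUTE, debugfs_create_file) are the real APIs the patch uses.

#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static const char *const demo_table[] = { "alpha", "bravo", "charlie" };

/* Translate a seq_file position into an element; NULL ends iteration. */
static void *demo_find(loff_t pos)
{
	if (pos >= ARRAY_SIZE(demo_table))
		return NULL;

	return (void *)demo_table[pos];
}

static void *demo_start(struct seq_file *s, loff_t *pos)
{
	return demo_find(*pos);
}

static void *demo_next(struct seq_file *s, void *v, loff_t *pos)
{
	(*pos)++;

	return demo_find(*pos);
}

static void demo_stop(struct seq_file *s, void *v)
{
	/* Nothing to release: no state survives between calls. */
}

static int demo_show(struct seq_file *s, void *v)
{
	seq_printf(s, "%s\n", (const char *)v);

	return 0;
}

static const struct seq_operations demo_sops = {
	.start = demo_start,
	.next  = demo_next,
	.stop  = demo_stop,
	.show  = demo_show,
};

DEFINE_SEQ_ATTRIBUTE(demo);

/* Hook-up, e.g.: debugfs_create_file("demo", 0444, parent, NULL, &demo_fops); */

Because start/next simply look the element up again, nothing has to be cached in struct kvm and no lock needs to be held across reads, which is what lets the patch drop idreg_debugfs_iter and the vgic iter pointer.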

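The vgic-v3.c hunks convert open-coded shift-and-mask sequences into FIELD_PREP()/FIELD_GET() against the generated ICH_VMCR_EL2_* masks. A tiny illustration of that helper pair follows; the DEMO_REG_* layout and function names are invented for illustration, only FIELD_PREP()/FIELD_GET() and GENMASK()/BIT() are the real <linux/bitfield.h> and <linux/bits.h> helpers the patch switches to.

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Hypothetical two-field register layout, purely to show the helpers. */
#define DEMO_REG_PRIO	GENMASK(7, 3)
#define DEMO_REG_EN	BIT(0)

static u32 demo_pack(u8 prio, bool en)
{
	/* FIELD_PREP() shifts the value up into the mask's position. */
	return FIELD_PREP(DEMO_REG_PRIO, prio) | FIELD_PREP(DEMO_REG_EN, en);
}

static u8 demo_prio(u32 reg)
{
	/* FIELD_GET() masks the field and shifts it back down to bit 0. */
	return FIELD_GET(DEMO_REG_PRIO, reg);
}

Using the mask macro in both directions removes the paired *_SHIFT/*_MASK definitions and the chance of mismatching them, which is why the vgic_v3_set_vmcr()/vgic_v3_get_vmcr() conversions above shrink as they do.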