diff options
32 files changed, 1596 insertions, 544 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7926a90156f..35affb5d9456 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2671,7 +2671,7 @@ handled. 4.87 KVM_SET_GUEST_DEBUG Capability: KVM_CAP_SET_GUEST_DEBUG -Architectures: x86, s390, ppc +Architectures: x86, s390, ppc, arm64 Type: vcpu ioctl Parameters: struct kvm_guest_debug (in) Returns: 0 on success; -1 on error @@ -2693,8 +2693,8 @@ when running. Common control bits are: The top 16 bits of the control field are architecture specific control flags which can include the following: - - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86] - - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390] + - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64] + - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64] - KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86] - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86] - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390] @@ -2709,6 +2709,11 @@ updated to the correct (supplied) values. The second part of the structure is architecture specific and typically contains a set of debug registers. +For arm64 the number of debug registers is implementation defined and +can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and +KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number +indicating the number of supported registers. + When debug events exit the main run loop with the reason KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run structure containing architecture specific debug information. @@ -3111,11 +3116,13 @@ data_offset describes where the data is located (KVM_EXIT_IO_OUT) or where kvm expects application code to place the data for the next KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array. + /* KVM_EXIT_DEBUG */ struct { struct kvm_debug_exit_arch arch; } debug; -Unused. +If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event +for which architecture specific information is returned. /* KVM_EXIT_MMIO */ struct { diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index e896d2c196e6..dcba0fa5176e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -231,4 +231,9 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arm_init_debug(void) {} +static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index bc738d2b8392..ce404a5c3062 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -125,6 +125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + kvm_vgic_early_init(kvm); kvm_timer_init(kvm); /* Mark the initial VMID generation invalid */ @@ -249,6 +250,7 @@ out: void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { + kvm_vgic_vcpu_early_init(vcpu); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) @@ -278,6 +280,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /* Set up the timer */ kvm_timer_vcpu_init(vcpu); + kvm_arm_reset_debug_ptr(vcpu); + return 0; } @@ -301,13 +305,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_arm_set_running_vcpu(NULL); } -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - - int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -528,10 +525,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->arch.pause) vcpu_pause(vcpu); - kvm_vgic_flush_hwstate(vcpu); + /* + * Disarming the background timer must be done in a + * preemptible context, as this call may sleep. + */ kvm_timer_flush_hwstate(vcpu); + /* + * Preparing the interrupts to be injected also + * involves poking the GIC, which must be done in a + * non-preemptible context. + */ preempt_disable(); + kvm_vgic_flush_hwstate(vcpu); + local_irq_disable(); /* @@ -544,12 +551,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { local_irq_enable(); + kvm_vgic_sync_hwstate(vcpu); preempt_enable(); kvm_timer_sync_hwstate(vcpu); - kvm_vgic_sync_hwstate(vcpu); continue; } + kvm_arm_setup_debug(vcpu); + /************************************************************** * Enter the guest */ @@ -564,6 +573,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Back from guest *************************************************************/ + kvm_arm_clear_debug(vcpu); + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still @@ -586,11 +597,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ kvm_guest_exit(); trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); - preempt_enable(); + kvm_vgic_sync_hwstate(vcpu); + + preempt_enable(); kvm_timer_sync_hwstate(vcpu); - kvm_vgic_sync_hwstate(vcpu); ret = handle_exit(vcpu, run, ret); } @@ -921,6 +933,8 @@ static void cpu_init_hyp_mode(void *dummy) vector_ptr = (unsigned long)__kvm_hyp_vector; __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); + + kvm_arm_init_debug(); } static int hyp_init_cpu_notify(struct notifier_block *self, diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index d503fbb787d3..96e935bbc38c 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -290,3 +290,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { return -EINVAL; } + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 568494dbbbb5..900ef6dd8f72 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -361,10 +361,6 @@ hyp_hvc: @ Check syndrome register mrc p15, 4, r1, c5, c2, 0 @ HSR lsr r0, r1, #HSR_EC_SHIFT -#ifdef CONFIG_VFPv3 - cmp r0, #HSR_EC_CP_0_13 - beq switch_to_guest_vfp -#endif cmp r0, #HSR_EC_HVC bne guest_trap @ Not HVC instr. @@ -378,7 +374,10 @@ hyp_hvc: cmp r2, #0 bne guest_trap @ Guest called HVC -host_switch_to_hyp: + /* + * Getting here means host called HVC, we shift parameters and branch + * to Hyp function. + */ pop {r0, r1, r2} /* Check for __hyp_get_vectors */ @@ -409,6 +408,10 @@ guest_trap: @ Check if we need the fault information lsr r1, r1, #HSR_EC_SHIFT +#ifdef CONFIG_VFPv3 + cmp r1, #HSR_EC_CP_0_13 + beq switch_to_guest_vfp +#endif cmp r1, #HSR_EC_IABT mrceq p15, 4, r2, c6, c0, 2 @ HIFAR beq 2f @@ -477,7 +480,6 @@ guest_trap: */ #ifdef CONFIG_VFPv3 switch_to_guest_vfp: - load_vcpu @ Load VCPU pointer to r0 push {r3-r7} @ NEON/VFP used. Turn on VFP access. diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index f558c073c023..eeb85858d6bb 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -77,7 +77,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_reset_coprocs(vcpu); /* Reset arch_timer context */ - kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); - - return 0; + return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); } diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 52b484b6aa1a..4c47cb2fbb52 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -16,6 +16,8 @@ #ifndef __ASM_HW_BREAKPOINT_H #define __ASM_HW_BREAKPOINT_H +#include <asm/cputype.h> + #ifdef __KERNEL__ struct arch_hw_breakpoint_ctrl { @@ -132,5 +134,17 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) extern struct pmu perf_ops_bp; +/* Determine number of BRP registers available. */ +static inline int get_num_brps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; +} + +/* Determine number of WRP registers available. */ +static inline int get_num_wrps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; +} + #endif /* __KERNEL__ */ #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index ac6fafb95fe7..7605e095217f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -171,10 +171,13 @@ #define HSTR_EL2_TTEE (1 << 16) #define HSTR_EL2_T(x) (1 << x) +/* Hyp Coproccessor Trap Register Shifts */ +#define CPTR_EL2_TFP_SHIFT 10 + /* Hyp Coprocessor Trap Register */ #define CPTR_EL2_TCPAC (1 << 31) #define CPTR_EL2_TTA (1 << 20) -#define CPTR_EL2_TFP (1 << 10) +#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_TDRA (1 << 11) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 3c5fe685a2d6..67fa0de3d483 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -46,24 +46,16 @@ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ #define PAR_EL1 21 /* Physical Address Register */ #define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ -#define DBGBCR15_EL1 38 -#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ -#define DBGBVR15_EL1 54 -#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ -#define DBGWCR15_EL1 70 -#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ -#define DBGWVR15_EL1 86 -#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ +#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 88 /* Domain Access Control Register */ -#define IFSR32_EL2 89 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ -#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 94 +#define DACR32_EL2 24 /* Domain Access Control Register */ +#define IFSR32_EL2 25 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ +#define TEECR32_EL1 28 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 30 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -132,6 +124,8 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern u32 __kvm_get_mdcr_el2(void); + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2709db2a7eac..415938dc45cf 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -103,15 +103,34 @@ struct kvm_vcpu_arch { /* HYP configuration */ u64 hcr_el2; + u32 mdcr_el2; /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Debug state */ + /* Guest debug state */ u64 debug_flags; + /* + * We maintain more than a single set of debug registers to support + * debugging the guest from the host and to maintain separate host and + * guest state during world switches. vcpu_debug_state are the debug + * registers of the vcpu as the guest sees them. host_debug_state are + * the host registers which are saved and restored during + * world switches. external_debug_state contains the debug + * values we want to debug the guest. This is set via the + * KVM_SET_GUEST_DEBUG ioctl. + * + * debug_ptr points to the set of debug registers that should be loaded + * onto the hardware when running the guest. + */ + struct kvm_guest_debug_arch *debug_ptr; + struct kvm_guest_debug_arch vcpu_debug_state; + struct kvm_guest_debug_arch external_debug_state; + /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; + struct kvm_guest_debug_arch host_debug_state; /* VGIC state */ struct vgic_cpu vgic_cpu; @@ -122,6 +141,17 @@ struct kvm_vcpu_arch { * here. */ + /* + * Guest registers we preserve during guest debugging. + * + * These shadow registers are updated by the kvm_handle_sys_reg + * trap handler if the guest accesses or updates them while we + * are using guest debug. + */ + struct { + u32 mdscr_el1; + } guest_debug_preserved; + /* Don't run the guest */ bool pause; @@ -216,15 +246,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } -struct vgic_sr_vectors { - void *save_vgic; - void *restore_vgic; -}; - static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +void kvm_arm_init_debug(void); +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); +void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index d26832022127..0cd7b5947dfc 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -53,14 +53,20 @@ struct kvm_regs { struct user_fpsimd_state fp_regs; }; -/* Supported Processor Types */ +/* + * Supported CPU Targets - Adding a new target type is not recommended, + * unless there are some special registers not supported by the + * genericv8 syreg table. + */ #define KVM_ARM_TARGET_AEM_V8 0 #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 #define KVM_ARM_TARGET_XGENE_POTENZA 3 #define KVM_ARM_TARGET_CORTEX_A53 4 +/* Generic ARM v8 target */ +#define KVM_ARM_TARGET_GENERIC_V8 5 -#define KVM_ARM_NUM_TARGETS 5 +#define KVM_ARM_NUM_TARGETS 6 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 @@ -100,12 +106,39 @@ struct kvm_sregs { struct kvm_fpu { }; +/* + * See v8 ARM ARM D7.3: Debug Registers + * + * The architectural limit is 16 debug registers of each type although + * in practice there are usually less (see ID_AA64DFR0_EL1). + * + * Although the control registers are architecturally defined as 32 + * bits wide we use a 64 bit structure here to keep parity with + * KVM_GET/SET_ONE_REG behaviour which treats all system registers as + * 64 bit values. It also allows for the possibility of the + * architecture expanding the control registers without having to + * change the userspace ABI. + */ +#define KVM_ARM_MAX_DBG_REGS 16 struct kvm_guest_debug_arch { + __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS]; }; struct kvm_debug_exit_arch { + __u32 hsr; + __u64 far; /* used for watchpoints */ }; +/* + * Architecture specific defines for kvm_guest_debug->control + */ + +#define KVM_GUESTDBG_USE_SW_BP (1 << 16) +#define KVM_GUESTDBG_USE_HW (1 << 17) + struct kvm_sync_regs { }; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index c99701a34d7b..8d89cf8dae55 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -116,17 +116,22 @@ int main(void) DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); + DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); + DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); + DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); + DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); + DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); - DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 7a1a5da6c8c1..77bee00bd7ea 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -48,18 +48,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp); static int core_num_brps; static int core_num_wrps; -/* Determine number of BRP registers available. */ -static int get_num_brps(void) -{ - return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; -} - -/* Determine number of WRP registers available. */ -static int get_num_wrps(void) -{ - return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; -} - int hw_breakpoint_slots(int type) { /* diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index f90f4aa7f88d..1949fe5f5424 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o -kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c new file mode 100644 index 000000000000..47e5f0feaee8 --- /dev/null +++ b/arch/arm64/kvm/debug.c @@ -0,0 +1,217 @@ +/* + * Debug and Guest Debug support + * + * Copyright (C) 2015 - Linaro Ltd + * Author: Alex Bennée <alex.bennee@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm_host.h> +#include <linux/hw_breakpoint.h> + +#include <asm/debug-monitors.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_emulate.h> + +#include "trace.h" + +/* These are the bits of MDSCR_EL1 we may manipulate */ +#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \ + DBG_MDSCR_KDE | \ + DBG_MDSCR_MDE) + +static DEFINE_PER_CPU(u32, mdcr_el2); + +/** + * save/restore_guest_debug_regs + * + * For some debug operations we need to tweak some guest registers. As + * a result we need to save the state of those registers before we + * make those modifications. + * + * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled + * after we have restored the preserved value to the main context. + */ +static void save_guest_debug_regs(struct kvm_vcpu *vcpu) +{ + vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1); + + trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", + vcpu->arch.guest_debug_preserved.mdscr_el1); +} + +static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) +{ + vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1; + + trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", + vcpu_sys_reg(vcpu, MDSCR_EL1)); +} + +/** + * kvm_arm_init_debug - grab what we need for debug + * + * Currently the sole task of this function is to retrieve the initial + * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has + * presumably been set-up by some knowledgeable bootcode. + * + * It is called once per-cpu during CPU hyp initialisation. + */ + +void kvm_arm_init_debug(void) +{ + __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2)); +} + +/** + * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state + */ + +void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state; +} + +/** + * kvm_arm_setup_debug - set up debug related stuff + * + * @vcpu: the vcpu pointer + * + * This is called before each entry into the hypervisor to setup any + * debug related registers. Currently this just ensures we will trap + * access to: + * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR) + * - Debug ROM Address (MDCR_EL2_TDRA) + * - OS related registers (MDCR_EL2_TDOSA) + * + * Additionally, KVM only traps guest accesses to the debug registers if + * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY + * flag on vcpu->arch.debug_flags). Since the guest must not interfere + * with the hardware state when debugging the guest, we must ensure that + * trapping is enabled whenever we are debugging the guest using the + * debug registers. + */ + +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) +{ + bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); + + trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); + + vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; + vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | + MDCR_EL2_TPMCR | + MDCR_EL2_TDRA | + MDCR_EL2_TDOSA); + + /* Is Guest debugging in effect? */ + if (vcpu->guest_debug) { + /* Route all software debug exceptions to EL2 */ + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE; + + /* Save guest debug state */ + save_guest_debug_regs(vcpu); + + /* + * Single Step (ARM ARM D2.12.3 The software step state + * machine) + * + * If we are doing Single Step we need to manipulate + * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the + * step has occurred the hypervisor will trap the + * debug exception and we return to userspace. + * + * If the guest attempts to single step its userspace + * we would have to deal with a trapped exception + * while in the guest kernel. Because this would be + * hard to unwind we suppress the guest's ability to + * do so by masking MDSCR_EL.SS. + * + * This confuses guest debuggers which use + * single-step behind the scenes but everything + * returns to normal once the host is no longer + * debugging the system. + */ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; + vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS; + } else { + vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS; + } + + trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu)); + + /* + * HW Breakpoints and watchpoints + * + * We simply switch the debug_ptr to point to our new + * external_debug_state which has been populated by the + * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY + * mechanism ensures the registers are updated on the + * world switch. + */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { + /* Enable breakpoints/watchpoints */ + vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE; + + vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + trap_debug = true; + + trace_kvm_arm_set_regset("BKPTS", get_num_brps(), + &vcpu->arch.debug_ptr->dbg_bcr[0], + &vcpu->arch.debug_ptr->dbg_bvr[0]); + + trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), + &vcpu->arch.debug_ptr->dbg_wcr[0], + &vcpu->arch.debug_ptr->dbg_wvr[0]); + } + } + + BUG_ON(!vcpu->guest_debug && + vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state); + + /* Trap debug register access */ + if (trap_debug) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); + trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1)); +} + +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) +{ + trace_kvm_arm_clear_debug(vcpu->guest_debug); + + if (vcpu->guest_debug) { + restore_guest_debug_regs(vcpu); + + /* + * If we were using HW debug we need to restore the + * debug_ptr to the guest debug state. + */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { + kvm_arm_reset_debug_ptr(vcpu); + + trace_kvm_arm_set_regset("BKPTS", get_num_brps(), + &vcpu->arch.debug_ptr->dbg_bcr[0], + &vcpu->arch.debug_ptr->dbg_bvr[0]); + + trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), + &vcpu->arch.debug_ptr->dbg_wcr[0], + &vcpu->arch.debug_ptr->dbg_wvr[0]); + } + } +} diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 9535bd555d1d..d250160d32bc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -32,6 +32,8 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include "trace.h" + struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; @@ -293,7 +295,8 @@ int __attribute_const__ kvm_target_cpu(void) break; }; - return -EINVAL; + /* Return a default generic target */ + return KVM_ARM_TARGET_GENERIC_V8; } int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) @@ -331,3 +334,41 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { return -EINVAL; } + +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ + KVM_GUESTDBG_USE_SW_BP | \ + KVM_GUESTDBG_USE_HW | \ + KVM_GUESTDBG_SINGLESTEP) + +/** + * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging + * @kvm: pointer to the KVM struct + * @kvm_guest_debug: the ioctl data buffer + * + * This sets up and enables the VM for guest debugging. Userspace + * passes in a control flag to enable different debug types and + * potentially other architecture specific information in the rest of + * the structure. + */ +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + trace_kvm_set_guest_debug(vcpu, dbg->control); + + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) + return -EINVAL; + + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + + /* Hardware assisted Break and Watch points */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { + vcpu->arch.external_debug_state = dbg->arch; + } + + } else { + /* If not enabled clear all flags */ + vcpu->guest_debug = 0; + } + return 0; +} diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 524fa25671fc..68a0759b1375 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -82,6 +82,45 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +/** + * kvm_handle_guest_debug - handle a debug exception instruction + * + * @vcpu: the vcpu pointer + * @run: access to the kvm_run structure for results + * + * We route all debug exceptions through the same handler. If both the + * guest and host are using the same debug facilities it will be up to + * userspace to re-inject the correct exception for guest delivery. + * + * @return: 0 (while setting run->exit_reason), -1 for error + */ +static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + int ret = 0; + + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.hsr = hsr; + + switch (hsr >> ESR_ELx_EC_SHIFT) { + case ESR_ELx_EC_WATCHPT_LOW: + run->debug.arch.far = vcpu->arch.fault.far_el2; + /* fall through */ + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_BKPT32: + case ESR_ELx_EC_BRK64: + break; + default: + kvm_err("%s: un-handled case hsr: %#08x\n", + __func__, (unsigned int) hsr); + ret = -1; + break; + } + + return ret; +} + static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, @@ -96,6 +135,11 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg, [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort, [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort, + [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug, + [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug, + [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug, + [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug, + [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug, }; static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 17a8fb14f428..8188f6ae8fc4 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -230,199 +230,52 @@ stp x24, x25, [x3, #160] .endm -.macro save_debug - // x2: base address for cpu context - // x3: tmp register - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - mrs x20, dbgbcr15_el1 - mrs x19, dbgbcr14_el1 - mrs x18, dbgbcr13_el1 - mrs x17, dbgbcr12_el1 - mrs x16, dbgbcr11_el1 - mrs x15, dbgbcr10_el1 - mrs x14, dbgbcr9_el1 - mrs x13, dbgbcr8_el1 - mrs x12, dbgbcr7_el1 - mrs x11, dbgbcr6_el1 - mrs x10, dbgbcr5_el1 - mrs x9, dbgbcr4_el1 - mrs x8, dbgbcr3_el1 - mrs x7, dbgbcr2_el1 - mrs x6, dbgbcr1_el1 - mrs x5, dbgbcr0_el1 - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 +.macro save_debug type + // x4: pointer to register set + // x5: number of registers to skip + // x6..x22 trashed + + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - mrs x20, dbgbvr15_el1 - mrs x19, dbgbvr14_el1 - mrs x18, dbgbvr13_el1 - mrs x17, dbgbvr12_el1 - mrs x16, dbgbvr11_el1 - mrs x15, dbgbvr10_el1 - mrs x14, dbgbvr9_el1 - mrs x13, dbgbvr8_el1 - mrs x12, dbgbvr7_el1 - mrs x11, dbgbvr6_el1 - mrs x10, dbgbvr5_el1 - mrs x9, dbgbvr4_el1 - mrs x8, dbgbvr3_el1 - mrs x7, dbgbvr2_el1 - mrs x6, dbgbvr1_el1 - mrs x5, dbgbvr0_el1 - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - mrs x20, dbgwcr15_el1 - mrs x19, dbgwcr14_el1 - mrs x18, dbgwcr13_el1 - mrs x17, dbgwcr12_el1 - mrs x16, dbgwcr11_el1 - mrs x15, dbgwcr10_el1 - mrs x14, dbgwcr9_el1 - mrs x13, dbgwcr8_el1 - mrs x12, dbgwcr7_el1 - mrs x11, dbgwcr6_el1 - mrs x10, dbgwcr5_el1 - mrs x9, dbgwcr4_el1 - mrs x8, dbgwcr3_el1 - mrs x7, dbgwcr2_el1 - mrs x6, dbgwcr1_el1 - mrs x5, dbgwcr0_el1 - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - mrs x20, dbgwvr15_el1 - mrs x19, dbgwvr14_el1 - mrs x18, dbgwvr13_el1 - mrs x17, dbgwvr12_el1 - mrs x16, dbgwvr11_el1 - mrs x15, dbgwvr10_el1 - mrs x14, dbgwvr9_el1 - mrs x13, dbgwvr8_el1 - mrs x12, dbgwvr7_el1 - mrs x11, dbgwvr6_el1 - mrs x10, dbgwvr5_el1 - mrs x9, dbgwvr4_el1 - mrs x8, dbgwvr3_el1 - mrs x7, dbgwvr2_el1 - mrs x6, dbgwvr1_el1 - mrs x5, dbgwvr0_el1 - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 - + mrs x21, \type\()15_el1 + mrs x20, \type\()14_el1 + mrs x19, \type\()13_el1 + mrs x18, \type\()12_el1 + mrs x17, \type\()11_el1 + mrs x16, \type\()10_el1 + mrs x15, \type\()9_el1 + mrs x14, \type\()8_el1 + mrs x13, \type\()7_el1 + mrs x12, \type\()6_el1 + mrs x11, \type\()5_el1 + mrs x10, \type\()4_el1 + mrs x9, \type\()3_el1 + mrs x8, \type\()2_el1 + mrs x7, \type\()1_el1 + mrs x6, \type\()0_el1 + + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - mrs x21, mdccint_el1 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + str x21, [x4, #(15 * 8)] + str x20, [x4, #(14 * 8)] + str x19, [x4, #(13 * 8)] + str x18, [x4, #(12 * 8)] + str x17, [x4, #(11 * 8)] + str x16, [x4, #(10 * 8)] + str x15, [x4, #(9 * 8)] + str x14, [x4, #(8 * 8)] + str x13, [x4, #(7 * 8)] + str x12, [x4, #(6 * 8)] + str x11, [x4, #(5 * 8)] + str x10, [x4, #(4 * 8)] + str x9, [x4, #(3 * 8)] + str x8, [x4, #(2 * 8)] + str x7, [x4, #(1 * 8)] + str x6, [x4, #(0 * 8)] .endm .macro restore_sysregs @@ -467,195 +320,52 @@ msr mdscr_el1, x25 .endm -.macro restore_debug - // x2: base address for cpu context - // x3: tmp register - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) +.macro restore_debug type + // x4: pointer to register set + // x5: number of registers to skip + // x6..x22 trashed - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - msr dbgbcr15_el1, x20 - msr dbgbcr14_el1, x19 - msr dbgbcr13_el1, x18 - msr dbgbcr12_el1, x17 - msr dbgbcr11_el1, x16 - msr dbgbcr10_el1, x15 - msr dbgbcr9_el1, x14 - msr dbgbcr8_el1, x13 - msr dbgbcr7_el1, x12 - msr dbgbcr6_el1, x11 - msr dbgbcr5_el1, x10 - msr dbgbcr4_el1, x9 - msr dbgbcr3_el1, x8 - msr dbgbcr2_el1, x7 - msr dbgbcr1_el1, x6 - msr dbgbcr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 + ldr x21, [x4, #(15 * 8)] + ldr x20, [x4, #(14 * 8)] + ldr x19, [x4, #(13 * 8)] + ldr x18, [x4, #(12 * 8)] + ldr x17, [x4, #(11 * 8)] + ldr x16, [x4, #(10 * 8)] + ldr x15, [x4, #(9 * 8)] + ldr x14, [x4, #(8 * 8)] + ldr x13, [x4, #(7 * 8)] + ldr x12, [x4, #(6 * 8)] + ldr x11, [x4, #(5 * 8)] + ldr x10, [x4, #(4 * 8)] + ldr x9, [x4, #(3 * 8)] + ldr x8, [x4, #(2 * 8)] + ldr x7, [x4, #(1 * 8)] + ldr x6, [x4, #(0 * 8)] + + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - msr dbgbvr15_el1, x20 - msr dbgbvr14_el1, x19 - msr dbgbvr13_el1, x18 - msr dbgbvr12_el1, x17 - msr dbgbvr11_el1, x16 - msr dbgbvr10_el1, x15 - msr dbgbvr9_el1, x14 - msr dbgbvr8_el1, x13 - msr dbgbvr7_el1, x12 - msr dbgbvr6_el1, x11 - msr dbgbvr5_el1, x10 - msr dbgbvr4_el1, x9 - msr dbgbvr3_el1, x8 - msr dbgbvr2_el1, x7 - msr dbgbvr1_el1, x6 - msr dbgbvr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - msr dbgwcr15_el1, x20 - msr dbgwcr14_el1, x19 - msr dbgwcr13_el1, x18 - msr dbgwcr12_el1, x17 - msr dbgwcr11_el1, x16 - msr dbgwcr10_el1, x15 - msr dbgwcr9_el1, x14 - msr dbgwcr8_el1, x13 - msr dbgwcr7_el1, x12 - msr dbgwcr6_el1, x11 - msr dbgwcr5_el1, x10 - msr dbgwcr4_el1, x9 - msr dbgwcr3_el1, x8 - msr dbgwcr2_el1, x7 - msr dbgwcr1_el1, x6 - msr dbgwcr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - msr dbgwvr15_el1, x20 - msr dbgwvr14_el1, x19 - msr dbgwvr13_el1, x18 - msr dbgwvr12_el1, x17 - msr dbgwvr11_el1, x16 - msr dbgwvr10_el1, x15 - msr dbgwvr9_el1, x14 - msr dbgwvr8_el1, x13 - msr dbgwvr7_el1, x12 - msr dbgwvr6_el1, x11 - msr dbgwvr5_el1, x10 - msr dbgwvr4_el1, x9 - msr dbgwvr3_el1, x8 - msr dbgwvr2_el1, x7 - msr dbgwvr1_el1, x6 - msr dbgwvr0_el1, x5 - - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - msr mdccint_el1, x21 + msr \type\()15_el1, x21 + msr \type\()14_el1, x20 + msr \type\()13_el1, x19 + msr \type\()12_el1, x18 + msr \type\()11_el1, x17 + msr \type\()10_el1, x16 + msr \type\()9_el1, x15 + msr \type\()8_el1, x14 + msr \type\()7_el1, x13 + msr \type\()6_el1, x12 + msr \type\()5_el1, x11 + msr \type\()4_el1, x10 + msr \type\()3_el1, x9 + msr \type\()2_el1, x8 + msr \type\()1_el1, x7 + msr \type\()0_el1, x6 .endm .macro skip_32bit_state tmp, target @@ -675,6 +385,14 @@ tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target .endm +/* + * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) + */ +.macro skip_fpsimd_state tmp, target + mrs \tmp, cptr_el2 + tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target +.endm + .macro compute_debug_state target // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY // is set, we do a full save/restore cycle and disable trapping. @@ -713,10 +431,12 @@ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) mrs x4, dacr32_el2 mrs x5, ifsr32_el2 - mrs x6, fpexc32_el2 stp x4, x5, [x3] - str x6, [x3, #16] + skip_fpsimd_state x8, 3f + mrs x6, fpexc32_el2 + str x6, [x3, #16] +3: skip_debug_state x8, 2f mrs x7, dbgvcr32_el2 str x7, [x3, #24] @@ -743,10 +463,8 @@ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) ldp x4, x5, [x3] - ldr x6, [x3, #16] msr dacr32_el2, x4 msr ifsr32_el2, x5 - msr fpexc32_el2, x6 skip_debug_state x8, 2f ldr x7, [x3, #24] @@ -763,31 +481,35 @@ .macro activate_traps ldr x2, [x0, #VCPU_HCR_EL2] + + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + */ + tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state + mov x3, #(1 << 30) + msr fpexc32_el2, x3 + isb +99: msr hcr_el2, x2 mov x2, #CPTR_EL2_TTA + orr x2, x2, #CPTR_EL2_TFP msr cptr_el2, x2 mov x2, #(1 << 15) // Trap CP15 Cr=15 msr hstr_el2, x2 - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) - orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) - - // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap - // if not dirty. - ldr x3, [x0, #VCPU_DEBUG_FLAGS] - tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f - orr x2, x2, #MDCR_EL2_TDA -1: + // Monitor Debug Config - see kvm_arm_setup_debug() + ldr x2, [x0, #VCPU_MDCR_EL2] msr mdcr_el2, x2 .endm .macro deactivate_traps mov x2, #HCR_RW msr hcr_el2, x2 - msr cptr_el2, xzr msr hstr_el2, xzr mrs x2, mdcr_el2 @@ -892,21 +614,101 @@ __restore_sysregs: restore_sysregs ret +/* Save debug state */ __save_debug: - save_debug + // x2: ptr to CPU context + // x3: ptr to debug reg struct + // x4/x5/x6-22/x24-26: trashed + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + mov x5, x24 + add x4, x3, #DEBUG_BCR + save_debug dbgbcr + add x4, x3, #DEBUG_BVR + save_debug dbgbvr + + mov x5, x25 + add x4, x3, #DEBUG_WCR + save_debug dbgwcr + add x4, x3, #DEBUG_WVR + save_debug dbgwvr + + mrs x21, mdccint_el1 + str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] ret +/* Restore debug state */ __restore_debug: - restore_debug + // x2: ptr to CPU context + // x3: ptr to debug reg struct + // x4/x5/x6-22/x24-26: trashed + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + mov x5, x24 + add x4, x3, #DEBUG_BCR + restore_debug dbgbcr + add x4, x3, #DEBUG_BVR + restore_debug dbgbvr + + mov x5, x25 + add x4, x3, #DEBUG_WCR + restore_debug dbgwcr + add x4, x3, #DEBUG_WVR + restore_debug dbgwvr + + ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + msr mdccint_el1, x21 + ret __save_fpsimd: + skip_fpsimd_state x3, 1f save_fpsimd - ret +1: ret __restore_fpsimd: + skip_fpsimd_state x3, 1f restore_fpsimd - ret +1: ret + +switch_to_guest_fpsimd: + push x4, lr + + mrs x2, cptr_el2 + bic x2, x2, #CPTR_EL2_TFP + msr cptr_el2, x2 + isb + + mrs x0, tpidr_el2 + + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + bl __save_fpsimd + + add x2, x0, #VCPU_CONTEXT + bl __restore_fpsimd + + skip_32bit_state x3, 1f + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] + msr fpexc32_el2, x4 +1: + pop x4, lr + pop x2, x3 + pop x0, x1 + + eret /* * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); @@ -928,10 +730,10 @@ ENTRY(__kvm_vcpu_run) kern_hyp_va x2 save_host_regs - bl __save_fpsimd bl __save_sysregs compute_debug_state 1f + add x3, x0, #VCPU_HOST_DEBUG_STATE bl __save_debug 1: activate_traps @@ -944,9 +746,10 @@ ENTRY(__kvm_vcpu_run) add x2, x0, #VCPU_CONTEXT bl __restore_sysregs - bl __restore_fpsimd skip_debug_state x3, 1f + ldr x3, [x0, #VCPU_DEBUG_PTR] + kern_hyp_va x3 bl __restore_debug 1: restore_guest_32bit_state @@ -967,6 +770,8 @@ __kvm_vcpu_return: bl __save_sysregs skip_debug_state x3, 1f + ldr x3, [x0, #VCPU_DEBUG_PTR] + kern_hyp_va x3 bl __save_debug 1: save_guest_32bit_state @@ -983,12 +788,15 @@ __kvm_vcpu_return: bl __restore_sysregs bl __restore_fpsimd + /* Clear FPSIMD and Trace trapping */ + msr cptr_el2, xzr skip_debug_state x3, 1f // Clear the dirty flag for the next run, as all the state has // already been saved. Note that we nuke the whole 64bit word. // If we ever add more flags, we'll have to be more careful... str xzr, [x0, #VCPU_DEBUG_FLAGS] + add x3, x0, #VCPU_HOST_DEBUG_STATE bl __restore_debug 1: restore_host_regs @@ -1191,6 +999,11 @@ el1_trap: * x1: ESR * x2: ESR_EC */ + + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + cmp x2, #ESR_ELx_EC_FP_ASIMD + b.eq switch_to_guest_fpsimd + cmp x2, #ESR_ELx_EC_DABT_LOW mov x0, #ESR_ELx_EC_IABT_LOW ccmp x2, x0, #4, ne @@ -1285,4 +1098,10 @@ ENTRY(__kvm_hyp_vector) ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) + +ENTRY(__kvm_get_mdcr_el2) + mrs x0, mdcr_el2 + ret +ENDPROC(__kvm_get_mdcr_el2) + .popsection diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 0b4326578985..91cf5350b328 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -22,6 +22,7 @@ #include <linux/errno.h> #include <linux/kvm_host.h> #include <linux/kvm.h> +#include <linux/hw_breakpoint.h> #include <kvm/arm_arch_timer.h> @@ -56,6 +57,12 @@ static bool cpu_has_32bit_el1(void) return !!(pfr0 & 0x20); } +/** + * kvm_arch_dev_ioctl_check_extension + * + * We currently assume that the number of HW registers is uniform + * across all CPUs (see cpuinfo_sanity_check). + */ int kvm_arch_dev_ioctl_check_extension(long ext) { int r; @@ -64,6 +71,15 @@ int kvm_arch_dev_ioctl_check_extension(long ext) case KVM_CAP_ARM_EL1_32BIT: r = cpu_has_32bit_el1(); break; + case KVM_CAP_GUEST_DEBUG_HW_BPS: + r = get_num_brps(); + break; + case KVM_CAP_GUEST_DEBUG_HW_WPS: + r = get_num_wrps(); + break; + case KVM_CAP_SET_GUEST_DEBUG: + r = 1; + break; default: r = 0; } @@ -105,7 +121,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_reset_sys_regs(vcpu); /* Reset timer */ - kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); - - return 0; + return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c370b4014799..b41607d270ac 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -38,6 +38,8 @@ #include "sys_regs.h" +#include "trace.h" + /* * All of this file is extremly similar to the ARM coproc.c, but the * types are different. My gut feeling is that it should be pretty @@ -208,9 +210,217 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); } + trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt)); + + return true; +} + +/* + * reg_to_dbg/dbg_to_reg + * + * A 32 bit write to a debug register leave top bits alone + * A 32 bit read from a debug register only returns the bottom bits + * + * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the + * hyp.S code switches between host and guest values in future. + */ +static inline void reg_to_dbg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + u64 *dbg_reg) +{ + u64 val = *vcpu_reg(vcpu, p->Rt); + + if (p->is_32bit) { + val &= 0xffffffffUL; + val |= ((*dbg_reg >> 32) << 32); + } + + *dbg_reg = val; + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; +} + +static inline void dbg_to_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + u64 *dbg_reg) +{ + u64 val = *dbg_reg; + + if (p->is_32bit) + val &= 0xffffffffUL; + + *vcpu_reg(vcpu, p->Rt) = val; +} + +static inline bool trap_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + + return true; +} + +static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; +} + +static inline bool trap_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + + return true; +} + +static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + + return 0; +} + +static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; +} + +static inline bool trap_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + trace_trap_reg(__func__, rd->reg, p->is_write, + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); + return true; } +static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; +} + +static inline bool trap_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + + return true; +} + +static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; +} + static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 amair; @@ -240,16 +450,16 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ /* DBGBVRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ - trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ + trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ /* DBGBCRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ - trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ + trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ /* DBGWVRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ - trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ + trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ /* DBGWCRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ - trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } + trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } /* * Architected system registers. @@ -516,28 +726,57 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, return true; } -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ - NULL, (cp14_DBGBVR0 + (n) * 2) }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ - NULL, (cp14_DBGBCR0 + (n) * 2) }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ - NULL, (cp14_DBGWVR0 + (n) * 2) }, \ - /* DBGWCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ - NULL, (cp14_DBGWCR0 + (n) * 2) } - -#define DBGBXVR(n) \ - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ - NULL, cp14_DBGBXVR0 + n * 2 } +/* AArch32 debug register mappings + * + * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] + * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] + * + * All control registers and watchpoint value registers are mapped to + * the lower 32 bits of their AArch64 equivalents. We share the trap + * handlers with the above AArch64 code which checks what mode the + * system is in. + */ + +static inline bool trap_xvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (p->is_write) { + u64 val = *dbg_reg; + + val &= 0xffffffffUL; + val |= *vcpu_reg(vcpu, p->Rt) << 32; + *dbg_reg = val; + + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32; + } + + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + + return true; +} + +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } + +#define DBGBXVR(n) \ + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } /* * Trapped cp14 registers. We generally ignore most of the external * debug, on the principle that they don't really make sense to a - * guest. Revisit this one day, whould this principle change. + * guest. Revisit this one day, would this principle change. */ static const struct sys_reg_desc cp14_regs[] = { /* DBGIDR */ @@ -999,6 +1238,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_hsr(vcpu); + trace_kvm_handle_sys_reg(esr); + params.is_aarch32 = false; params.is_32bit = false; params.Op0 = (esr >> 20) & 3; @@ -1303,6 +1544,9 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (!r) return get_invariant_sys_reg(reg->id, uaddr); + if (r->get_user) + return (r->get_user)(vcpu, r, reg, uaddr); + return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); } @@ -1321,6 +1565,9 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (!r) return set_invariant_sys_reg(reg->id, uaddr); + if (r->set_user) + return (r->set_user)(vcpu, r, reg, uaddr); + return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index d411e251412c..eaa324e4db4d 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -55,6 +55,12 @@ struct sys_reg_desc { /* Value (usually reset value) */ u64 val; + + /* Custom get/set_user functions, fallback to generic if NULL */ + int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr); + int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr); }; static inline void print_sys_reg_instr(const struct sys_reg_params *p) diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 475fd2929310..1e4576824165 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -94,6 +94,8 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8, + &genericv8_target_table); return 0; } diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h index 157416e963f2..7fb0008c4fa3 100644 --- a/arch/arm64/kvm/trace.h +++ b/arch/arm64/kvm/trace.h @@ -44,6 +44,129 @@ TRACE_EVENT(kvm_hvc_arm64, __entry->vcpu_pc, __entry->r0, __entry->imm) ); +TRACE_EVENT(kvm_arm_setup_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_clear_debug, + TP_PROTO(__u32 guest_debug), + TP_ARGS(guest_debug), + + TP_STRUCT__entry( + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->guest_debug = guest_debug; + ), + + TP_printk("flags: 0x%08x", __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_set_dreg32, + TP_PROTO(const char *name, __u32 value), + TP_ARGS(name, value), + + TP_STRUCT__entry( + __field(const char *, name) + __field(__u32, value) + ), + + TP_fast_assign( + __entry->name = name; + __entry->value = value; + ), + + TP_printk("%s: 0x%08x", __entry->name, __entry->value) +); + +TRACE_EVENT(kvm_arm_set_regset, + TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), + TP_ARGS(type, len, control, value), + TP_STRUCT__entry( + __field(const char *, name) + __field(int, len) + __array(u64, ctrls, 16) + __array(u64, values, 16) + ), + TP_fast_assign( + __entry->name = type; + __entry->len = len; + memcpy(__entry->ctrls, control, len << 3); + memcpy(__entry->values, value, len << 3); + ), + TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, + __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), + __print_array(__entry->values, __entry->len, sizeof(__u64))) +); + +TRACE_EVENT(trap_reg, + TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), + TP_ARGS(fn, reg, is_write, write_value), + + TP_STRUCT__entry( + __field(const char *, fn) + __field(int, reg) + __field(bool, is_write) + __field(u64, write_value) + ), + + TP_fast_assign( + __entry->fn = fn; + __entry->reg = reg; + __entry->is_write = is_write; + __entry->write_value = write_value; + ), + + TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) +); + +TRACE_EVENT(kvm_handle_sys_reg, + TP_PROTO(unsigned long hsr), + TP_ARGS(hsr), + + TP_STRUCT__entry( + __field(unsigned long, hsr) + ), + + TP_fast_assign( + __entry->hsr = hsr; + ), + + TP_printk("HSR 0x%08lx", __entry->hsr) +); + +TRACE_EVENT(kvm_set_guest_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + + #endif /* _TRACE_ARM64_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index e5966758c093..e1e4d7c38dda 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -52,13 +52,16 @@ struct arch_timer_cpu { /* Timer IRQ */ const struct kvm_irq_level *irq; + + /* VGIC mapping */ + struct irq_phys_map *map; }; int kvm_timer_hyp_init(void); void kvm_timer_enable(struct kvm *kvm); void kvm_timer_init(struct kvm *kvm); -void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq); +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 133ea00aa83b..d901f1a47be6 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -95,11 +95,15 @@ enum vgic_type { #define LR_STATE_ACTIVE (1 << 1) #define LR_STATE_MASK (3 << 0) #define LR_EOI_INT (1 << 2) +#define LR_HW (1 << 3) struct vgic_lr { - u16 irq; - u8 source; - u8 state; + unsigned irq:10; + union { + unsigned hwirq:10; + unsigned source:3; + }; + unsigned state:4; }; struct vgic_vmcr { @@ -155,6 +159,19 @@ struct vgic_io_device { struct kvm_io_device dev; }; +struct irq_phys_map { + u32 virt_irq; + u32 phys_irq; + u32 irq; + bool active; +}; + +struct irq_phys_map_entry { + struct list_head entry; + struct rcu_head rcu; + struct irq_phys_map map; +}; + struct vgic_dist { spinlock_t lock; bool in_kernel; @@ -252,6 +269,10 @@ struct vgic_dist { struct vgic_vm_ops vm_ops; struct vgic_io_device dist_iodev; struct vgic_io_device *redist_iodevs; + + /* Virtual irq to hwirq mapping */ + spinlock_t irq_phys_map_lock; + struct list_head irq_phys_map_list; }; struct vgic_v2_cpu_if { @@ -303,6 +324,9 @@ struct vgic_cpu { struct vgic_v2_cpu_if vgic_v2; struct vgic_v3_cpu_if vgic_v3; }; + + /* Protected by the distributor's irq_phys_map_lock */ + struct list_head irq_phys_map_list; }; #define LR_EMPTY 0xff @@ -317,16 +341,25 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_get_max_vcpus(void); +void kvm_vgic_early_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level); +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, + struct irq_phys_map *map, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); +struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, + int virt_irq, int irq); +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); +bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); +void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index ffbc034c8810..cf637d65b589 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -268,9 +268,12 @@ #define ICH_LR_EOI (1UL << 41) #define ICH_LR_GROUP (1UL << 60) +#define ICH_LR_HW (1UL << 61) #define ICH_LR_STATE (3UL << 62) #define ICH_LR_PENDING_BIT (1UL << 62) #define ICH_LR_ACTIVE_BIT (1UL << 63) +#define ICH_LR_PHYS_ID_SHIFT 32 +#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT) #define ICH_MISR_EOI (1 << 0) #define ICH_MISR_U (1 << 1) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 9de976b4f9a7..ca88dad65260 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -71,11 +71,12 @@ #define GICH_LR_VIRTUALID (0x3ff << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) -#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT) +#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) #define GICH_LR_STATE (3 << 28) #define GICH_LR_PENDING_BIT (1 << 28) #define GICH_LR_ACTIVE_BIT (1 << 29) #define GICH_LR_EOI (1 << 19) +#define GICH_LR_HW (1 << 31) #define GICH_VMCR_CTRL_SHIFT 0 #define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 716ad4ae4d4b..a1e08e7bbf20 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -237,6 +237,7 @@ struct kvm_run { __u32 count; __u64 data_offset; /* relative to kvm_run start */ } io; + /* KVM_EXIT_DEBUG */ struct { struct kvm_debug_exit_arch arch; } debug; @@ -285,6 +286,7 @@ struct kvm_run { __u32 data; __u8 is_write; } dcr; + /* KVM_EXIT_INTERNAL_ERROR */ struct { __u32 suberror; /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ @@ -295,6 +297,7 @@ struct kvm_run { struct { __u64 gprs[32]; } osi; + /* KVM_EXIT_PAPR_HCALL */ struct { __u64 nr; __u64 ret; @@ -817,6 +820,8 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_DISABLE_QUIRKS 116 #define KVM_CAP_X86_SMM 117 #define KVM_CAP_MULTI_ADDRESS_SPACE 118 +#define KVM_CAP_GUEST_DEBUG_HW_BPS 119 +#define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 98c95f2fcba4..76e38d231e99 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -64,10 +64,10 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) int ret; struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer->irq->irq, - timer->irq->level); + kvm_vgic_set_phys_irq_active(timer->map, true); + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, + timer->map, + timer->irq->level); WARN_ON(ret); } @@ -117,7 +117,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) cycle_t cval, now; if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || + kvm_vgic_get_phys_irq_active(timer->map)) return false; cval = timer->cntv_cval; @@ -184,10 +185,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) timer_arm(timer, ns); } -void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct irq_phys_map *map; /* * The vcpu timer irq number cannot be determined in @@ -196,6 +198,17 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * vcpu timer irq number when the vcpu is reset. */ timer->irq = irq; + + /* + * Tell the VGIC that the virtual interrupt is tied to a + * physical interrupt. We do that once per VCPU. + */ + map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq); + if (WARN_ON(IS_ERR(map))) + return PTR_ERR(map); + + timer->map = map; + return 0; } void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) @@ -335,6 +348,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; timer_disarm(timer); + if (timer->map) + kvm_vgic_unmap_phys_irq(vcpu, timer->map); } void kvm_timer_enable(struct kvm *kvm) diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index f9b9c7c51372..8d7b04db8471 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -48,6 +48,10 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) lr_desc.state |= LR_STATE_ACTIVE; if (val & GICH_LR_EOI) lr_desc.state |= LR_EOI_INT; + if (val & GICH_LR_HW) { + lr_desc.state |= LR_HW; + lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT; + } return lr_desc; } @@ -55,7 +59,9 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc) { - u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + u32 lr_val; + + lr_val = lr_desc.irq; if (lr_desc.state & LR_STATE_PENDING) lr_val |= GICH_LR_PENDING_BIT; @@ -64,6 +70,14 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, if (lr_desc.state & LR_EOI_INT) lr_val |= GICH_LR_EOI; + if (lr_desc.state & LR_HW) { + lr_val |= GICH_LR_HW; + lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT; + } + + if (lr_desc.irq < VGIC_NR_SGIS) + lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; } diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index dff06021e748..afbf925b00f4 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -67,6 +67,10 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) lr_desc.state |= LR_STATE_ACTIVE; if (val & ICH_LR_EOI) lr_desc.state |= LR_EOI_INT; + if (val & ICH_LR_HW) { + lr_desc.state |= LR_HW; + lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0); + } return lr_desc; } @@ -84,10 +88,17 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, * Eventually we want to make this configurable, so we may revisit * this in the future. */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + switch (vcpu->kvm->arch.vgic.vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: lr_val |= ICH_LR_GROUP; - else - lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + if (lr_desc.irq < VGIC_NR_SGIS) + lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; + break; + default: + BUG(); + } if (lr_desc.state & LR_STATE_PENDING) lr_val |= ICH_LR_PENDING_BIT; @@ -95,6 +106,10 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, lr_val |= ICH_LR_ACTIVE_BIT; if (lr_desc.state & LR_EOI_INT) lr_val |= ICH_LR_EOI; + if (lr_desc.state & LR_HW) { + lr_val |= ICH_LR_HW; + lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; + } vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index bc40137a022d..9eb489a2c94c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -24,6 +24,7 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/rculist.h> #include <linux/uaccess.h> #include <asm/kvm_emulate.h> @@ -74,6 +75,28 @@ * cause the interrupt to become inactive in such a situation. * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become * inactive as long as the external input line is held high. + * + * + * Initialization rules: there are multiple stages to the vgic + * initialization, both for the distributor and the CPU interfaces. + * + * Distributor: + * + * - kvm_vgic_early_init(): initialization of static data that doesn't + * depend on any sizing information or emulation type. No allocation + * is allowed there. + * + * - vgic_init(): allocation and initialization of the generic data + * structures that depend on sizing information (number of CPUs, + * number of interrupts). Also initializes the vcpu specific data + * structures. Can be executed lazily for GICv2. + * [to be renamed to kvm_vgic_init??] + * + * CPU Interface: + * + * - kvm_vgic_cpu_early_init(): initialization of static data that + * doesn't depend on any sizing information or emulation type. No + * allocation is allowed there. */ #include "vgic.h" @@ -82,6 +105,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); +static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, + int virt_irq); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; @@ -375,7 +400,7 @@ void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) { - return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); + return !vgic_irq_is_queued(vcpu, irq); } /** @@ -1115,6 +1140,39 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, if (!vgic_irq_is_edge(vcpu, irq)) vlr.state |= LR_EOI_INT; + if (vlr.irq >= VGIC_NR_SGIS) { + struct irq_phys_map *map; + map = vgic_irq_map_search(vcpu, irq); + + /* + * If we have a mapping, and the virtual interrupt is + * being injected, then we must set the state to + * active in the physical world. Otherwise the + * physical interrupt will fire and the guest will + * exit before processing the virtual interrupt. + */ + if (map) { + int ret; + + BUG_ON(!map->active); + vlr.hwirq = map->phys_irq; + vlr.state |= LR_HW; + vlr.state &= ~LR_EOI_INT; + + ret = irq_set_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + true); + WARN_ON(ret); + + /* + * Make sure we're not going to sample this + * again, as a HW-backed interrupt cannot be + * in the PENDING_ACTIVE stage. + */ + vgic_irq_set_queued(vcpu, irq); + } + } + vgic_set_lr(vcpu, lr_nr, vlr); vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } @@ -1339,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) return level_pending; } +/* + * Save the physical active state, and reset it to inactive. + * + * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. + */ +static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) +{ + struct irq_phys_map *map; + int ret; + + if (!(vlr.state & LR_HW)) + return 0; + + map = vgic_irq_map_search(vcpu, vlr.irq); + BUG_ON(!map || !map->active); + + ret = irq_get_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + &map->active); + + WARN_ON(ret); + + if (map->active) { + ret = irq_set_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + false); + WARN_ON(ret); + return 0; + } + + return 1; +} + /* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { @@ -1353,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr = vgic_get_elrsr(vcpu); elrsr_ptr = u64_to_bitmask(&elrsr); - /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { + /* Deal with HW interrupts, and clear mappings for empty LRs */ + for (lr = 0; lr < vgic->nr_lr; lr++) { struct vgic_lr vlr; - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) + if (!test_bit(lr, vgic_cpu->lr_used)) continue; vlr = vgic_get_lr(vcpu, lr); + if (vgic_sync_hwirq(vcpu, vlr)) { + /* + * So this is a HW interrupt that the guest + * EOI-ed. Clean the LR state and allow the + * interrupt to be sampled again. + */ + vlr.state = 0; + vlr.hwirq = 0; + vgic_set_lr(vcpu, lr, vlr); + vgic_irq_clear_queued(vcpu, vlr.irq); + set_bit(lr, elrsr_ptr); + } + + if (!test_bit(lr, elrsr_ptr)) + continue; + + clear_bit(lr, vgic_cpu->lr_used); BUG_ON(vlr.irq >= dist->nr_irqs); vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; @@ -1447,7 +1555,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) } static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) + struct irq_phys_map *map, + unsigned int irq_num, bool level) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; @@ -1455,6 +1564,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, int enabled; bool ret = true, can_inject = true; + if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) + return -EINVAL; + spin_lock(&dist->lock); vcpu = kvm_get_vcpu(kvm, cpuid); @@ -1517,18 +1629,46 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, out: spin_unlock(&dist->lock); - return ret ? cpuid : -EINVAL; + if (ret) { + /* kick the specified vcpu */ + kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid)); + } + + return 0; +} + +static int vgic_lazy_init(struct kvm *kvm) +{ + int ret = 0; + + if (unlikely(!vgic_initialized(kvm))) { + /* + * We only provide the automatic initialization of the VGIC + * for the legacy case of a GICv2. Any other type must + * be explicitly initialized once setup with the respective + * KVM device call. + */ + if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) + return -EBUSY; + + mutex_lock(&kvm->lock); + ret = vgic_init(kvm); + mutex_unlock(&kvm->lock); + } + + return ret; } /** * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic * @kvm: The VM structure pointer * @cpuid: The CPU for PPIs - * @irq_num: The IRQ number that is assigned to the device + * @irq_num: The IRQ number that is assigned to the device. This IRQ + * must not be mapped to a HW interrupt. * @level: Edge-triggered: true: to trigger the interrupt * false: to ignore the call - * Level-sensitive true: activates an interrupt - * false: deactivates an interrupt + * Level-sensitive true: raise the input signal + * false: lower the input signal * * The GIC is not concerned with devices being active-LOW or active-HIGH for * level-sensitive interrupts. You can think of the level parameter as 1 @@ -1537,39 +1677,44 @@ out: int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - int ret = 0; - int vcpu_id; - - if (unlikely(!vgic_initialized(kvm))) { - /* - * We only provide the automatic initialization of the VGIC - * for the legacy case of a GICv2. Any other type must - * be explicitly initialized once setup with the respective - * KVM device call. - */ - if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) { - ret = -EBUSY; - goto out; - } - mutex_lock(&kvm->lock); - ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); + struct irq_phys_map *map; + int ret; - if (ret) - goto out; - } + ret = vgic_lazy_init(kvm); + if (ret) + return ret; - if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) + map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num); + if (map) return -EINVAL; - vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); - if (vcpu_id >= 0) { - /* kick the specified vcpu */ - kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id)); - } + return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level); +} -out: - return ret; +/** + * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic + * @kvm: The VM structure pointer + * @cpuid: The CPU for PPIs + * @map: Pointer to a irq_phys_map structure describing the mapping + * @level: Edge-triggered: true: to trigger the interrupt + * false: to ignore the call + * Level-sensitive true: raise the input signal + * false: lower the input signal + * + * The GIC is not concerned with devices being active-LOW or active-HIGH for + * level-sensitive interrupts. You can think of the level parameter as 1 + * being HIGH and 0 being LOW and all devices being active-HIGH. + */ +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, + struct irq_phys_map *map, bool level) +{ + int ret; + + ret = vgic_lazy_init(kvm); + if (ret) + return ret; + + return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level); } static irqreturn_t vgic_maintenance_handler(int irq, void *data) @@ -1583,6 +1728,188 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu, + int virt_irq) +{ + if (virt_irq < VGIC_NR_PRIVATE_IRQS) + return &vcpu->arch.vgic_cpu.irq_phys_map_list; + else + return &vcpu->kvm->arch.vgic.irq_phys_map_list; +} + +/** + * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ + * @vcpu: The VCPU pointer + * @virt_irq: The virtual irq number + * @irq: The Linux IRQ number + * + * Establish a mapping between a guest visible irq (@virt_irq) and a + * Linux irq (@irq). On injection, @virt_irq will be associated with + * the physical interrupt represented by @irq. This mapping can be + * established multiple times as long as the parameters are the same. + * + * Returns a valid pointer on success, and an error pointer otherwise + */ +struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, + int virt_irq, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); + struct irq_phys_map *map; + struct irq_phys_map_entry *entry; + struct irq_desc *desc; + struct irq_data *data; + int phys_irq; + + desc = irq_to_desc(irq); + if (!desc) { + kvm_err("%s: no interrupt descriptor\n", __func__); + return ERR_PTR(-EINVAL); + } + + data = irq_desc_get_irq_data(desc); + while (data->parent_data) + data = data->parent_data; + + phys_irq = data->hwirq; + + /* Create a new mapping */ + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + spin_lock(&dist->irq_phys_map_lock); + + /* Try to match an existing mapping */ + map = vgic_irq_map_search(vcpu, virt_irq); + if (map) { + /* Make sure this mapping matches */ + if (map->phys_irq != phys_irq || + map->irq != irq) + map = ERR_PTR(-EINVAL); + + /* Found an existing, valid mapping */ + goto out; + } + + map = &entry->map; + map->virt_irq = virt_irq; + map->phys_irq = phys_irq; + map->irq = irq; + + list_add_tail_rcu(&entry->entry, root); + +out: + spin_unlock(&dist->irq_phys_map_lock); + /* If we've found a hit in the existing list, free the useless + * entry */ + if (IS_ERR(map) || map != &entry->map) + kfree(entry); + return map; +} + +static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, + int virt_irq) +{ + struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); + struct irq_phys_map_entry *entry; + struct irq_phys_map *map; + + rcu_read_lock(); + + list_for_each_entry_rcu(entry, root, entry) { + map = &entry->map; + if (map->virt_irq == virt_irq) { + rcu_read_unlock(); + return map; + } + } + + rcu_read_unlock(); + + return NULL; +} + +static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) +{ + struct irq_phys_map_entry *entry; + + entry = container_of(rcu, struct irq_phys_map_entry, rcu); + kfree(entry); +} + +/** + * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ + * + * Return the logical active state of a mapped interrupt. This doesn't + * necessarily reflects the current HW state. + */ +bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) +{ + BUG_ON(!map); + return map->active; +} + +/** + * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ + * + * Set the logical active state of a mapped interrupt. This doesn't + * immediately affects the HW state. + */ +void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) +{ + BUG_ON(!map); + map->active = active; +} + +/** + * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping + * @vcpu: The VCPU pointer + * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq + * + * Remove an existing mapping between virtual and physical interrupts. + */ +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct irq_phys_map_entry *entry; + struct list_head *root; + + if (!map) + return -EINVAL; + + root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq); + + spin_lock(&dist->irq_phys_map_lock); + + list_for_each_entry(entry, root, entry) { + if (&entry->map == map) { + list_del_rcu(&entry->entry); + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); + break; + } + } + + spin_unlock(&dist->irq_phys_map_lock); + + return 0; +} + +static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct irq_phys_map_entry *entry; + + spin_lock(&dist->irq_phys_map_lock); + + list_for_each_entry(entry, root, entry) { + list_del_rcu(&entry->entry); + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); + } + + spin_unlock(&dist->irq_phys_map_lock); +} + void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1591,6 +1918,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->active_shared); kfree(vgic_cpu->pend_act_shared); kfree(vgic_cpu->vgic_irq_lr_map); + vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); vgic_cpu->pending_shared = NULL; vgic_cpu->active_shared = NULL; vgic_cpu->pend_act_shared = NULL; @@ -1628,6 +1956,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) } /** + * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage + * + * No memory allocation should be performed here, only static init. + */ +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list); +} + +/** * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW * * The host's GIC naturally limits the maximum amount of VCPUs a guest @@ -1664,6 +2003,7 @@ void kvm_vgic_destroy(struct kvm *kvm) kfree(dist->irq_spi_target); kfree(dist->irq_pending_on_cpu); kfree(dist->irq_active_on_cpu); + vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list); dist->irq_sgi_sources = NULL; dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; @@ -1787,6 +2127,18 @@ static int init_vgic_model(struct kvm *kvm, int type) return 0; } +/** + * kvm_vgic_early_init - Earliest possible vgic initialization stage + * + * No memory allocation should be performed here, only static init. + */ +void kvm_vgic_early_init(struct kvm *kvm) +{ + spin_lock_init(&kvm->arch.vgic.lock); + spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock); + INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list); +} + int kvm_vgic_create(struct kvm *kvm, u32 type) { int i, vcpu_lock_idx = -1, ret; @@ -1832,7 +2184,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) if (ret) goto out_unlock; - spin_lock_init(&kvm->arch.vgic.lock); kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; kvm->arch.vgic.vctrl_base = vgic->vctrl_base; |