diff options
47 files changed, 1012 insertions, 703 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 1490eb0ef798..57d25fdd3d7e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -997,7 +997,7 @@ for vm-wide capabilities. 4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, s390 +Architectures: x86, s390, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_mp_state (out) Returns: 0 on success; -1 on error @@ -1011,7 +1011,7 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86] + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86,arm/arm64] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) which has not yet received an INIT signal [x86] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is @@ -1020,7 +1020,7 @@ Possible values are: is waiting for an interrupt [x86] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) [x86] - - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] + - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64] - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) [s390] @@ -1031,11 +1031,15 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. +For arm/arm64: + +The only states that are valid are KVM_MP_STATE_STOPPED and +KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, s390 +Architectures: x86, s390, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_mp_state (in) Returns: 0 on success; -1 on error @@ -1047,6 +1051,10 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. +For arm/arm64: + +The only states that are valid are KVM_MP_STATE_STOPPED and +KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not. 4.40 KVM_SET_IDENTITY_MAP_ADDR @@ -2263,7 +2271,7 @@ into the hash PTE second double word). 4.75 KVM_IRQFD Capability: KVM_CAP_IRQFD -Architectures: x86 s390 +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irqfd (in) Returns: 0 on success, -1 on error @@ -2289,6 +2297,10 @@ Note that closing the resamplefd is not sufficient to disable the irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. +On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared +Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is +given by gsi + 32. + 4.76 KVM_PPC_ALLOCATE_HTAB Capability: KVM_CAP_PPC_ALLOC_HTAB diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 816db0bf2dd8..d995821f1698 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -185,6 +185,7 @@ #define HSR_COND (0xfU << HSR_COND_SHIFT) #define FSC_FAULT (0x04) +#define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 41008cd7c53f..d71607c16601 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -27,6 +27,8 @@ #include <asm/fpstate.h> #include <kvm/arm_arch_timer.h> +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #else @@ -165,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return 0; -} - static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 3f83db2f6cf0..d8e90c8cb5fa 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -28,28 +28,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run->mmio, - * which is an anonymous type. Use our own type instead. - */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - bool is_write; - void *private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run->mmio.phys_addr = mmio->phys_addr; - run->mmio.len = mmio->len; - run->mmio.is_write = mmio->is_write; - memcpy(run->mmio.data, mmio->data, mmio->len); - run->exit_reason = KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 0db25bc32864..2499867dd0d8 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -198,6 +198,9 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d6087b9b1..488eaac56028 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -190,7 +190,6 @@ int main(void) DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); -#ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); @@ -200,14 +199,11 @@ int main(void) DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); -#ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); -#endif DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); -#endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif return 0; diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 338ace78ed18..f1f79d104309 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -18,6 +18,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on MMU && OF select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT @@ -26,10 +27,12 @@ config KVM select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU - depends on ARM_VIRT_EXT && ARM_LPAE + select MMU_NOTIFIER + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQFD + depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- - Support hosting virtualized guest machines. You will also - need to select one or more of the processor modules below. + Support hosting virtualized guest machines. This module provides access to the hardware capabilities through a character device node named /dev/kvm. @@ -37,10 +40,7 @@ config KVM If unsure, say N. config KVM_ARM_HOST - bool "KVM host support for ARM cpus." - depends on KVM - depends on MMU - select MMU_NOTIFIER + bool ---help--- Provides host support for ARM processors. @@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -config KVM_ARM_VGIC - bool "KVM support for Virtual GIC" - depends on KVM_ARM_HOST && OF - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for a hardware assisted, in-kernel GIC emulation. - -config KVM_ARM_TIMER - bool "KVM support for Architected Timers" - depends on KVM_ARM_VGIC && ARM_ARCH_TIMER - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for the Architected Timers in virtual machines - endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 443b8bea43e9..139e46c08b6e 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt) plus_virt_def := -DREQUIRES_VIRT=1 endif -ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +ccflags-y += -Iarch/arm/kvm CFLAGS_arm.o := -I. $(plus_virt_def) CFLAGS_mmu.o := -I. @@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm -kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o -obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o +obj-y += $(KVM)/arm/vgic.o +obj-y += $(KVM)/arm/vgic-v2.o +obj-y += $(KVM)/arm/vgic-v2-emul.o +obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5560f74f9eee..6f536451ab78 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); -static bool vgic_present; - static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: - r = vgic_present; - break; + case KVM_CAP_IRQFD: + case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: @@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return 0; + return kvm_timer_should_fire(vcpu); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + if (vcpu->arch.pause) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + + return 0; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.pause = false; + break; + case KVM_MP_STATE_STOPPED: + vcpu->arch.pause = true; + break; + default: + return -EINVAL; + } + + return 0; } /** @@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return 0; } +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return vgic_initialized(kvm); +} + static void vcpu_pause(struct kvm_vcpu *vcpu) { wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); @@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: - if (!vgic_present) - return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { - if (vgic_present) - return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); - else - return -ENXIO; + return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { struct kvm_arm_device_addr dev_addr; @@ -1035,10 +1050,6 @@ static int init_hyp_mode(void) if (err) goto out_free_context; -#ifdef CONFIG_KVM_ARM_VGIC - vgic_present = true; -#endif - /* * Init HYP architected timer support */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 384bab67c462..d503fbb787d3 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } -#ifndef CONFIG_KVM_ARM_TIMER - -#define NUM_TIMER_REGS 0 - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - return 0; -} - -static bool is_timer_reg(u64 index) -{ - return false; -} - -#else - #define NUM_TIMER_REGS 3 static bool is_timer_reg(u64 index) @@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return 0; } -#endif - static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { void __user *uaddr = (void __user *)(long)reg->addr; diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 14d488388480..35e4a3a0c476 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -402,7 +402,6 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro save_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm /* @@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6 ) * Assumes vcpu pointer in vcpu reg */ .macro restore_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm #define CNTHCTL_PL1PCTEN (1 << 0) @@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6 ) * Clobbers r2-r5 */ .macro save_timer_state -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: -#endif @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) @@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6 ) bic r2, r2, #CNTHCTL_PL1PCEN mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6 ) and r2, r2, #3 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL 1: -#endif .endm .equ vmentry, 0 diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 5d3bfc0eb3f0..974b1c606d04 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_exit_mmio *mmio) +static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) { unsigned long rt; - int len; - bool is_write, sign_extend; + int access_size; + bool sign_extend; if (kvm_vcpu_dabt_isextabt(vcpu)) { /* cache operation on I/O addr, tell guest unsupported */ @@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 1; } - len = kvm_vcpu_dabt_get_as(vcpu); - if (unlikely(len < 0)) - return len; + access_size = kvm_vcpu_dabt_get_as(vcpu); + if (unlikely(access_size < 0)) + return access_size; - is_write = kvm_vcpu_dabt_iswrite(vcpu); + *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - mmio->is_write = is_write; - mmio->phys_addr = fault_ipa; - mmio->len = len; + *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; @@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { - struct kvm_exit_mmio mmio; unsigned long data; unsigned long rt; int ret; + bool is_write; + int len; + u8 data_buf[8]; /* - * Prepare MMIO operation. First stash it in a private - * structure that we can use for in-kernel emulation. If the - * kernel can't handle it, copy it into run->mmio and let user - * space do its magic. + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. */ - if (kvm_vcpu_dabt_isvalid(vcpu)) { - ret = decode_hsr(vcpu, fault_ipa, &mmio); + ret = decode_hsr(vcpu, &is_write, &len); if (ret) return ret; } else { @@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, rt = vcpu->arch.mmio_decode.rt; - if (mmio.is_write) { - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), - mmio.len); + if (is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len); + + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + mmio_write_buf(data_buf, len, data); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, - fault_ipa, data); - mmio_write_buf(mmio.data, mmio.len, data); + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } else { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, fault_ipa, 0); + + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } - if (vgic_handle_mmio(vcpu, run, &mmio)) + /* Now prepare kvm_run for the potential return to userland. */ + run->mmio.is_write = is_write; + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + memcpy(run->mmio.data, data_buf, len); + + if (!ret) { + /* We handled the access successfully in the kernel. */ + kvm_handle_mmio_return(vcpu, run); return 1; + } - kvm_prepare_mmio(run, &mmio); + run->exit_reason = KVM_EXIT_MMIO; return 0; } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 5656d79c5a44..15b050d46fc9 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1330,10 +1330,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, out_unlock: spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret; } +/* + * Resolve the access fault by making the page young again. + * Note that because the faulting entry is guaranteed not to be + * cached in the TLB, we don't need to invalidate anything. + */ +static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + pmd_t *pmd; + pte_t *pte; + pfn_t pfn; + bool pfn_valid = false; + + trace_kvm_access_fault(fault_ipa); + + spin_lock(&vcpu->kvm->mmu_lock); + + pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + goto out; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + *pmd = pmd_mkyoung(*pmd); + pfn = pmd_pfn(*pmd); + pfn_valid = true; + goto out; + } + + pte = pte_offset_kernel(pmd, fault_ipa); + if (pte_none(*pte)) /* Nothing there either */ + goto out; + + *pte = pte_mkyoung(*pte); /* Just a page... */ + pfn = pte_pfn(*pte); + pfn_valid = true; +out: + spin_unlock(&vcpu->kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1364,7 +1405,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Check the stage-2 fault is trans. fault or write fault */ fault_status = kvm_vcpu_trap_get_fault_type(vcpu); - if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { + if (fault_status != FSC_FAULT && fault_status != FSC_PERM && + fault_status != FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -1400,6 +1442,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Userspace should not be able to register out-of-bounds IPAs */ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + if (fault_status == FSC_ACCESS) { + handle_access_fault(vcpu, fault_ipa); + ret = 1; + goto out_unlock; + } + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; @@ -1408,15 +1456,16 @@ out_unlock: return ret; } -static void handle_hva_to_gpa(struct kvm *kvm, - unsigned long start, - unsigned long end, - void (*handler)(struct kvm *kvm, - gpa_t gpa, void *data), - void *data) +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, void *data), + void *data) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + int ret = 0; slots = kvm_memslots(kvm); @@ -1440,14 +1489,17 @@ static void handle_hva_to_gpa(struct kvm *kvm, for (; gfn < gfn_end; ++gfn) { gpa_t gpa = gfn << PAGE_SHIFT; - handler(kvm, gpa, data); + ret |= handler(kvm, gpa, data); } } + + return ret; } -static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) { unmap_stage2_range(kvm, gpa, PAGE_SIZE); + return 0; } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -1473,7 +1525,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, return 0; } -static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) { pte_t *pte = (pte_t *)data; @@ -1485,6 +1537,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) * through this calling path. */ stage2_set_pte(kvm, NULL, gpa, pte, 0); + return 0; } @@ -1501,6 +1554,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); } +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_young(*pmd)) { + *pmd = pmd_mkold(*pmd); + return 1; + } + + return 0; + } + + pte = pte_offset_kernel(pmd, gpa); + if (pte_none(*pte)) + return 0; + + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); /* Just a page... */ + return 1; + } + + return 0; +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ + return pmd_young(*pmd); + + pte = pte_offset_kernel(pmd, gpa); + if (!pte_none(*pte)) /* Just a page... */ + return pte_young(*pte); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + trace_kvm_age_hva(start, end); + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_test_age_hva(hva); + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 6817664b46b8..0ec35392d208 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -68,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault, __entry->hxfar, __entry->vcpu_pc) ); +TRACE_EVENT(kvm_access_fault, + TP_PROTO(unsigned long ipa), + TP_ARGS(ipa), + + TP_STRUCT__entry( + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->ipa = ipa; + ), + + TP_printk("IPA: %lx", __entry->ipa) +); + TRACE_EVENT(kvm_irq_line, TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), TP_ARGS(type, vcpu_idx, irq_num, level), @@ -210,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva, TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) ); +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) +); + TRACE_EVENT(kvm_hvc, TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), TP_ARGS(vcpu_pc, r0, imm), diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 92bbae381598..70522450ca23 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -90,6 +90,7 @@ #define ESR_ELx_FSC (0x3F) #define ESR_ELx_FSC_TYPE (0x3C) #define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_PERM (0x0C) #define ESR_ELx_CV (UL(1) << 24) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 54bb4ba97441..ac6fafb95fe7 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -188,6 +188,7 @@ /* For compatibility with fault code shared with 32-bit */ #define FSC_FAULT ESR_ELx_FSC_FAULT +#define FSC_ACCESS ESR_ELx_FSC_ACCESS #define FSC_PERM ESR_ELx_FSC_PERM /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8ac3c70fe3c6..f0f58c9beec0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -28,6 +28,8 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #else @@ -177,19 +179,10 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return 0; -} - static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 9f52beb7cb13..889c908ee631 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -31,28 +31,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run->mmio, - * which is an anonymous type. Use our own type instead. - */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - bool is_write; - void *private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run->mmio.phys_addr = mmio->phys_addr; - run->mmio.len = mmio->len; - run->mmio.is_write = mmio->is_write; - memcpy(run->mmio.data, mmio->data, mmio->len); - run->exit_reason = KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3ef77a466018..c154c0b7eb60 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -191,6 +191,9 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index f5590c81d95f..5105e297ed5f 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -18,6 +18,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on OF select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES @@ -25,10 +26,10 @@ config KVM select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST - select KVM_ARM_VGIC - select KVM_ARM_TIMER select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQFD ---help--- Support hosting virtualized guest machines. @@ -50,17 +51,4 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -config KVM_ARM_VGIC - bool - depends on KVM_ARM_HOST && OF - select HAVE_KVM_IRQCHIP - ---help--- - Adds support for a hardware assisted, in-kernel GIC emulation. - -config KVM_ARM_TIMER - bool - depends on KVM_ARM_VGIC - ---help--- - Adds support for the Architected Timers in virtual machines. - endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 4e6e09ee4033..d5904f876cdb 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +ccflags-y += -Iarch/arm64/kvm CFLAGS_arm.o := -I. CFLAGS_mmu.o := -I. @@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o @@ -19,11 +19,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o -kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 39b3a8f816f2..4703fadd2737 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -34,7 +34,7 @@ #include <asm/kvm_para.h> #include <asm/kvm_host.h> #include <asm/kvm_ppc.h> -#include "iodev.h" +#include <kvm/iodev.h> #define MAX_CPU 32 #define MAX_SRC 256 @@ -1374,8 +1374,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) return -ENXIO; } -static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, - int len, void *ptr) +static int kvm_mpic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; @@ -1415,8 +1416,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, return ret; } -static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, - int len, const void *ptr) +static int kvm_mpic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0face86f4..24bfe401373e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -807,7 +807,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, bytes, &run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, bytes, &run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 89140ddb998c..fc7ec95848c3 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * - gpr 3 contains the virtqueue index (passed as datamatch) * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS, vcpu->run->s.regs.gprs[2] & 0xffffffff, 8, &vcpu->run->s.regs.gprs[3], vcpu->run->s.regs.gprs[4]); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 08f790dfadc9..16e8f962eaad 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,5 +1,5 @@ -ccflags-y += -Ivirt/kvm -Iarch/x86/kvm +ccflags-y += -Iarch/x86/kvm CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 298781d4cfb4..4dce6f8b6129 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr) (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); } -static int pit_ioport_write(struct kvm_io_device *this, +static int pit_ioport_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this, return 0; } -static int pit_ioport_read(struct kvm_io_device *this, +static int pit_ioport_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this, return 0; } -static int speaker_ioport_write(struct kvm_io_device *this, +static int speaker_ioport_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *data) { struct kvm_pit *pit = speaker_to_pit(this); @@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this, return 0; } -static int speaker_ioport_read(struct kvm_io_device *this, - gpa_t addr, int len, void *data) +static int speaker_ioport_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *data) { struct kvm_pit *pit = speaker_to_pit(this); struct kvm_kpit_state *pit_state = &pit->pit_state; diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index dd1b16b611b0..c84990b42b5b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -3,7 +3,7 @@ #include <linux/kthread.h> -#include "iodev.h" +#include <kvm/iodev.h> struct kvm_kpit_channel_state { u32 count; /* can be 65536 */ diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 9541ba34126b..fef922ff2635 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -529,42 +529,42 @@ static int picdev_read(struct kvm_pic *s, return 0; } -static int picdev_master_write(struct kvm_io_device *dev, +static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } -static int picdev_master_read(struct kvm_io_device *dev, +static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } -static int picdev_slave_write(struct kvm_io_device *dev, +static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } -static int picdev_slave_read(struct kvm_io_device *dev, +static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } -static int picdev_eclr_write(struct kvm_io_device *dev, +static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), addr, len, val); } -static int picdev_eclr_read(struct kvm_io_device *dev, +static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 24f0f17639d6..51889ec847b0 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -493,8 +493,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); } -static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, - void *val) +static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, + gpa_t addr, int len, void *val) { struct kvm_ioapic *ioapic = to_ioapic(this); u32 result; @@ -536,8 +536,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, return 0; } -static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, + gpa_t addr, int len, const void *val) { struct kvm_ioapic *ioapic = to_ioapic(this); u32 data; diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 6e265cfcd86a..ca0b0b4e6256 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -3,7 +3,7 @@ #include <linux/kvm_host.h> -#include "iodev.h" +#include <kvm/iodev.h> struct kvm; struct kvm_vcpu; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2d03568e9498..ad68c73008c5 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -27,7 +27,7 @@ #include <linux/kvm_host.h> #include <linux/spinlock.h> -#include "iodev.h" +#include <kvm/iodev.h> #include "ioapic.h" #include "lapic.h" diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bd4e34de24c7..44f7b9afbedb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1038,7 +1038,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) addr < apic->base_address + LAPIC_MMIO_LENGTH; } -static int apic_mmio_read(struct kvm_io_device *this, +static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, void *data) { struct kvm_lapic *apic = to_lapic(this); @@ -1358,7 +1358,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) return ret; } -static int apic_mmio_write(struct kvm_io_device *this, +static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, const void *data) { struct kvm_lapic *apic = to_lapic(this); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 0bc6c656625b..e284c2880c56 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -1,7 +1,7 @@ #ifndef __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0caaf56eb459..b5a6425d8d97 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5824,7 +5824,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) gpa_t gpa; gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { skip_emulated_instruction(vcpu); return 1; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc2c759f69a3..a284c927551e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4115,8 +4115,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, do { n = min(len, 8); if (!(vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) - && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) break; handled += n; addr += n; @@ -4135,8 +4135,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) do { n = min(len, 8); if (!(vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) - && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, + addr, n, v)) + && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); handled += n; @@ -4631,10 +4632,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) int r; if (vcpu->arch.pio.in) - r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, + r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); else - r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, + r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); return r; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index b3f45a578344..e5966758c093 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -24,17 +24,14 @@ #include <linux/workqueue.h> struct arch_timer_kvm { -#ifdef CONFIG_KVM_ARM_TIMER /* Is the timer enabled */ bool enabled; /* Virtual offset */ cycle_t cntvoff; -#endif }; struct arch_timer_cpu { -#ifdef CONFIG_KVM_ARM_TIMER /* Registers: control register, timer value */ u32 cntv_ctl; /* Saved/restored */ cycle_t cntv_cval; /* Saved/restored */ @@ -55,10 +52,8 @@ struct arch_timer_cpu { /* Timer IRQ */ const struct kvm_irq_level *irq; -#endif }; -#ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); void kvm_timer_enable(struct kvm *kvm); void kvm_timer_init(struct kvm *kvm); @@ -72,30 +67,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); -#else -static inline int kvm_timer_hyp_init(void) -{ - return 0; -}; - -static inline void kvm_timer_enable(struct kvm *kvm) {} -static inline void kvm_timer_init(struct kvm *kvm) {} -static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) {} -static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - return 0; -} - -static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - return 0; -} -#endif +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); #endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 66203b268984..133ea00aa83b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -24,6 +24,7 @@ #include <linux/irqreturn.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <kvm/iodev.h> #define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 @@ -140,16 +141,21 @@ struct vgic_params { }; struct vgic_vm_ops { - bool (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *, - struct kvm_exit_mmio *); bool (*queue_sgi)(struct kvm_vcpu *, int irq); void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source); int (*init_model)(struct kvm *); int (*map_resources)(struct kvm *, const struct vgic_params *); }; +struct vgic_io_device { + gpa_t addr; + int len; + const struct vgic_io_range *reg_ranges; + struct kvm_vcpu *redist_vcpu; + struct kvm_io_device dev; +}; + struct vgic_dist { -#ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; bool in_kernel; bool ready; @@ -197,6 +203,9 @@ struct vgic_dist { /* Level-triggered interrupt queued on VCPU interface */ struct vgic_bitmap irq_queued; + /* Interrupt was active when unqueue from VCPU interface */ + struct vgic_bitmap irq_active; + /* Interrupt priority. Not used yet. */ struct vgic_bytemap irq_priority; @@ -237,8 +246,12 @@ struct vgic_dist { /* Bitmap indicating which CPU has something pending */ unsigned long *irq_pending_on_cpu; + /* Bitmap indicating which CPU has active IRQs */ + unsigned long *irq_active_on_cpu; + struct vgic_vm_ops vm_ops; -#endif + struct vgic_io_device dist_iodev; + struct vgic_io_device *redist_iodevs; }; struct vgic_v2_cpu_if { @@ -266,13 +279,18 @@ struct vgic_v3_cpu_if { }; struct vgic_cpu { -#ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ u8 *vgic_irq_lr_map; - /* Pending interrupts on this VCPU */ + /* Pending/active/both interrupts on this VCPU */ DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS); + + /* Pending/active/both shared interrupts, dynamically sized */ unsigned long *pending_shared; + unsigned long *active_shared; + unsigned long *pend_act_shared; /* Bitmap of used/free list registers */ DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); @@ -285,7 +303,6 @@ struct vgic_cpu { struct vgic_v2_cpu_if vgic_v2; struct vgic_v3_cpu_if vgic_v3; }; -#endif }; #define LR_EMPTY 0xff @@ -295,10 +312,7 @@ struct vgic_cpu { struct kvm; struct kvm_vcpu; -struct kvm_run; -struct kvm_exit_mmio; -#ifdef CONFIG_KVM_ARM_VGIC int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_map_resources(struct kvm *kvm); @@ -312,8 +326,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio); +int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) @@ -335,84 +348,4 @@ static inline int vgic_v3_probe(struct device_node *vgic_node, } #endif -#else -static inline int kvm_vgic_hyp_init(void) -{ - return 0; -} - -static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - return 0; -} - -static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) -{ - return -ENXIO; -} - -static inline int kvm_vgic_map_resources(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_create(struct kvm *kvm, u32 type) -{ - return 0; -} - -static inline void kvm_vgic_destroy(struct kvm *kvm) -{ -} - -static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) -{ -} - -static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - return false; -} - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 0; -} - -static inline bool vgic_initialized(struct kvm *kvm) -{ - return true; -} - -static inline bool vgic_ready(struct kvm *kvm) -{ - return true; -} - -static inline int kvm_vgic_get_max_vcpus(void) -{ - return KVM_MAX_VCPUS; -} -#endif - #endif diff --git a/virt/kvm/iodev.h b/include/kvm/iodev.h index 12fd3caffd2b..a6d208b916f5 100644 --- a/virt/kvm/iodev.h +++ b/include/kvm/iodev.h @@ -9,17 +9,17 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __KVM_IODEV_H__ #define __KVM_IODEV_H__ #include <linux/kvm_types.h> -#include <asm/errno.h> +#include <linux/errno.h> struct kvm_io_device; +struct kvm_vcpu; /** * kvm_io_device_ops are called under kvm slots_lock. @@ -27,11 +27,13 @@ struct kvm_io_device; * or non-zero to have it passed to the next device. **/ struct kvm_io_device_ops { - int (*read)(struct kvm_io_device *this, + int (*read)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *val); - int (*write)(struct kvm_io_device *this, + int (*write)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *val); @@ -49,16 +51,20 @@ static inline void kvm_iodevice_init(struct kvm_io_device *dev, dev->ops = ops; } -static inline int kvm_iodevice_read(struct kvm_io_device *dev, - gpa_t addr, int l, void *v) +static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, void *v) { - return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; + return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; } -static inline int kvm_iodevice_write(struct kvm_io_device *dev, - gpa_t addr, int l, const void *v) +static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, const void *v) { - return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; + return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; } static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0f574ebc82f4..27bd53b69080 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -165,12 +165,12 @@ enum kvm_bus { KVM_NR_BUSES }; -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val); -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie); -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, - void *val); +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie); +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, @@ -699,6 +699,20 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) #endif } +#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED +/* + * returns true if the virtual interrupt controller is initialized and + * ready to accept virtual IRQ. On some architectures the virtual interrupt + * controller is dynamically instantiated and this is not always true. + */ +bool kvm_arch_intc_initialized(struct kvm *kvm); +#else +static inline bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return true; +} +#endif + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6e54f3542126..98c95f2fcba4 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Work function for handling the backup timer that we schedule when a vcpu is + * no longer running, but had a timer programmed to fire in the future. + */ static void kvm_timer_inject_irq_work(struct work_struct *work) { struct kvm_vcpu *vcpu; vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); + + /* + * If the vcpu is blocked we want to wake it up so that it will see + * the timer has expired when entering the guest. + */ + kvm_vcpu_kick(vcpu); } static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) @@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + cycle_t cval, now; + + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) + return false; + + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + return cval <= now; +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer @@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * populate the CPU timer again. */ timer_disarm(timer); + + /* + * If the timer expired while we were not scheduled, now is the time + * to inject it. + */ + if (kvm_timer_should_fire(vcpu)) + kvm_timer_inject_irq(vcpu); } /** @@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - BUG_ON(timer_is_armed(timer)); - if (cval <= now) { + if (kvm_timer_should_fire(vcpu)) { /* * Timer has already expired while we were not * looking. Inject the interrupt and carry on. @@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) return; } + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, &timecounter->frac); timer_arm(timer, ns); diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 19c6210f02cf..13907970d11c 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id); } +static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -303,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } -static const struct kvm_mmio_range vgic_dist_ranges[] = { +static const struct vgic_io_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { .base = GIC_DIST_ACTIVE_SET, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_set_active_reg, }, { .base = GIC_DIST_ACTIVE_CLEAR, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_clear_active_reg, }, { .base = GIC_DIST_PRI, @@ -388,24 +404,6 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { {} }; -static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; - - if (!is_in_range(mmio->phys_addr, mmio->len, base, - KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* GICv2 does not support accesses wider than 32 bits */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); -} - static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -490,6 +488,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) static int vgic_v2_map_resources(struct kvm *kvm, const struct vgic_params *params) { + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (!irqchip_in_kernel(kvm)) @@ -500,13 +499,17 @@ static int vgic_v2_map_resources(struct kvm *kvm, if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); ret = -ENXIO; goto out; } + vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + KVM_VGIC_V2_DIST_SIZE, + vgic_dist_ranges, -1, &dist->dist_iodev); + /* * Initialize the vgic if this hasn't already been done on demand by * accessing the vgic state from userspace. @@ -514,18 +517,23 @@ static int vgic_v2_map_resources(struct kvm *kvm, ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); - goto out; + goto out_unregister; } - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + goto out_unregister; } - kvm->arch.vgic.ready = true; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + out: if (ret) kvm_vgic_destroy(kvm); @@ -554,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; dist->vm_ops.init_model = vgic_v2_init_model; @@ -631,7 +638,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, * CPU Interface Register accesses - these are not accessed by the VM, but by * user space for saving and restoring VGIC state. */ -static const struct kvm_mmio_range vgic_cpu_ranges[] = { +static const struct vgic_io_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, .len = 12, @@ -658,12 +665,13 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { - const struct kvm_mmio_range *r = NULL, *ranges; + const struct vgic_io_range *r = NULL, *ranges; phys_addr_t offset; int ret, cpuid, c; struct kvm_vcpu *vcpu, *tmp_vcpu; struct vgic_dist *vgic; struct kvm_exit_mmio mmio; + u32 data; offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> @@ -685,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mmio.len = 4; mmio.is_write = is_write; + mmio.data = &data; if (is_write) mmio_data_write(&mmio, ~0, *reg); switch (attr->group) { @@ -699,7 +708,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, default: BUG(); } - r = vgic_find_range(ranges, &mmio, offset); + r = vgic_find_range(ranges, 4, offset); if (unlikely(!r || !r->handle_mmio)) { ret = -ENXIO; diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index b3f154631515..e9c3a7a83833 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { +static const struct vgic_io_range vgic_v3_dist_ranges[] = { { .base = GICD_CTLR, .len = 0x04, @@ -502,6 +502,43 @@ static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { {}, }; +static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + /* since we don't support LPIs, this register is zero for now */ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + u64 mpidr; + struct kvm_vcpu *redist_vcpu = mmio->private; + int target_vcpu_id = redist_vcpu->vcpu_id; + + /* the upper 32 bits contain the affinity value */ + if ((offset & ~3) == 4) { + mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); + reg = compress_mpidr(mpidr); + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = redist_vcpu->vcpu_id << 8; + if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + reg |= GICR_TYPER_LAST; + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; +} + static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -570,186 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } -static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { +#define SGI_base(x) ((x) + SZ_64K) + +static const struct vgic_io_range vgic_redist_ranges[] = { + { + .base = GICR_CTLR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_ctlr_redist, + }, + { + .base = GICR_TYPER, + .len = 0x08, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_typer_redist, + }, + { + .base = GICR_IIDR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_iidr, + }, + { + .base = GICR_WAKER, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, { - .base = GICR_IGROUPR0, + .base = GICR_IDREGS, + .len = 0x30, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_idregs, + }, + { + .base = SGI_base(GICR_IGROUPR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_rao_wi, }, { - .base = GICR_ISENABLER0, + .base = SGI_base(GICR_ISENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg_redist, }, { - .base = GICR_ICENABLER0, + .base = SGI_base(GICR_ICENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg_redist, }, { - .base = GICR_ISPENDR0, + .base = SGI_base(GICR_ISPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg_redist, }, { - .base = GICR_ICPENDR0, + .base = SGI_base(GICR_ICPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg_redist, }, { - .base = GICR_ISACTIVER0, + .base = SGI_base(GICR_ISACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_ICACTIVER0, + .base = SGI_base(GICR_ICACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_IPRIORITYR0, + .base = SGI_base(GICR_IPRIORITYR0), .len = 0x20, .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg_redist, }, { - .base = GICR_ICFGR0, + .base = SGI_base(GICR_ICFGR0), .len = 0x08, .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg_redist, }, { - .base = GICR_IGRPMODR0, + .base = SGI_base(GICR_IGRPMODR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_NSACR, + .base = SGI_base(GICR_NSACR), .len = 0x04, .handle_mmio = handle_mmio_raz_wi, }, {}, }; -static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - /* since we don't support LPIs, this register is zero for now */ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - u64 mpidr; - struct kvm_vcpu *redist_vcpu = mmio->private; - int target_vcpu_id = redist_vcpu->vcpu_id; - - /* the upper 32 bits contain the affinity value */ - if ((offset & ~3) == 4) { - mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); - reg = compress_mpidr(mpidr); - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = redist_vcpu->vcpu_id << 8; - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - reg |= GICR_TYPER_LAST; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; -} - -static const struct kvm_mmio_range vgic_redist_ranges[] = { - { - .base = GICR_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr_redist, - }, - { - .base = GICR_TYPER, - .len = 0x08, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer_redist, - }, - { - .base = GICR_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - .base = GICR_WAKER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICR_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - {}, -}; - -/* - * This function splits accesses between the distributor and the two - * redistributor parts (private/SPI). As each redistributor is accessible - * from any CPU, we have to determine the affected VCPU by taking the faulting - * address into account. We then pass this VCPU to the handler function via - * the private parameter. - */ -#define SGI_BASE_OFFSET SZ_64K -static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long dbase = dist->vgic_dist_base; - unsigned long rdbase = dist->vgic_redist_base; - int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); - int vcpu_id; - const struct kvm_mmio_range *mmio_range; - - if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { - return vgic_handle_mmio_range(vcpu, run, mmio, - vgic_v3_dist_ranges, dbase); - } - - if (!is_in_range(mmio->phys_addr, mmio->len, rdbase, - GIC_V3_REDIST_SIZE * nrcpus)) - return false; - - vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE; - rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); - mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); - - if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) { - rdbase += SGI_BASE_OFFSET; - mmio_range = vgic_redist_sgi_ranges; - } else { - mmio_range = vgic_redist_ranges; - } - return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); -} - static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) { if (vgic_queue_irq(vcpu, 0, irq)) { @@ -766,6 +724,9 @@ static int vgic_v3_map_resources(struct kvm *kvm, { int ret = 0; struct vgic_dist *dist = &kvm->arch.vgic; + gpa_t rdbase = dist->vgic_redist_base; + struct vgic_io_device *iodevs = NULL; + int i; if (!irqchip_in_kernel(kvm)) return 0; @@ -791,7 +752,41 @@ static int vgic_v3_map_resources(struct kvm *kvm, goto out; } - kvm->arch.vgic.ready = true; + ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, + -1, &dist->dist_iodev); + if (ret) + goto out; + + iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL); + if (!iodevs) { + ret = -ENOMEM; + goto out_unregister; + } + + for (i = 0; i < dist->nr_cpus; i++) { + ret = vgic_register_kvm_io_dev(kvm, rdbase, + SZ_128K, vgic_redist_ranges, + i, &iodevs[i]); + if (ret) + goto out_unregister; + rdbase += GIC_V3_REDIST_SIZE; + } + + dist->redist_iodevs = iodevs; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + if (iodevs) { + for (i = 0; i < dist->nr_cpus; i++) { + if (iodevs[i].dev.ops) + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + &iodevs[i].dev); + } + } + out: if (ret) kvm_vgic_destroy(kvm); @@ -832,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v3_handle_mmio; dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; dist->vm_ops.init_model = vgic_v3_init_model; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c9f60f524588..8d550ff14700 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -31,6 +31,9 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> +#include <trace/events/kvm.h> +#include <asm/kvm.h> +#include <kvm/iodev.h> /* * How the whole thing works (courtesy of Christoffer Dall): @@ -263,6 +266,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } +static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); +} + static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -277,6 +287,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); +} + +static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -520,6 +544,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, return false; } +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + static u32 vgic_cfg_expand(u16 val) { u32 res = 0; @@ -588,16 +650,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, } /** - * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs * - * Move any pending IRQs that have already been assigned to LRs back to the + * Move any IRQs that have already been assigned to LRs back to the * emulated distributor state so that the complete emulated state can be read * from the main emulation structures without investigating the LRs. - * - * Note that IRQs in the active state in the LRs get their pending state moved - * to the distributor but the active state stays in the LRs, because we don't - * track the active state on the distributor side. */ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { @@ -613,12 +671,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * 01: pending * 10: active * 11: pending and active - * - * If the LR holds only an active interrupt (not pending) then - * just leave it alone. */ - if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) - continue; + BUG_ON(!(lr.state & LR_STATE_MASK)); + + /* Reestablish SGI source for pending and active IRQs */ + if (lr.irq < VGIC_NR_SGIS) + add_sgi_source(vcpu, lr.irq, lr.source); + + /* + * If the LR holds an active (10) or a pending and active (11) + * interrupt then move the active state to the + * distributor tracking bit. + */ + if (lr.state & LR_STATE_ACTIVE) { + vgic_irq_set_active(vcpu, lr.irq); + lr.state &= ~LR_STATE_ACTIVE; + } /* * Reestablish the pending state on the distributor and the @@ -626,21 +694,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set_pending(vcpu, lr.irq); - if (lr.irq < VGIC_NR_SGIS) - add_sgi_source(vcpu, lr.irq, lr.source); - lr.state &= ~LR_STATE_PENDING; + if (lr.state & LR_STATE_PENDING) { + vgic_dist_irq_set_pending(vcpu, lr.irq); + lr.state &= ~LR_STATE_PENDING; + } + vgic_set_lr(vcpu, i, lr); /* - * If there's no state left on the LR (it could still be - * active), then the LR does not hold any useful info and can - * be marked as free for other use. + * Mark the LR as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) { - vgic_retire_lr(i, lr.irq, vcpu); - vgic_irq_clear_queued(vcpu, lr.irq); - } + BUG_ON(lr.state & LR_STATE_MASK); + vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -648,24 +714,21 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - const struct kvm_mmio_range *r = ranges; - - while (r->len) { - if (offset >= r->base && - (offset + mmio->len) <= (r->base + r->len)) - return r; - r++; +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + int len, gpa_t offset) +{ + while (ranges->len) { + if (offset >= ranges->base && + (offset + len) <= (ranges->base + ranges->len)) + return ranges; + ranges++; } return NULL; } static bool vgic_validate_access(const struct vgic_dist *dist, - const struct kvm_mmio_range *range, + const struct vgic_io_range *range, unsigned long offset) { int irq; @@ -693,9 +756,8 @@ static bool vgic_validate_access(const struct vgic_dist *dist, static bool call_range_handler(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, unsigned long offset, - const struct kvm_mmio_range *range) + const struct vgic_io_range *range) { - u32 *data32 = (void *)mmio->data; struct kvm_exit_mmio mmio32; bool ret; @@ -712,91 +774,142 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, mmio32.private = mmio->private; mmio32.phys_addr = mmio->phys_addr + 4; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[1]; + mmio32.data = &((u32 *)mmio->data)[1]; ret = range->handle_mmio(vcpu, &mmio32, offset + 4); - if (!mmio->is_write) - data32[1] = *(u32 *)mmio32.data; mmio32.phys_addr = mmio->phys_addr; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[0]; + mmio32.data = &((u32 *)mmio->data)[0]; ret |= range->handle_mmio(vcpu, &mmio32, offset); - if (!mmio->is_write) - data32[0] = *(u32 *)mmio32.data; return ret; } /** - * vgic_handle_mmio_range - handle an in-kernel MMIO access + * vgic_handle_mmio_access - handle an in-kernel MMIO access + * This is called by the read/write KVM IO device wrappers below. * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * @ranges: array of MMIO ranges in a given region - * @mmio_base: base address of that region + * @this: pointer to the KVM IO device in charge + * @addr: guest physical address of the access + * @len: size of the access + * @val: pointer to the data region + * @is_write: read or write access * * returns true if the MMIO access could be performed */ -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, - unsigned long mmio_base) +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, void *val, bool is_write) { - const struct kvm_mmio_range *range; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_io_device *iodev = container_of(this, + struct vgic_io_device, dev); + struct kvm_run *run = vcpu->run; + const struct vgic_io_range *range; + struct kvm_exit_mmio mmio; bool updated_state; - unsigned long offset; + gpa_t offset; - offset = mmio->phys_addr - mmio_base; - range = vgic_find_range(ranges, mmio, offset); + offset = addr - iodev->addr; + range = vgic_find_range(iodev->reg_ranges, len, offset); if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; + pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len); + return -ENXIO; } - spin_lock(&vcpu->kvm->arch.vgic.lock); + mmio.phys_addr = addr; + mmio.len = len; + mmio.is_write = is_write; + mmio.data = val; + mmio.private = iodev->redist_vcpu; + + spin_lock(&dist->lock); offset -= range->base; if (vgic_validate_access(dist, range, offset)) { - updated_state = call_range_handler(vcpu, mmio, offset, range); + updated_state = call_range_handler(vcpu, &mmio, offset, range); } else { - if (!mmio->is_write) - memset(mmio->data, 0, mmio->len); + if (!is_write) + memset(val, 0, len); updated_state = false; } - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); + spin_unlock(&dist->lock); + run->mmio.is_write = is_write; + run->mmio.len = len; + run->mmio.phys_addr = addr; + memcpy(run->mmio.data, val, len); + kvm_handle_mmio_return(vcpu, run); if (updated_state) vgic_kick_vcpus(vcpu->kvm); - return true; + return 0; +} + +static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); } +static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, + true); +} + +struct kvm_io_device_ops vgic_io_ops = { + .read = vgic_handle_mmio_read, + .write = vgic_handle_mmio_write, +}; + /** - * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access + * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus + * @kvm: The VM structure pointer + * @base: The (guest) base address for the register frame + * @len: Length of the register frame window + * @ranges: Describing the handler functions for each register + * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call + * @iodev: Points to memory to be passed on to the handler * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - * Calls the actual handling routine for the selected VGIC model. + * @iodev stores the parameters of this function to be usable by the handler + * respectively the dispatcher function (since the KVM I/O bus framework lacks + * an opaque parameter). Initialization is done in this function, but the + * reference should be valid and unique for the whole VGIC lifetime. + * If the register frame is not mapped for a specific VCPU, pass -1 to + * @redist_vcpu_id. */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_vcpu_id, + struct vgic_io_device *iodev) { - if (!irqchip_in_kernel(vcpu->kvm)) - return false; + struct kvm_vcpu *vcpu = NULL; + int ret; - /* - * This will currently call either vgic_v2_handle_mmio() or - * vgic_v3_handle_mmio(), which in turn will call - * vgic_handle_mmio_range() defined above. - */ - return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); + if (redist_vcpu_id >= 0) + vcpu = kvm_get_vcpu(kvm, redist_vcpu_id); + + iodev->addr = base; + iodev->len = len; + iodev->reg_ranges = ranges; + iodev->redist_vcpu = vcpu; + + kvm_iodevice_init(&iodev->dev, &vgic_io_ops); + + mutex_lock(&kvm->slots_lock); + + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len, + &iodev->dev); + mutex_unlock(&kvm->slots_lock); + + /* Mark the iodev as invalid if registration fails. */ + if (ret) + iodev->dev.ops = NULL; + + return ret; } static int vgic_nr_shared_irqs(struct vgic_dist *dist) @@ -804,6 +917,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist) return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; } +static int compute_active_for_cpu(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *active, *enabled, *act_percpu, *act_shared; + unsigned long active_private, active_shared; + int nr_shared = vgic_nr_shared_irqs(dist); + int vcpu_id; + + vcpu_id = vcpu->vcpu_id; + act_percpu = vcpu->arch.vgic_cpu.active_percpu; + act_shared = vcpu->arch.vgic_cpu.active_shared; + + active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id); + enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); + bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS); + + active = vgic_bitmap_get_shared_map(&dist->irq_active); + enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); + bitmap_and(act_shared, active, enabled, nr_shared); + bitmap_and(act_shared, act_shared, + vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), + nr_shared); + + active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS); + active_shared = find_first_bit(act_shared, nr_shared); + + return (active_private < VGIC_NR_PRIVATE_IRQS || + active_shared < nr_shared); +} + static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -835,7 +978,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) /* * Update the interrupt state and determine which CPUs have pending - * interrupts. Must be called with distributor lock held. + * or active interrupts. Must be called with distributor lock held. */ void vgic_update_state(struct kvm *kvm) { @@ -849,10 +992,13 @@ void vgic_update_state(struct kvm *kvm) } kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) { - pr_debug("CPU%d has pending interrupts\n", c); + if (compute_pending_for_cpu(vcpu)) set_bit(c, dist->irq_pending_on_cpu); - } + + if (compute_active_for_cpu(vcpu)) + set_bit(c, dist->irq_active_on_cpu); + else + clear_bit(c, dist->irq_active_on_cpu); } } @@ -955,6 +1101,26 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) } } +static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, + int lr_nr, struct vgic_lr vlr) +{ + if (vgic_irq_is_active(vcpu, irq)) { + vlr.state |= LR_STATE_ACTIVE; + kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); + vgic_irq_clear_active(vcpu, irq); + vgic_update_state(vcpu->kvm); + } else if (vgic_dist_irq_is_pending(vcpu, irq)) { + vlr.state |= LR_STATE_PENDING; + kvm_debug("Set pending: 0x%x\n", vlr.state); + } + + if (!vgic_irq_is_edge(vcpu, irq)) + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr_nr, vlr); + vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); +} + /* * Queue an interrupt to a CPU virtual interface. Return true on success, * or false if it wasn't possible to queue it. @@ -982,9 +1148,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) if (vlr.source == sgi_source_id) { kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vlr.state |= LR_STATE_PENDING; - vgic_set_lr(vcpu, lr, vlr); - vgic_sync_lr_elrsr(vcpu, lr, vlr); + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } } @@ -1001,12 +1165,8 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) vlr.irq = irq; vlr.source = sgi_source_id; - vlr.state = LR_STATE_PENDING; - if (!vgic_irq_is_edge(vcpu, irq)) - vlr.state |= LR_EOI_INT; - - vgic_set_lr(vcpu, lr, vlr); - vgic_sync_lr_elrsr(vcpu, lr, vlr); + vlr.state = 0; + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } @@ -1038,39 +1198,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *pa_percpu, *pa_shared; int i, vcpu_id; int overflow = 0; + int nr_shared = vgic_nr_shared_irqs(dist); vcpu_id = vcpu->vcpu_id; + pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu; + pa_shared = vcpu->arch.vgic_cpu.pend_act_shared; + + bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu, + VGIC_NR_PRIVATE_IRQS); + bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared, + nr_shared); /* * We may not have any pending interrupt, or the interrupts * may have been serviced from another vcpu. In all cases, * move along. */ - if (!kvm_vgic_vcpu_pending_irq(vcpu)) { - pr_debug("CPU%d has no pending interrupt\n", vcpu_id); + if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu)) goto epilog; - } /* SGIs */ - for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { + for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) { if (!queue_sgi(vcpu, i)) overflow = 1; } /* PPIs */ - for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { + for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) { if (!vgic_queue_hwirq(vcpu, i)) overflow = 1; } /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { + for_each_set_bit(i, pa_shared, nr_shared) { if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) overflow = 1; } + + + epilog: if (overflow) { vgic_enable_underflow(vcpu); @@ -1089,7 +1259,9 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { u32 status = vgic_get_interrupt_status(vcpu); + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; bool level_pending = false; + struct kvm *kvm = vcpu->kvm; kvm_debug("STATUS = %08x\n", status); @@ -1106,6 +1278,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_lr vlr = vgic_get_lr(vcpu, lr); WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); + spin_lock(&dist->lock); vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; @@ -1124,6 +1297,17 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) */ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + /* + * kvm_notify_acked_irq calls kvm_set_irq() + * to reset the IRQ level. Need to release the + * lock for kvm_set_irq to grab it. + */ + spin_unlock(&dist->lock); + + kvm_notify_acked_irq(kvm, 0, + vlr.irq - VGIC_NR_PRIVATE_IRQS); + spin_lock(&dist->lock); + /* Any additional pending interrupt? */ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { vgic_cpu_irq_set(vcpu, vlr.irq); @@ -1133,6 +1317,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) vgic_cpu_irq_clear(vcpu, vlr.irq); } + spin_unlock(&dist->lock); + /* * Despite being EOIed, the LR may not have * been marked as empty. @@ -1155,10 +1341,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) return level_pending; } -/* - * Sync back the VGIC state after a guest run. The distributor lock is - * needed so we don't get preempted in the middle of the state processing. - */ +/* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1205,14 +1388,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - if (!irqchip_in_kernel(vcpu->kvm)) return; - spin_lock(&dist->lock); __kvm_vgic_sync_hwstate(vcpu); - spin_unlock(&dist->lock); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) @@ -1225,6 +1404,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } +int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); +} + + void vgic_kick_vcpus(struct kvm *kvm) { struct kvm_vcpu *vcpu; @@ -1397,8 +1587,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; kfree(vgic_cpu->pending_shared); + kfree(vgic_cpu->active_shared); + kfree(vgic_cpu->pend_act_shared); kfree(vgic_cpu->vgic_irq_lr_map); vgic_cpu->pending_shared = NULL; + vgic_cpu->active_shared = NULL; + vgic_cpu->pend_act_shared = NULL; vgic_cpu->vgic_irq_lr_map = NULL; } @@ -1408,9 +1602,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); - if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { + if (!vgic_cpu->pending_shared + || !vgic_cpu->active_shared + || !vgic_cpu->pend_act_shared + || !vgic_cpu->vgic_irq_lr_map) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } @@ -1463,10 +1662,12 @@ void kvm_vgic_destroy(struct kvm *kvm) kfree(dist->irq_spi_mpidr); kfree(dist->irq_spi_target); kfree(dist->irq_pending_on_cpu); + kfree(dist->irq_active_on_cpu); dist->irq_sgi_sources = NULL; dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; dist->irq_pending_on_cpu = NULL; + dist->irq_active_on_cpu = NULL; dist->nr_cpus = 0; } @@ -1502,6 +1703,7 @@ int vgic_init(struct kvm *kvm) ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); @@ -1514,10 +1716,13 @@ int vgic_init(struct kvm *kvm) GFP_KERNEL); dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), GFP_KERNEL); + dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), + GFP_KERNEL); if (!dist->irq_sgi_sources || !dist->irq_spi_cpu || !dist->irq_spi_target || - !dist->irq_pending_on_cpu) { + !dist->irq_pending_on_cpu || + !dist->irq_active_on_cpu) { ret = -ENOMEM; goto out; } @@ -1845,12 +2050,9 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return r; } -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) { - struct kvm_exit_mmio dev_attr_mmio; - - dev_attr_mmio.len = 4; - if (vgic_find_range(ranges, &dev_attr_mmio, offset)) + if (vgic_find_range(ranges, 4, offset)) return 0; else return -ENXIO; @@ -1883,8 +2085,10 @@ static struct notifier_block vgic_cpu_nb = { }; static const struct of_device_id vgic_ids[] = { - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-400", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; @@ -1932,3 +2136,38 @@ out_free_irq: free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, + int gsi) +{ + return gsi; +} + +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + return pin; +} + +int kvm_set_irq(struct kvm *kvm, int irq_source_id, + u32 irq, int level, bool line_status) +{ + unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; + + trace_kvm_set_irq(irq, level, irq_source_id); + + BUG_ON(!vgic_initialized(kvm)); + + if (spi > kvm->arch.vgic.nr_irqs) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi, level); + +} + +/* MSI not implemented yet */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + return 0; +} diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 1e83bdf5f499..0df74cbb6200 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -20,6 +20,8 @@ #ifndef __KVM_VGIC_H__ #define __KVM_VGIC_H__ +#include <kvm/iodev.h> + #define VGIC_ADDR_UNDEF (-1) #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) @@ -57,6 +59,14 @@ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); +struct kvm_exit_mmio { + phys_addr_t phys_addr; + void *data; + u32 len; + bool is_write; + void *private; +}; + void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, phys_addr_t offset, int mode); bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, @@ -74,7 +84,7 @@ void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } -struct kvm_mmio_range { +struct vgic_io_range { phys_addr_t base; unsigned long len; int bits_per_irq; @@ -82,6 +92,11 @@ struct kvm_mmio_range { phys_addr_t offset); }; +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_id, + struct vgic_io_device *iodev); + static inline bool is_in_range(phys_addr_t addr, unsigned long len, phys_addr_t baseaddr, unsigned long size) { @@ -89,14 +104,8 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len, } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset); - -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, - unsigned long mmio_base); +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + int len, gpa_t offset); bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id, int access); @@ -107,12 +116,20 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id); +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, phys_addr_t offset); void vgic_kick_vcpus(struct kvm *kvm); -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset); +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset); int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 00d86427af0f..571c1ce37d15 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -8,7 +8,7 @@ * */ -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> #include <linux/slab.h> @@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) return 1; } -static int coalesced_mmio_write(struct kvm_io_device *this, - gpa_t addr, int len, const void *val) +static int coalesced_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 148b2392c762..9ff4193dfa49 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,7 +36,7 @@ #include <linux/seqlock.h> #include <trace/events/kvm.h> -#include "iodev.h" +#include <kvm/iodev.h> #ifdef CONFIG_HAVE_KVM_IRQFD /* @@ -311,6 +311,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) unsigned int events; int idx; + if (!kvm_arch_intc_initialized(kvm)) + return -EAGAIN; + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) return -ENOMEM; @@ -712,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) /* MMIO/PIO writes trigger an event if the addr/val match */ static int -ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct _ioeventfd *p = to_ioeventfd(this); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ce7888a15128..efe59ae64dc3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -16,7 +16,7 @@ * */ -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> #include <linux/kvm.h> @@ -2994,7 +2994,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, return off; } -static int __kvm_io_bus_write(struct kvm_io_bus *bus, +static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, struct kvm_io_range *range, const void *val) { int idx; @@ -3005,7 +3005,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3015,7 +3015,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, } /* kvm_io_bus_write - called under kvm->slots_lock */ -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { struct kvm_io_bus *bus; @@ -3027,14 +3027,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_write(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie) +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie) { struct kvm_io_bus *bus; struct kvm_io_range range; @@ -3044,12 +3044,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) - if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, + if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, val)) return cookie; @@ -3057,11 +3057,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, * cookie contained garbage; fall back to search and return the * correct cookie value. */ - return __kvm_io_bus_write(bus, &range, val); + return __kvm_io_bus_write(vcpu, bus, &range, val); } -static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, - void *val) +static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, + struct kvm_io_range *range, void *val) { int idx; @@ -3071,7 +3071,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3082,7 +3082,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, EXPORT_SYMBOL_GPL(kvm_io_bus_write); /* kvm_io_bus_read - called under kvm->slots_lock */ -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { struct kvm_io_bus *bus; @@ -3094,8 +3094,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_read(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } |