 Documentation/admin-guide/kernel-parameters.txt    |   4
 Documentation/virtual/kvm/devices/arm-vgic-its.txt |   2
 arch/arm/kvm/Kconfig                               |   5
 arch/arm/kvm/Makefile                              |   1
 arch/arm64/kvm/Kconfig                             |   3
 arch/arm64/kvm/Makefile                            |   1
 arch/x86/kvm/svm.c                                 |   7
 arch/x86/kvm/vmx.c                                 | 161
 include/kvm/arm_vgic.h                             |  41
 virt/kvm/arm/arch_timer.c                          |  24
 virt/kvm/arm/arm.c                                 |  48
 virt/kvm/arm/hyp/vgic-v3-sr.c                      |   9
 virt/kvm/arm/vgic/vgic-init.c                      |   7
 virt/kvm/arm/vgic/vgic-its.c                       | 204
 virt/kvm/arm/vgic/vgic-mmio-v3.c                   |   5
 virt/kvm/arm/vgic/vgic-v3.c                        |  14
 virt/kvm/arm/vgic/vgic-v4.c                        | 364
 virt/kvm/arm/vgic/vgic.c                           |  67
 virt/kvm/arm/vgic/vgic.h                           |  10
 19 files changed, 819 insertions(+), 158 deletions(-)
| diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b44217290e57..6571fbfdb2a1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1890,6 +1890,10 @@  			[KVM,ARM] Trap guest accesses to GICv3 common  			system registers +	kvm-arm.vgic_v4_enable= +			[KVM,ARM] Allow use of GICv4 for direct injection of +			LPIs. +  	kvm-intel.ept=	[KVM,Intel] Disable extended page tables  			(virtualized MMU) support on capable Intel chips.  			Default is 1 (enabled) diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt index 8d5830eab26a..4f0c9fc40365 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt @@ -64,6 +64,8 @@ Groups:      -EINVAL: Inconsistent restored data      -EFAULT: Invalid guest ram access      -EBUSY:  One or more VCPUS are running +    -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the +	     state is not available    KVM_DEV_ARM_VGIC_GRP_ITS_REGS    Attributes: diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index f24628db5409..e2bd35b6780c 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -4,6 +4,7 @@  #  source "virt/kvm/Kconfig" +source "virt/lib/Kconfig"  menuconfig VIRTUALIZATION  	bool "Virtualization" @@ -23,6 +24,8 @@ config KVM  	select PREEMPT_NOTIFIERS  	select ANON_INODES  	select ARM_GIC +	select ARM_GIC_V3 +	select ARM_GIC_V3_ITS  	select HAVE_KVM_CPU_RELAX_INTERCEPT  	select HAVE_KVM_ARCH_TLB_FLUSH_ALL  	select KVM_MMIO @@ -36,6 +39,8 @@ config KVM  	select HAVE_KVM_IRQCHIP  	select HAVE_KVM_IRQ_ROUTING  	select HAVE_KVM_MSI +	select IRQ_BYPASS_MANAGER +	select HAVE_KVM_IRQ_BYPASS  	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER  	---help---  	  Support hosting virtualized guest machines. diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index f550abd64a25..48de846f2246 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o  obj-y += $(KVM)/arm/vgic/vgic-irqfd.o  obj-y += $(KVM)/arm/vgic/vgic-v2.o  obj-y += $(KVM)/arm/vgic/vgic-v3.o +obj-y += $(KVM)/arm/vgic/vgic-v4.o  obj-y += $(KVM)/arm/vgic/vgic-mmio.o  obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o  obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 13f81f971390..2257dfcc44cc 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -4,6 +4,7 @@  #  source "virt/kvm/Kconfig" +source "virt/lib/Kconfig"  menuconfig VIRTUALIZATION  	bool "Virtualization" @@ -36,6 +37,8 @@ config KVM  	select HAVE_KVM_MSI  	select HAVE_KVM_IRQCHIP  	select HAVE_KVM_IRQ_ROUTING +	select IRQ_BYPASS_MANAGER +	select HAVE_KVM_IRQ_BYPASS  	---help---  	  Support hosting virtualized guest machines.  	  
We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 861acbbac385..87c4f7ae24de 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b71daed3cca2..59e13a79c2e3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)  	u32 ecx = msr->index;  	u64 data = msr->data;  	switch (ecx) { +	case MSR_IA32_CR_PAT: +		if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) +			return 1; +		vcpu->arch.pat = data; +		svm->vmcb->save.g_pat = data; +		mark_dirty(svm->vmcb, VMCB_NPT); +		break;  	case MSR_IA32_TSC:  		kvm_write_tsc(vcpu, msr);  		break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7c3522a989d0..714a0673ec3c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);  static bool __read_mostly enable_vpid = 1;  module_param_named(vpid, enable_vpid, bool, 0444); +static bool __read_mostly enable_vnmi = 1; +module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); +  static bool __read_mostly flexpriority_enabled = 1;  module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); @@ -202,6 +205,10 @@ struct loaded_vmcs {  	bool nmi_known_unmasked;  	unsigned long vmcs_host_cr3;	/* May not match real cr3 */  	unsigned long vmcs_host_cr4;	/* May not match real cr4 */ +	/* Support for vnmi-less CPUs */ +	int soft_vnmi_blocked; +	ktime_t entry_time; +	s64 vnmi_blocked_time;  	struct list_head loaded_vmcss_on_cpu_link;  }; @@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)  		SECONDARY_EXEC_ENABLE_INVPCID;  } +static inline bool cpu_has_virtual_nmis(void) +{ +	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; +} +  static inline bool cpu_has_vmx_wbinvd_exit(void)  {  	return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)  		(vmcs12->secondary_vm_exec_control & bit);  } -static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) -{ -	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; -} -  static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)  {  	return vmcs12->pin_based_vm_exec_control & @@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)  				&_vmexit_control) < 0)  		return -EIO; -	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | -		PIN_BASED_VIRTUAL_NMIS; -	opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER; +	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; +	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | +		 PIN_BASED_VMX_PREEMPTION_TIMER;  	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,  				&_pin_based_exec_control) < 0)  		return -EIO; @@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)  	if (!kvm_vcpu_apicv_active(&vmx->vcpu))  		
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + +	if (!enable_vnmi) +		pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; +  	/* Enable the preemption timer dynamically */  	pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;  	return pin_based_exec_ctrl; @@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)  static void enable_nmi_window(struct kvm_vcpu *vcpu)  { -	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { +	if (!enable_vnmi || +	    vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {  		enable_irq_window(vcpu);  		return;  	} @@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)  {  	struct vcpu_vmx *vmx = to_vmx(vcpu); +	if (!enable_vnmi) { +		/* +		 * Tracking the NMI-blocked state in software is built upon +		 * finding the next open IRQ window. This, in turn, depends on +		 * well-behaving guests: They have to keep IRQs disabled at +		 * least as long as the NMI handler runs. Otherwise we may +		 * cause NMI nesting, maybe breaking the guest. But as this is +		 * highly unlikely, we can live with the residual risk. +		 */ +		vmx->loaded_vmcs->soft_vnmi_blocked = 1; +		vmx->loaded_vmcs->vnmi_blocked_time = 0; +	} +  	++vcpu->stat.nmi_injections;  	vmx->loaded_vmcs->nmi_known_unmasked = false; @@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)  	struct vcpu_vmx *vmx = to_vmx(vcpu);  	bool masked; +	if (!enable_vnmi) +		return vmx->loaded_vmcs->soft_vnmi_blocked;  	if (vmx->loaded_vmcs->nmi_known_unmasked)  		return false;  	masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; @@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)  {  	struct vcpu_vmx *vmx = to_vmx(vcpu); -	vmx->loaded_vmcs->nmi_known_unmasked = !masked; -	if (masked) -		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, -			      GUEST_INTR_STATE_NMI); -	else -		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, -				GUEST_INTR_STATE_NMI); +	if (!enable_vnmi) { +		if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { +			vmx->loaded_vmcs->soft_vnmi_blocked = masked; +			vmx->loaded_vmcs->vnmi_blocked_time = 0; +		} +	} else { +		vmx->loaded_vmcs->nmi_known_unmasked = !masked; +		if (masked) +			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, +				      GUEST_INTR_STATE_NMI); +		else +			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, +					GUEST_INTR_STATE_NMI); +	}  }  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) @@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)  	if (to_vmx(vcpu)->nested.nested_run_pending)  		return 0; +	if (!enable_vnmi && +	    to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) +		return 0; +  	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &  		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI  		   | GUEST_INTR_STATE_NMI)); @@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)  	 * AAK134, BY25.  	 
*/  	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && +			enable_vnmi &&  			(exit_qualification & INTR_INFO_UNBLOCK_NMI))  		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)  static int handle_nmi_window(struct kvm_vcpu *vcpu)  { +	WARN_ON_ONCE(!enable_vnmi);  	vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,  			CPU_BASED_VIRTUAL_NMI_PENDING);  	++vcpu->stat.nmi_window_exits; @@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)  	if (!cpu_has_vmx_flexpriority())  		flexpriority_enabled = 0; +	if (!cpu_has_virtual_nmis()) +		enable_vnmi = 0; +  	/*  	 * set_apic_access_page_addr() is used to reload apic access  	 * page upon invalidation.  No need to do anything if not @@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)  	}  	/* Create a new VMCS */ -	item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); +	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);  	if (!item)  		return NULL;  	item->vmcs02.vmcs = alloc_vmcs(); @@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)  	 * "blocked by NMI" bit has to be set before next VM entry.  	 */  	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && +			enable_vnmi &&  			(exit_qualification & INTR_INFO_UNBLOCK_NMI))  		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,  				GUEST_INTR_STATE_NMI); @@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)  		return 0;  	} +	if (unlikely(!enable_vnmi && +		     vmx->loaded_vmcs->soft_vnmi_blocked)) { +		if (vmx_interrupt_allowed(vcpu)) { +			vmx->loaded_vmcs->soft_vnmi_blocked = 0; +		} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && +			   vcpu->arch.nmi_pending) { +			/* +			 * This CPU don't support us in finding the end of an +			 * NMI-blocked window if the guest runs with IRQs +			 * disabled. So we pull the trigger after 1 s of +			 * futile waiting, but inform the user about this. +			 */ +			printk(KERN_WARNING "%s: Breaking out of NMI-blocked " +			       "state on VCPU %d after 1 s timeout\n", +			       __func__, vcpu->vcpu_id); +			vmx->loaded_vmcs->soft_vnmi_blocked = 0; +		} +	} +  	if (exit_reason < kvm_vmx_max_exit_handlers  	    && kvm_vmx_exit_handlers[exit_reason])  		return kvm_vmx_exit_handlers[exit_reason](vcpu); @@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)  	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; -	if (vmx->loaded_vmcs->nmi_known_unmasked) -		return; -	/* -	 * Can't use vmx->exit_intr_info since we're not sure what -	 * the exit reason is. -	 */ -	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); -	unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; -	vector = exit_intr_info & INTR_INFO_VECTOR_MASK; -	/* -	 * SDM 3: 27.7.1.2 (September 2008) -	 * Re-set bit "block by NMI" before VM entry if vmexit caused by -	 * a guest IRET fault. -	 * SDM 3: 23.2.2 (September 2008) -	 * Bit 12 is undefined in any of the following cases: -	 *  If the VM exit sets the valid bit in the IDT-vectoring -	 *   information field. -	 *  If the VM exit is due to a double fault. 
-	 */ -	if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && -	    vector != DF_VECTOR && !idtv_info_valid) -		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, -			      GUEST_INTR_STATE_NMI); -	else -		vmx->loaded_vmcs->nmi_known_unmasked = -			!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) -			  & GUEST_INTR_STATE_NMI); +	if (enable_vnmi) { +		if (vmx->loaded_vmcs->nmi_known_unmasked) +			return; +		/* +		 * Can't use vmx->exit_intr_info since we're not sure what +		 * the exit reason is. +		 */ +		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); +		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; +		vector = exit_intr_info & INTR_INFO_VECTOR_MASK; +		/* +		 * SDM 3: 27.7.1.2 (September 2008) +		 * Re-set bit "block by NMI" before VM entry if vmexit caused by +		 * a guest IRET fault. +		 * SDM 3: 23.2.2 (September 2008) +		 * Bit 12 is undefined in any of the following cases: +		 *  If the VM exit sets the valid bit in the IDT-vectoring +		 *   information field. +		 *  If the VM exit is due to a double fault. +		 */ +		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && +		    vector != DF_VECTOR && !idtv_info_valid) +			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, +				      GUEST_INTR_STATE_NMI); +		else +			vmx->loaded_vmcs->nmi_known_unmasked = +				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) +				  & GUEST_INTR_STATE_NMI); +	} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) +		vmx->loaded_vmcs->vnmi_blocked_time += +			ktime_to_ns(ktime_sub(ktime_get(), +					      vmx->loaded_vmcs->entry_time));  }  static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, @@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)  	struct vcpu_vmx *vmx = to_vmx(vcpu);  	unsigned long debugctlmsr, cr3, cr4; +	/* Record the guest's net vcpu time for enforced NMI injections. */ +	if (unlikely(!enable_vnmi && +		     vmx->loaded_vmcs->soft_vnmi_blocked)) +		vmx->loaded_vmcs->entry_time = ktime_get(); +  	/* Don't enter VMX if guest state is invalid, let the exit handler  	   start emulation until we arrive back to a valid state */  	if (vmx->emulation_required) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 34dba516ef24..8c896540a72c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -26,6 +26,8 @@  #include <linux/list.h>  #include <linux/jump_label.h> +#include <linux/irqchip/arm-gic-v4.h> +  #define VGIC_V3_MAX_CPUS	255  #define VGIC_V2_MAX_CPUS	8  #define VGIC_NR_IRQS_LEGACY     256 @@ -73,6 +75,9 @@ struct vgic_global {  	/* Only needed for the legacy KVM_CREATE_IRQCHIP */  	bool			can_emulate_gicv2; +	/* Hardware has GICv4? */ +	bool			has_gicv4; +  	/* GIC system register CPU interface */  	struct static_key_false gicv3_cpuif; @@ -116,6 +121,7 @@ struct vgic_irq {  	bool hw;			/* Tied to HW IRQ */  	struct kref refcount;		/* Used for LPIs */  	u32 hwintid;			/* HW INTID number */ +	unsigned int host_irq;		/* linux irq corresponding to hwintid */  	union {  		u8 targets;			/* GICv2 target VCPUs mask */  		u32 mpidr;			/* GICv3 target VCPU */ @@ -232,6 +238,15 @@ struct vgic_dist {  	/* used by vgic-debug */  	struct vgic_state_iter *iter; + +	/* +	 * GICv4 ITS per-VM data, containing the IRQ domain, the VPE +	 * array, the property table pointer as well as allocation +	 * data. This essentially ties the Linux IRQ core and ITS +	 * together, and avoids leaking KVM's data structures anywhere +	 * else. 
+	 */ +	struct its_vm		its_vm;  };  struct vgic_v2_cpu_if { @@ -250,6 +265,14 @@ struct vgic_v3_cpu_if {  	u32		vgic_ap0r[4];  	u32		vgic_ap1r[4];  	u64		vgic_lr[VGIC_V3_MAX_LRS]; + +	/* +	 * GICv4 ITS per-VPE data, containing the doorbell IRQ, the +	 * pending table pointer, the its_vm pointer and a few other +	 * HW specific things. As for the its_vm structure, this is +	 * linking the Linux IRQ subsystem and the ITS together. +	 */ +	struct its_vpe	its_vpe;  };  struct vgic_cpu { @@ -307,9 +330,10 @@ void kvm_vgic_init_cpu_hardware(void);  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,  			bool level, void *owner); -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, +			  u32 vintid); +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); @@ -349,4 +373,15 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);  int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); +struct kvm_kernel_irq_routing_entry; + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, +			       struct kvm_kernel_irq_routing_entry *irq_entry); + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, +				 struct kvm_kernel_irq_routing_entry *irq_entry); + +void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu); +void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu); +  #endif /* __KVM_ARM_VGIC_H */ diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4db54ff08d9e..4151250ce8da 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -817,9 +817,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)  {  	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;  	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); -	struct irq_desc *desc; -	struct irq_data *data; -	int phys_irq;  	int ret;  	if (timer->enabled) @@ -837,26 +834,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)  		return -EINVAL;  	} -	/* -	 * Find the physical IRQ number corresponding to the host_vtimer_irq -	 */ -	desc = irq_to_desc(host_vtimer_irq); -	if (!desc) { -		kvm_err("%s: no interrupt descriptor\n", __func__); -		return -EINVAL; -	} - -	data = irq_desc_get_irq_data(desc); -	while (data->parent_data) -		data = data->parent_data; - -	phys_irq = data->hwirq; - -	/* -	 * Tell the VGIC that the virtual interrupt is tied to a -	 * physical interrupt. We do that once per VCPU. 
-	 */ -	ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); +	ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq);  	if (ret)  		return ret; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 772bf74ac2e9..a6524ff27de4 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -27,6 +27,8 @@  #include <linux/mman.h>  #include <linux/sched.h>  #include <linux/kvm.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h>  #include <trace/events/kvm.h>  #include <kvm/arm_pmu.h> @@ -175,6 +177,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)  {  	int i; +	kvm_vgic_destroy(kvm); +  	free_percpu(kvm->arch.last_vcpu_ran);  	kvm->arch.last_vcpu_ran = NULL; @@ -184,8 +188,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)  			kvm->vcpus[i] = NULL;  		}  	} - -	kvm_vgic_destroy(kvm);  }  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -313,11 +315,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)  void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)  {  	kvm_timer_schedule(vcpu); +	kvm_vgic_v4_enable_doorbell(vcpu);  }  void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)  {  	kvm_timer_unschedule(vcpu); +	kvm_vgic_v4_disable_doorbell(vcpu);  }  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -1450,6 +1454,46 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)  	return NULL;  } +bool kvm_arch_has_irq_bypass(void) +{ +	return true; +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, +				      struct irq_bypass_producer *prod) +{ +	struct kvm_kernel_irqfd *irqfd = +		container_of(cons, struct kvm_kernel_irqfd, consumer); + +	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, +					  &irqfd->irq_entry); +} +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, +				      struct irq_bypass_producer *prod) +{ +	struct kvm_kernel_irqfd *irqfd = +		container_of(cons, struct kvm_kernel_irqfd, consumer); + +	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, +				     &irqfd->irq_entry); +} + +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) +{ +	struct kvm_kernel_irqfd *irqfd = +		container_of(cons, struct kvm_kernel_irqfd, consumer); + +	kvm_arm_halt_guest(irqfd->kvm); +} + +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) +{ +	struct kvm_kernel_irqfd *irqfd = +		container_of(cons, struct kvm_kernel_irqfd, consumer); + +	kvm_arm_resume_guest(irqfd->kvm); +} +  /**   * Initialize Hyp-mode and memory mappings on all CPUs.   */ diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 91728faa13fd..f5c3d6d7019e 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -258,7 +258,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)  			cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);  		}  	} else { -		if (static_branch_unlikely(&vgic_v3_cpuif_trap)) +		if (static_branch_unlikely(&vgic_v3_cpuif_trap) || +		    cpu_if->its_vpe.its_vm)  			write_gicreg(0, ICH_HCR_EL2);  		cpu_if->vgic_elrsr = 0xffff; @@ -337,9 +338,11 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)  		/*  		 * If we need to trap system registers, we must write  		 * ICH_HCR_EL2 anyway, even if no interrupts are being -		 * injected, +		 * injected. Same thing if GICv4 is used, as VLPI +		 * delivery is gated by ICH_HCR_EL2.En.  		 
*/ -		if (static_branch_unlikely(&vgic_v3_cpuif_trap)) +		if (static_branch_unlikely(&vgic_v3_cpuif_trap) || +		    cpu_if->its_vpe.its_vm)  			write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);  	} diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 5801261f3add..62310122ee78 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -285,6 +285,10 @@ int vgic_init(struct kvm *kvm)  	if (ret)  		goto out; +	ret = vgic_v4_init(kvm); +	if (ret) +		goto out; +  	kvm_for_each_vcpu(i, vcpu, kvm)  		kvm_vgic_vcpu_enable(vcpu); @@ -320,6 +324,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)  	kfree(dist->spis);  	dist->nr_spis = 0; + +	if (vgic_supports_direct_msis(kvm)) +		vgic_v4_teardown(kvm);  }  void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index d2a99ab0ade7..1f761a9991e7 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its);  static int vgic_its_restore_tables_v0(struct vgic_its *its);  static int vgic_its_commit_v0(struct vgic_its *its);  static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, -			     struct kvm_vcpu *filter_vcpu); +			     struct kvm_vcpu *filter_vcpu, bool needs_inv);  /*   * Creates a new (reference to a) struct vgic_irq for a given LPI. @@ -106,7 +106,7 @@ out_unlock:  	 * However we only have those structs for mapped IRQs, so we read in  	 * the respective config data from memory here upon mapping the LPI.  	 */ -	ret = update_lpi_config(kvm, irq, NULL); +	ret = update_lpi_config(kvm, irq, NULL, false);  	if (ret)  		return ERR_PTR(ret); @@ -273,7 +273,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)   * VCPU. Unconditionally applies if filter_vcpu is NULL.   */  static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, -			     struct kvm_vcpu *filter_vcpu) +			     struct kvm_vcpu *filter_vcpu, bool needs_inv)  {  	u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);  	u8 prop; @@ -292,11 +292,17 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,  		irq->priority = LPI_PROP_PRIORITY(prop);  		irq->enabled = LPI_PROP_ENABLE_BIT(prop); -		vgic_queue_irq_unlock(kvm, irq, flags); -	} else { -		spin_unlock_irqrestore(&irq->irq_lock, flags); +		if (!irq->hw) { +			vgic_queue_irq_unlock(kvm, irq, flags); +			return 0; +		}  	} +	spin_unlock_irqrestore(&irq->irq_lock, flags); + +	if (irq->hw) +		return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); +  	return 0;  } @@ -336,6 +342,29 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)  	return i;  } +static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) +{ +	int ret = 0; + +	spin_lock(&irq->irq_lock); +	irq->target_vcpu = vcpu; +	spin_unlock(&irq->irq_lock); + +	if (irq->hw) { +		struct its_vlpi_map map; + +		ret = its_get_vlpi(irq->host_irq, &map); +		if (ret) +			return ret; + +		map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + +		ret = its_map_vlpi(irq->host_irq, &map); +	} + +	return ret; +} +  /*   * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI   * is targeting) to the VGIC's view, which deals with target VCPUs. 
@@ -350,10 +379,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)  		return;  	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); - -	spin_lock(&ite->irq->irq_lock); -	ite->irq->target_vcpu = vcpu; -	spin_unlock(&ite->irq->irq_lock); +	update_affinity(ite->irq, vcpu);  }  /* @@ -505,19 +531,11 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,  	return 0;  } -/* - * Find the target VCPU and the LPI number for a given devid/eventid pair - * and make this IRQ pending, possibly injecting it. - * Must be called with the its_lock mutex held. - * Returns 0 on success, a positive error value for any ITS mapping - * related errors and negative error values for generic errors. - */ -static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, -				u32 devid, u32 eventid) +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, +			 u32 devid, u32 eventid, struct vgic_irq **irq)  {  	struct kvm_vcpu *vcpu;  	struct its_ite *ite; -	unsigned long flags;  	if (!its->enabled)  		return -EBUSY; @@ -533,26 +551,65 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,  	if (!vcpu->arch.vgic_cpu.lpis_enabled)  		return -EBUSY; -	spin_lock_irqsave(&ite->irq->irq_lock, flags); -	ite->irq->pending_latch = true; -	vgic_queue_irq_unlock(kvm, ite->irq, flags); - +	*irq = ite->irq;  	return 0;  } -static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)  { +	u64 address; +	struct kvm_io_device *kvm_io_dev;  	struct vgic_io_device *iodev; -	if (dev->ops != &kvm_io_gic_ops) -		return NULL; +	if (!vgic_has_its(kvm)) +		return ERR_PTR(-ENODEV); -	iodev = container_of(dev, struct vgic_io_device, dev); +	if (!(msi->flags & KVM_MSI_VALID_DEVID)) +		return ERR_PTR(-EINVAL); +	address = (u64)msi->address_hi << 32 | msi->address_lo; + +	kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); +	if (!kvm_io_dev) +		return ERR_PTR(-EINVAL); + +	if (kvm_io_dev->ops != &kvm_io_gic_ops) +		return ERR_PTR(-EINVAL); + +	iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);  	if (iodev->iodev_type != IODEV_ITS) -		return NULL; +		return ERR_PTR(-EINVAL); + +	return iodev->its; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. 
+ */ +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, +				u32 devid, u32 eventid) +{ +	struct vgic_irq *irq = NULL; +	unsigned long flags; +	int err; + +	err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq); +	if (err) +		return err; + +	if (irq->hw) +		return irq_set_irqchip_state(irq->host_irq, +					     IRQCHIP_STATE_PENDING, true); + +	spin_lock_irqsave(&irq->irq_lock, flags); +	irq->pending_latch = true; +	vgic_queue_irq_unlock(kvm, irq, flags); -	return iodev; +	return 0;  }  /* @@ -563,30 +620,16 @@ static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)   */  int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)  { -	u64 address; -	struct kvm_io_device *kvm_io_dev; -	struct vgic_io_device *iodev; +	struct vgic_its *its;  	int ret; -	if (!vgic_has_its(kvm)) -		return -ENODEV; - -	if (!(msi->flags & KVM_MSI_VALID_DEVID)) -		return -EINVAL; +	its = vgic_msi_to_its(kvm, msi); +	if (IS_ERR(its)) +		return PTR_ERR(its); -	address = (u64)msi->address_hi << 32 | msi->address_lo; - -	kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); -	if (!kvm_io_dev) -		return -EINVAL; - -	iodev = vgic_get_its_iodev(kvm_io_dev); -	if (!iodev) -		return -EINVAL; - -	mutex_lock(&iodev->its->its_lock); -	ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); -	mutex_unlock(&iodev->its->its_lock); +	mutex_lock(&its->its_lock); +	ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data); +	mutex_unlock(&its->its_lock);  	if (ret < 0)  		return ret; @@ -608,8 +651,12 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite)  	list_del(&ite->ite_list);  	/* This put matches the get in vgic_add_lpi. */ -	if (ite->irq) +	if (ite->irq) { +		if (ite->irq->hw) +			WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); +  		vgic_put_irq(kvm, ite->irq); +	}  	kfree(ite);  } @@ -683,11 +730,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,  	ite->collection = collection;  	vcpu = kvm_get_vcpu(kvm, collection->target_addr); -	spin_lock(&ite->irq->irq_lock); -	ite->irq->target_vcpu = vcpu; -	spin_unlock(&ite->irq->irq_lock); - -	return 0; +	return update_affinity(ite->irq, vcpu);  }  /* @@ -1054,6 +1097,10 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,  	ite->irq->pending_latch = false; +	if (ite->irq->hw) +		return irq_set_irqchip_state(ite->irq->host_irq, +					     IRQCHIP_STATE_PENDING, false); +  	return 0;  } @@ -1073,7 +1120,7 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,  	if (!ite)  		return E_ITS_INV_UNMAPPED_INTERRUPT; -	return update_lpi_config(kvm, ite->irq, NULL); +	return update_lpi_config(kvm, ite->irq, NULL, true);  }  /* @@ -1108,12 +1155,15 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,  		irq = vgic_get_irq(kvm, NULL, intids[i]);  		if (!irq)  			continue; -		update_lpi_config(kvm, irq, vcpu); +		update_lpi_config(kvm, irq, vcpu, false);  		vgic_put_irq(kvm, irq);  	}  	kfree(intids); +	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) +		its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); +  	return 0;  } @@ -1128,11 +1178,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,  static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,  				      u64 *its_cmd)  { -	struct vgic_dist *dist = &kvm->arch.vgic;  	u32 target1_addr = its_cmd_get_target_addr(its_cmd);  	u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);  	struct kvm_vcpu *vcpu1, 
*vcpu2;  	struct vgic_irq *irq; +	u32 *intids; +	int irq_count, i;  	if (target1_addr >= atomic_read(&kvm->online_vcpus) ||  	    target2_addr >= atomic_read(&kvm->online_vcpus)) @@ -1144,19 +1195,19 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,  	vcpu1 = kvm_get_vcpu(kvm, target1_addr);  	vcpu2 = kvm_get_vcpu(kvm, target2_addr); -	spin_lock(&dist->lpi_list_lock); +	irq_count = vgic_copy_lpi_list(vcpu1, &intids); +	if (irq_count < 0) +		return irq_count; -	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { -		spin_lock(&irq->irq_lock); +	for (i = 0; i < irq_count; i++) { +		irq = vgic_get_irq(kvm, NULL, intids[i]); -		if (irq->target_vcpu == vcpu1) -			irq->target_vcpu = vcpu2; +		update_affinity(irq, vcpu2); -		spin_unlock(&irq->irq_lock); +		vgic_put_irq(kvm, irq);  	} -	spin_unlock(&dist->lpi_list_lock); - +	kfree(intids);  	return 0;  } @@ -1634,6 +1685,14 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)  	if (!its)  		return -ENOMEM; +	if (vgic_initialized(dev->kvm)) { +		int ret = vgic_v4_init(dev->kvm); +		if (ret < 0) { +			kfree(its); +			return ret; +		} +	} +  	mutex_init(&its->its_lock);  	mutex_init(&its->cmd_lock); @@ -1946,6 +2005,15 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)  	list_for_each_entry(ite, &device->itt_head, ite_list) {  		gpa_t gpa = base + ite->event_id * ite_esz; +		/* +		 * If an LPI carries the HW bit, this means that this +		 * interrupt is controlled by GICv4, and we do not +		 * have direct access to that state. Let's simply fail +		 * the save operation... +		 */ +		if (ite->irq->hw) +			return -EACCES; +  		ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);  		if (ret)  			return ret; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 83786108829e..671fe81f8e1d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -54,6 +54,11 @@ bool vgic_has_its(struct kvm *kvm)  	return dist->has_its;  } +bool vgic_supports_direct_msis(struct kvm *kvm) +{ +	return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); +} +  static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,  					    gpa_t addr, unsigned int len)  { diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 863351c090d8..2f05f732d3fd 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -24,6 +24,7 @@  static bool group0_trap;  static bool group1_trap;  static bool common_trap; +static bool gicv4_enable;  void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)  { @@ -461,6 +462,12 @@ static int __init early_common_trap_cfg(char *buf)  }  early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); +static int __init early_gicv4_enable(char *buf) +{ +	return strtobool(buf, &gicv4_enable); +} +early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); +  /**   * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT   * @node:	pointer to the DT node @@ -480,6 +487,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info)  	kvm_vgic_global_state.can_emulate_gicv2 = false;  	kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; +	/* GICv4 support? */ +	if (info->has_v4) { +		kvm_vgic_global_state.has_gicv4 = gicv4_enable; +		kvm_info("GICv4 support %sabled\n", +			 gicv4_enable ? 
"en" : "dis"); +	} +  	if (!info->vcpu.start) {  		kvm_info("GICv3: no GICV resource entry\n");  		kvm_vgic_global_state.vcpu_base = 0; diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c new file mode 100644 index 000000000000..53c324aa44ef --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -0,0 +1,364 @@ +/* + * Copyright (C) 2017 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kvm_host.h> +#include <linux/irqchip/arm-gic-v3.h> + +#include "vgic.h" + +/* + * How KVM uses GICv4 (insert rude comments here): + * + * The vgic-v4 layer acts as a bridge between several entities: + * - The GICv4 ITS representation offered by the ITS driver + * - VFIO, which is in charge of the PCI endpoint + * - The virtual ITS, which is the only thing the guest sees + * + * The configuration of VLPIs is triggered by a callback from VFIO, + * instructing KVM that a PCI device has been configured to deliver + * MSIs to a vITS. + * + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry, + * and this is used to find the corresponding vITS data structures + * (ITS instance, device, event and irq) using a process that is + * extremely similar to the injection of an MSI. + * + * At this stage, we can link the guest's view of an LPI (uniquely + * identified by the routing entry) and the host irq, using the GICv4 + * driver mapping operation. Should the mapping succeed, we've then + * successfully upgraded the guest's LPI to a VLPI. We can then start + * with updating GICv4's view of the property table and generating an + * INValidation in order to kickstart the delivery of this VLPI to the + * guest directly, without software intervention. Well, almost. + * + * When the PCI endpoint is deconfigured, this operation is reversed + * with VFIO calling kvm_vgic_v4_unset_forwarding(). + * + * Once the VLPI has been mapped, it needs to follow any change the + * guest performs on its LPI through the vITS. For that, a number of + * command handlers have hooks to communicate these changes to the HW: + * - Any invalidation triggers a call to its_prop_update_vlpi() + * - The INT command results in a irq_set_irqchip_state(), which + *   generates an INT on the corresponding VLPI. + * - The CLEAR command results in a irq_set_irqchip_state(), which + *   generates an CLEAR on the corresponding VLPI. + * - DISCARD translates into an unmap, similar to a call to + *   kvm_vgic_v4_unset_forwarding(). + * - MOVI is translated by an update of the existing mapping, changing + *   the target vcpu, resulting in a VMOVI being generated. + * - MOVALL is translated by a string of mapping updates (similar to + *   the handling of MOVI). MOVALL is horrible. 
+ * + * Note that a DISCARD/MAPTI sequence emitted from the guest without + * reprogramming the PCI endpoint after MAPTI does not result in a + * VLPI being mapped, as there is no callback from VFIO (the guest + * will get the interrupt via the normal SW injection). Fixing this is + * not trivial, and requires some horrible messing with the VFIO + * internals. Not fun. Don't do that. + * + * Then there is the scheduling. Each time a vcpu is about to run on a + * physical CPU, KVM must tell the corresponding redistributor about + * it. And if we've migrated our vcpu from one CPU to another, we must + * tell the ITS (so that the messages reach the right redistributor). + * This is done in two steps: first issue a irq_set_affinity() on the + * irq corresponding to the vcpu, then call its_schedule_vpe(). You + * must be in a non-preemptible context. On exit, another call to + * its_schedule_vpe() tells the redistributor that we're done with the + * vcpu. + * + * Finally, the doorbell handling: Each vcpu is allocated an interrupt + * which will fire each time a VLPI is made pending whilst the vcpu is + * not running. Each time the vcpu gets blocked, the doorbell + * interrupt gets enabled. When the vcpu is unblocked (for whatever + * reason), the doorbell interrupt is disabled. + */ + +#define DB_IRQ_FLAGS	(IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) + +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) +{ +	struct kvm_vcpu *vcpu = info; + +	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; +	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); +	kvm_vcpu_kick(vcpu); + +	return IRQ_HANDLED; +} + +/** + * vgic_v4_init - Initialize the GICv4 data structures + * @kvm:	Pointer to the VM being initialized + * + * We may be called each time a vITS is created, or when the + * vgic is initialized. This relies on kvm->lock to be + * held. In both cases, the number of vcpus should now be + * fixed. + */ +int vgic_v4_init(struct kvm *kvm) +{ +	struct vgic_dist *dist = &kvm->arch.vgic; +	struct kvm_vcpu *vcpu; +	int i, nr_vcpus, ret; + +	if (!vgic_supports_direct_msis(kvm)) +		return 0; /* Nothing to see here... move along. */ + +	if (dist->its_vm.vpes) +		return 0; + +	nr_vcpus = atomic_read(&kvm->online_vcpus); + +	dist->its_vm.vpes = kzalloc(sizeof(*dist->its_vm.vpes) * nr_vcpus, +				    GFP_KERNEL); +	if (!dist->its_vm.vpes) +		return -ENOMEM; + +	dist->its_vm.nr_vpes = nr_vcpus; + +	kvm_for_each_vcpu(i, vcpu, kvm) +		dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + +	ret = its_alloc_vcpu_irqs(&dist->its_vm); +	if (ret < 0) { +		kvm_err("VPE IRQ allocation failure\n"); +		kfree(dist->its_vm.vpes); +		dist->its_vm.nr_vpes = 0; +		dist->its_vm.vpes = NULL; +		return ret; +	} + +	kvm_for_each_vcpu(i, vcpu, kvm) { +		int irq = dist->its_vm.vpes[i]->irq; + +		/* +		 * Don't automatically enable the doorbell, as we're +		 * flipping it back and forth when the vcpu gets +		 * blocked. Also disable the lazy disabling, as the +		 * doorbell could kick us out of the guest too +		 * early... +		 */ +		irq_set_status_flags(irq, DB_IRQ_FLAGS); +		ret = request_irq(irq, vgic_v4_doorbell_handler, +				  0, "vcpu", vcpu); +		if (ret) { +			kvm_err("failed to allocate vcpu IRQ%d\n", irq); +			/* +			 * Trick: adjust the number of vpes so we know +			 * how many to nuke on teardown... 
+			 */ +			dist->its_vm.nr_vpes = i; +			break; +		} +	} + +	if (ret) +		vgic_v4_teardown(kvm); + +	return ret; +} + +/** + * vgic_v4_teardown - Free the GICv4 data structures + * @kvm:	Pointer to the VM being destroyed + * + * Relies on kvm->lock to be held. + */ +void vgic_v4_teardown(struct kvm *kvm) +{ +	struct its_vm *its_vm = &kvm->arch.vgic.its_vm; +	int i; + +	if (!its_vm->vpes) +		return; + +	for (i = 0; i < its_vm->nr_vpes; i++) { +		struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); +		int irq = its_vm->vpes[i]->irq; + +		irq_clear_status_flags(irq, DB_IRQ_FLAGS); +		free_irq(irq, vcpu); +	} + +	its_free_vcpu_irqs(its_vm); +	kfree(its_vm->vpes); +	its_vm->nr_vpes = 0; +	its_vm->vpes = NULL; +} + +int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu) +{ +	if (!vgic_supports_direct_msis(vcpu->kvm)) +		return 0; + +	return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false); +} + +int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu) +{ +	int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; +	int err; + +	if (!vgic_supports_direct_msis(vcpu->kvm)) +		return 0; + +	/* +	 * Before making the VPE resident, make sure the redistributor +	 * corresponding to our current CPU expects us here. See the +	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this +	 * turns into a VMOVP command at the ITS level. +	 */ +	err = irq_set_affinity(irq, cpumask_of(smp_processor_id())); +	if (err) +		return err; + +	err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true); +	if (err) +		return err; + +	/* +	 * Now that the VPE is resident, let's get rid of a potential +	 * doorbell interrupt that would still be pending. +	 */ +	err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false); + +	return err; +} + +static struct vgic_its *vgic_get_its(struct kvm *kvm, +				     struct kvm_kernel_irq_routing_entry *irq_entry) +{ +	struct kvm_msi msi  = (struct kvm_msi) { +		.address_lo	= irq_entry->msi.address_lo, +		.address_hi	= irq_entry->msi.address_hi, +		.data		= irq_entry->msi.data, +		.flags		= irq_entry->msi.flags, +		.devid		= irq_entry->msi.devid, +	}; + +	return vgic_msi_to_its(kvm, &msi); +} + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, +			       struct kvm_kernel_irq_routing_entry *irq_entry) +{ +	struct vgic_its *its; +	struct vgic_irq *irq; +	struct its_vlpi_map map; +	int ret; + +	if (!vgic_supports_direct_msis(kvm)) +		return 0; + +	/* +	 * Get the ITS, and escape early on error (not a valid +	 * doorbell for any of our vITSs). +	 */ +	its = vgic_get_its(kvm, irq_entry); +	if (IS_ERR(its)) +		return 0; + +	mutex_lock(&its->its_lock); + +	/* Perform then actual DevID/EventID -> LPI translation. */ +	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, +				   irq_entry->msi.data, &irq); +	if (ret) +		goto out; + +	/* +	 * Emit the mapping request. If it fails, the ITS probably +	 * isn't v4 compatible, so let's silently bail out. Holding +	 * the ITS lock should ensure that nothing can modify the +	 * target vcpu. +	 */ +	map = (struct its_vlpi_map) { +		.vm		= &kvm->arch.vgic.its_vm, +		.vpe		= &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe, +		.vintid		= irq->intid, +		.properties	= ((irq->priority & 0xfc) | +				   (irq->enabled ? 
LPI_PROP_ENABLED : 0) | +				   LPI_PROP_GROUP1), +		.db_enabled	= true, +	}; + +	ret = its_map_vlpi(virq, &map); +	if (ret) +		goto out; + +	irq->hw		= true; +	irq->host_irq	= virq; + +out: +	mutex_unlock(&its->its_lock); +	return ret; +} + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, +				 struct kvm_kernel_irq_routing_entry *irq_entry) +{ +	struct vgic_its *its; +	struct vgic_irq *irq; +	int ret; + +	if (!vgic_supports_direct_msis(kvm)) +		return 0; + +	/* +	 * Get the ITS, and escape early on error (not a valid +	 * doorbell for any of our vITSs). +	 */ +	its = vgic_get_its(kvm, irq_entry); +	if (IS_ERR(its)) +		return 0; + +	mutex_lock(&its->its_lock); + +	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, +				   irq_entry->msi.data, &irq); +	if (ret) +		goto out; + +	WARN_ON(!(irq->hw && irq->host_irq == virq)); +	irq->hw = false; +	ret = its_unmap_vlpi(virq); + +out: +	mutex_unlock(&its->its_lock); +	return ret; +} + +void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu) +{ +	if (vgic_supports_direct_msis(vcpu->kvm)) { +		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; +		if (irq) +			enable_irq(irq); +	} +} + +void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu) +{ +	if (vgic_supports_direct_msis(vcpu->kvm)) { +		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; +		if (irq) +			disable_irq(irq); +	} +} diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e54ef2fdf73d..b168a328a9e0 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -17,6 +17,8 @@  #include <linux/kvm.h>  #include <linux/kvm_host.h>  #include <linux/list_sort.h> +#include <linux/interrupt.h> +#include <linux/irq.h>  #include "vgic.h" @@ -409,25 +411,56 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,  	return 0;  } -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) +/* @irq->irq_lock must be held */ +static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, +			    unsigned int host_irq)  { -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); +	struct irq_desc *desc; +	struct irq_data *data; + +	/* +	 * Find the physical IRQ number corresponding to @host_irq +	 */ +	desc = irq_to_desc(host_irq); +	if (!desc) { +		kvm_err("%s: no interrupt descriptor\n", __func__); +		return -EINVAL; +	} +	data = irq_desc_get_irq_data(desc); +	while (data->parent_data) +		data = data->parent_data; + +	irq->hw = true; +	irq->host_irq = host_irq; +	irq->hwintid = data->hwirq; +	return 0; +} + +/* @irq->irq_lock must be held */ +static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) +{ +	irq->hw = false; +	irq->hwintid = 0; +} + +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, +			  u32 vintid) +{ +	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);  	unsigned long flags; +	int ret;  	BUG_ON(!irq);  	spin_lock_irqsave(&irq->irq_lock, flags); - -	irq->hw = true; -	irq->hwintid = phys_irq; - +	ret = kvm_vgic_map_irq(vcpu, irq, host_irq);  	spin_unlock_irqrestore(&irq->irq_lock, flags);  	vgic_put_irq(vcpu->kvm, irq); -	return 0; +	return ret;  } -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)  {  	struct vgic_irq *irq;  	unsigned long flags; @@ -435,14 +468,11 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)  	if (!vgic_initialized(vcpu->kvm))  		return -EAGAIN; -	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); +	irq = 
vgic_get_irq(vcpu->kvm, vcpu, vintid);  	BUG_ON(!irq);  	spin_lock_irqsave(&irq->irq_lock, flags); - -	irq->hw = false; -	irq->hwintid = 0; - +	kvm_vgic_unmap_irq(irq);  	spin_unlock_irqrestore(&irq->irq_lock, flags);  	vgic_put_irq(vcpu->kvm, irq); @@ -688,6 +718,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)  {  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; +	WARN_ON(vgic_v4_sync_hwstate(vcpu)); +  	/* An empty ap_list_head implies used_lrs == 0 */  	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))  		return; @@ -700,6 +732,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)  /* Flush our emulation state into the GIC hardware before entering the guest. */  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)  { +	WARN_ON(vgic_v4_flush_hwstate(vcpu)); +  	/*  	 * If there are no virtual interrupts active or pending for this  	 * VCPU, then there is no work to do and we can bail out without @@ -751,6 +785,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)  	if (!vcpu->kvm->arch.vgic.enabled)  		return false; +	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last) +		return true; +  	spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);  	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { @@ -784,9 +821,9 @@ void vgic_kick_vcpus(struct kvm *kvm)  	}  } -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)  { -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); +	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);  	bool map_is_active;  	unsigned long flags; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 4f8aecb07ae6..efbcf8f96f9c 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -237,4 +237,14 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)  	}  } +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, +			 u32 devid, u32 eventid, struct vgic_irq **irq); +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); + +bool vgic_supports_direct_msis(struct kvm *kvm); +int vgic_v4_init(struct kvm *kvm); +void vgic_v4_teardown(struct kvm *kvm); +int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu); +int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu); +  #endif | 
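Note on the MSI path reworked above: vgic_msi_to_its() only accepts an MSI that carries a valid device ID (KVM_MSI_VALID_DEVID) and whose write address hits a registered vITS doorbell, and vgic_its_trigger_msi()/vgic_its_resolve_lpi() then translate DevID/EventID into the LPI. A minimal userspace sketch of exercising that same path through the KVM_SIGNAL_MSI ioctl follows; the vm_fd, doorbell address and IDs are placeholders, not values taken from the patch.

/*
 * Sketch only: inject an LPI through a vITS from userspace via
 * KVM_SIGNAL_MSI. The flags/devid handling mirrors the checks in
 * vgic_msi_to_its(); its_doorbell_gpa is assumed to be the guest
 * physical address of the vITS GITS_TRANSLATER register.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

int signal_vits_msi(int vm_fd, uint64_t its_doorbell_gpa,
		    uint32_t devid, uint32_t eventid)
{
	struct kvm_msi msi = {
		.address_lo = (uint32_t)its_doorbell_gpa,
		.address_hi = (uint32_t)(its_doorbell_gpa >> 32),
		.data       = eventid,			/* ITS EventID */
		.flags      = KVM_MSI_VALID_DEVID,	/* required for a vITS */
		.devid      = devid,			/* ITS DeviceID */
	};

	/* KVM routes this to vgic_its_inject_msi() for the matching vITS */
	return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
}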
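The forwarding flow described in the vgic-v4.c header comment is driven by the irq bypass manager that the Kconfig changes select (IRQ_BYPASS_MANAGER / HAVE_KVM_IRQ_BYPASS): a producer such as VFIO registers a host interrupt under the same eventfd token as KVM's irqfd consumer, which invokes the kvm_arch_irq_bypass_add_producer() hook added in arm.c and, from there, kvm_vgic_v4_set_forwarding(). A rough producer-side sketch, assuming the generic irqbypass API rather than the actual VFIO code:

/*
 * Sketch (not the real VFIO code): advertise a host MSI to the bypass
 * manager so KVM can upgrade the corresponding guest LPI to a VLPI.
 * The token must be the same eventfd context as the KVM irqfd.
 */
#include <linux/irqbypass.h>
#include <linux/eventfd.h>

static struct irq_bypass_producer producer;

static int publish_msi_for_bypass(int host_irq, struct eventfd_ctx *trigger)
{
	producer.token = trigger;	/* matches the irqfd consumer token */
	producer.irq   = host_irq;	/* Linux irq backing the device MSI */

	/*
	 * If a consumer with the same token exists, this calls back into
	 * KVM, which resolves the routing entry to a vITS DevID/EventID
	 * and maps the VLPI (kvm_vgic_v4_set_forwarding() above).
	 */
	return irq_bypass_register_producer(&producer);
}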
