From 2b953ea348128ef4d70b0e21f01c1bdee48ee4dc Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 28 Sep 2018 14:39:18 +0100 Subject: KVM: Allow 2048-bit register access via ioctl interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Arm SVE architecture defines registers that are up to 2048 bits in size (with some possibility of further future expansion). In order to avoid the need for an excessively large number of ioctls when saving and restoring a vcpu's registers, this patch adds a #define to make support for individual 2048-bit registers through the KVM_{GET,SET}_ONE_REG ioctl interface official. This will allow each SVE register to be accessed in a single call. There are sufficient spare bits in the register id size field for this change, so there is no ABI impact, providing that KVM_GET_REG_LIST does not enumerate any 2048-bit register unless userspace explicitly opts in to the relevant architecture-specific features. Signed-off-by: Dave Martin Reviewed-by: Alex Bennée Tested-by: zhang.lei Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6d4ea4b6c922..dc77a5a3648d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1145,6 +1145,7 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_U256 0x0050000000000000ULL #define KVM_REG_SIZE_U512 0x0060000000000000ULL #define KVM_REG_SIZE_U1024 0x0070000000000000ULL +#define KVM_REG_SIZE_U2048 0x0080000000000000ULL struct kvm_reg_list { __u64 n; /* number of regs */ -- cgit v1.2.3 From 7dd32a0d0103a5941efbb971f85a3e930cc5665e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 19 Dec 2018 14:27:01 +0000 Subject: KVM: arm/arm64: Add KVM_ARM_VCPU_FINALIZE ioctl Some aspects of vcpu configuration may be too complex to be completed inside KVM_ARM_VCPU_INIT. Thus, there may be a requirement for userspace to do some additional configuration before various other ioctls will work in a consistent way. In particular this will be the case for SVE, where userspace will need to negotiate the set of vector lengths to be made available to the guest before the vcpu becomes fully usable. In order to provide an explicit way for userspace to confirm that it has finished setting up a particular vcpu feature, this patch adds a new ioctl KVM_ARM_VCPU_FINALIZE. When userspace has opted into a feature that requires finalization, typically by means of a feature flag passed to KVM_ARM_VCPU_INIT, a matching call to KVM_ARM_VCPU_FINALIZE is now required before KVM_RUN or KVM_GET_REG_LIST is allowed. Individual features may impose additional restrictions where appropriate. No existing vcpu features are affected by this, so current userspace implementations will continue to work exactly as before, with no need to issue KVM_ARM_VCPU_FINALIZE. As implemented in this patch, KVM_ARM_VCPU_FINALIZE is currently a placeholder: no finalizable features exist yet, so ioctl is not required and will always yield EINVAL. Subsequent patches will add the finalization logic to make use of this ioctl for SVE. No functional change for existing userspace. Signed-off-by: Dave Martin Reviewed-by: Julien Thierry Tested-by: zhang.lei Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dc77a5a3648d..c3b8e7a31315 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1441,6 +1441,9 @@ struct kvm_enc_region { /* Available with KVM_CAP_HYPERV_CPUID */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) +/* Available with KVM_CAP_ARM_SVE */ +#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From 555f3d03e7fb43a539050aee91337978da14a25e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 15 Jan 2019 12:21:22 +0000 Subject: KVM: arm64: Add a capability to advertise SVE support To provide a uniform way to check for KVM SVE support amongst other features, this patch adds a suitable capability KVM_CAP_ARM_SVE, and reports it as present when SVE is available. Signed-off-by: Dave Martin Reviewed-by: Julien Thierry Tested-by: zhang.lei Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c3b8e7a31315..1d564445b515 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_VM_IPA_SIZE 165 #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_ARM_SVE 168 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From a243c16d18be130b17cf1064e9115de73bfdff5a Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Tue, 23 Apr 2019 10:12:37 +0530 Subject: KVM: arm64: Add capability to advertise ptrauth for guest This patch advertises the capability of two cpu feature called address pointer authentication and generic pointer authentication. These capabilities depend upon system support for pointer authentication and VHE mode. The current arm64 KVM partially implements pointer authentication and support of address/generic authentication are tied together. However, separate ABI requirements for both of them is added so that any future isolated implementation will not require any ABI changes. Signed-off-by: Amit Daniel Kachhap Cc: Mark Rutland Cc: Marc Zyngier Cc: Christoffer Dall Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1d564445b515..4dc34f8e29f6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -989,6 +989,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 #define KVM_CAP_HYPERV_CPUID 167 #define KVM_CAP_ARM_SVE 168 +#define KVM_CAP_ARM_PTRAUTH_ADDRESS 169 +#define KVM_CAP_ARM_PTRAUTH_GENERIC 170 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From cdd6ad3ac63d2fa320baefcf92a02a918375c30f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 5 Mar 2019 05:30:01 -0500 Subject: KVM: polling: add architecture backend to disable polling There are cases where halt polling is unwanted. For example when running KVM on an over committed LPAR we rather want to give back the CPU to neighbour LPARs instead of polling. Let us provide a callback that allows architectures to disable polling. Signed-off-by: Christian Borntraeger Acked-by: Paolo Bonzini Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- include/linux/kvm_host.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d55c63db09b..b3aff1a3f633 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1305,6 +1305,16 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ +#ifdef CONFIG_HAVE_KVM_NO_POLL +/* Callback that tells if we must not poll */ +bool kvm_arch_no_poll(struct kvm_vcpu *vcpu); +#else +static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +{ + return false; +} +#endif /* CONFIG_HAVE_KVM_NO_POLL */ + #ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -- cgit v1.2.3 From 90c73795afa24890bd2ae4f3b359de04b4147d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 18 Apr 2019 12:39:27 +0200 Subject: KVM: PPC: Book3S HV: Add a new KVM device for the XIVE native exploitation mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the basic framework for the new KVM device supporting the XIVE native exploitation mode. The user interface exposes a new KVM device to be created by QEMU, only available when running on a L0 hypervisor. Support for nested guests is not available yet. The XIVE device reuses the device structure of the XICS-on-XIVE device as they have a lot in common. That could possibly change in the future if the need arise. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6d4ea4b6c922..e6368163d3a0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1211,6 +1211,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_ITS, #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS + KVM_DEV_TYPE_XIVE, +#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_MAX, }; -- cgit v1.2.3 From eacc56bb9de3e6830ddc169553772cd6de59ee4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 18 Apr 2019 12:39:28 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Introduce a new capability KVM_CAP_PPC_IRQ_XIVE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user interface exposes a new capability KVM_CAP_PPC_IRQ_XIVE to let QEMU connect the vCPU presenters to the XIVE KVM device if required. The capability is not advertised for now as the full support for the XIVE native exploitation mode is not yet available. When this is case, the capability will be advertised on PowerNV Hypervisors only. Nested guests (pseries KVM Hypervisor) are not supported. Internally, the interface to the new KVM device is protected with a new interrupt mode: KVMPPC_IRQ_XIVE. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6368163d3a0..52bf74a1616e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_VM_IPA_SIZE 165 #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_PPC_IRQ_XIVE 168 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From a1cd3f0883f435e5f9ae6530d7e62b361c87a91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 18 Apr 2019 12:39:36 +0200 Subject: KVM: Introduce a 'mmap' method for KVM devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some KVM devices will want to handle special mappings related to the underlying HW. For instance, the XIVE interrupt controller of the POWER9 processor has MMIO pages for thread interrupt management and for interrupt source control that need to be exposed to the guest when the OS has the required support. Cc: Paolo Bonzini Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d55c63db09b..831d963451d8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1245,6 +1245,7 @@ struct kvm_device_ops { int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, unsigned long arg); + int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; void kvm_device_get(struct kvm_device *dev); -- cgit v1.2.3 From 2bde9b3ec8bdf60788e9e2ce8c07a2f8d6003dbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 18 Apr 2019 12:39:41 +0200 Subject: KVM: Introduce a 'release' method for KVM devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a P9 sPAPR VM boots, the CAS negotiation process determines which interrupt mode to use (XICS legacy or XIVE native) and invokes a machine reset to activate the chosen mode. To be able to switch from one interrupt mode to another, we introduce the capability to release a KVM device without destroying the VM. The KVM device interface is extended with a new 'release' method which is called when the file descriptor of the device is closed. Once 'release' is called, the 'destroy' method will not be called anymore as the device is removed from the device list of the VM. Cc: Paolo Bonzini Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/linux/kvm_host.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 831d963451d8..722692e2f745 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1240,6 +1240,15 @@ struct kvm_device_ops { */ void (*destroy)(struct kvm_device *dev); + /* + * Release is an alternative method to free the device. It is + * called when the device file descriptor is closed. Once + * release is called, the destroy method will not be called + * anymore as the device is removed from the device list of + * the VM. kvm->lock is held. + */ + void (*release)(struct kvm_device *dev); + int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); -- cgit v1.2.3 From 8479e04e7d6b1974629a0f657afa8ec5f17d2e90 Mon Sep 17 00:00:00 2001 From: Luwei Kang Date: Mon, 18 Feb 2019 19:26:07 -0500 Subject: KVM: x86: Inject PMI for KVM guest Inject a PMI for KVM guest when Intel PT working in Host-Guest mode and Guest ToPA entry memory buffer was completely filled. Signed-off-by: Luwei Kang Signed-off-by: Paolo Bonzini --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e47ef764f613..820c4ff31bc5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -30,6 +30,7 @@ struct perf_guest_info_callbacks { int (*is_in_guest)(void); int (*is_user_mode)(void); unsigned long (*get_guest_ip)(void); + void (*handle_intel_pt_intr)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT -- cgit v1.2.3 From e45adf665a53df0db37f784ed87c6b57ddd81885 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:34 +0100 Subject: KVM: Introduce a new guest mapping API In KVM, specially for nested guests, there is a dominant pattern of: => map guest memory -> do_something -> unmap guest memory In addition to all this unnecessarily noise in the code due to boiler plate code, most of the time the mapping function does not properly handle memory that is not backed by "struct page". This new guest mapping API encapsulate most of this boiler plate code and also handles guest memory that is not backed by "struct page". The current implementation of this API is using memremap for memory that is not backed by a "struct page" which would lead to a huge slow-down if it was used for high-frequency mapping operations. The API does not have any effect on current setups where guest memory is backed by a "struct page". Further patches are going to also introduce a pfn-cache which would significantly improve the performance of the memremap case. Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 43f87ad83ff4..6f665c16e31d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -227,6 +227,32 @@ enum { READING_SHADOW_PAGE_TABLES, }; +#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA) + +struct kvm_host_map { + /* + * Only valid if the 'pfn' is managed by the host kernel (i.e. There is + * a 'struct page' for it. When using mem= kernel parameter some memory + * can be used as guest memory but they are not managed by host + * kernel). + * If 'pfn' is not managed by the host kernel, this field is + * initialized to KVM_UNMAPPED_PAGE. + */ + struct page *page; + void *hva; + kvm_pfn_t pfn; + kvm_pfn_t gfn; +}; + +/* + * Used to check if the mapping is valid or not. Never use 'kvm_host_map' + * directly to check for that. + */ +static inline bool kvm_vcpu_mapped(struct kvm_host_map *map) +{ + return !!map->hva; +} + /* * Sometimes a large or cross-page mmio needs to be broken up into separate * exits for userspace servicing. @@ -733,7 +759,9 @@ struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); +int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, -- cgit v1.2.3 From d7547c55cbe7471255ca51f14bcd4699f5eaabe5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:47 +0800 Subject: KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 The previous KVM_CAP_MANUAL_DIRTY_LOG_PROTECT has some problem which blocks the correct usage from userspace. Obsolete the old one and introduce a new capability bit for it. Suggested-by: Paolo Bonzini Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6d4ea4b6c922..d673734c46cb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -986,8 +986,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #define KVM_CAP_EXCEPTION_PAYLOAD 164 #define KVM_CAP_ARM_VM_IPA_SIZE 165 -#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 #ifdef KVM_CAP_IRQ_ROUTING @@ -1434,7 +1435,7 @@ struct kvm_enc_region { #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) -/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ +/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) /* Available with KVM_CAP_HYPERV_CPUID */ -- cgit v1.2.3