From 2c07ded06427dd3339278487a1413d5e478f05f9 Mon Sep 17 00:00:00 2001
From: Brijesh Singh
Date: Mon, 4 Jan 2021 09:17:49 -0600
Subject: KVM/SVM: add support for SEV attestation command

SEV FW version >= 0.23 adds a new command that can be used to query the
attestation report, which contains the SHA-256 digest of the guest memory
encrypted through the KVM_SEV_LAUNCH_UPDATE_{DATA, VMSA} commands and is
signed with the Platform Endorsement Key (PEK). See the SEV FW API spec
section 6.8 for more details.

Note that there already exists a command (KVM_SEV_LAUNCH_MEASURE) that can
be used to get the SHA-256 digest. The main difference between
KVM_SEV_LAUNCH_MEASURE and KVM_SEV_ATTESTATION_REPORT is that the latter
can be called while the guest is running, and the measurement value is
signed with the PEK.

Cc: James Bottomley
Cc: Tom Lendacky
Cc: David Rientjes
Cc: Paolo Bonzini
Cc: Sean Christopherson
Cc: Borislav Petkov
Cc: John Allen
Cc: Herbert Xu
Cc: linux-crypto@vger.kernel.org
Reviewed-by: Tom Lendacky
Acked-by: David Rientjes
Tested-by: James Bottomley
Signed-off-by: Brijesh Singh
Message-Id: <20210104151749.30248-1-brijesh.singh@amd.com>
Signed-off-by: Paolo Bonzini
---
 include/linux/psp-sev.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 49d155cd2dfe..b801ead1e2bb 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -66,6 +66,7 @@ enum sev_cmd {
 	SEV_CMD_LAUNCH_MEASURE		= 0x033,
 	SEV_CMD_LAUNCH_UPDATE_SECRET	= 0x034,
 	SEV_CMD_LAUNCH_FINISH		= 0x035,
+	SEV_CMD_ATTESTATION_REPORT	= 0x036,
 
 	/* Guest migration commands (outgoing) */
 	SEV_CMD_SEND_START		= 0x040,
@@ -483,6 +484,22 @@ struct sev_data_dbg {
 	u32 len;				/* In */
 } __packed;
 
+/**
+ * struct sev_data_attestation_report - SEV_ATTESTATION_REPORT command parameters
+ *
+ * @handle: handle of the VM
+ * @mnonce: a random nonce that will be included in the report.
+ * @address: physical address where the report will be copied.
+ * @len: length of the physical buffer.
+ */
+struct sev_data_attestation_report {
+	u32 handle;				/* In */
+	u32 reserved;
+	u64 address;				/* In */
+	u8 mnonce[16];				/* In */
+	u32 len;				/* In/Out */
+} __packed;
+
 #ifdef CONFIG_CRYPTO_DEV_SP_PSP
 
 /**
--
cgit v1.2.3
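For context on how this new structure is meant to be driven, here is a
minimal, hedged sketch of a hypervisor-side caller building a
SEV_CMD_ATTESTATION_REPORT request. sev_issue_cmd() is a stand-in for the
PSP driver's internal command path (an assumption, not an exported kernel
API), and the buffer-retry behavior in the comments is paraphrased from
the SEV FW API spec rather than guaranteed by this header.

    #include <linux/psp-sev.h>
    #include <linux/string.h>

    /* Assumed helper: routes a command to the PSP driver; not a real export. */
    static int sev_issue_cmd(int cmd, void *data, int *fw_error);

    static int query_attestation_report(u32 handle, const u8 mnonce[16],
                                        void *report, u32 *len)
    {
            struct sev_data_attestation_report data = {};
            int fw_error = 0;
            int ret;

            data.handle = handle;
            memcpy(data.mnonce, mnonce, sizeof(data.mnonce));
            data.address = __psp_pa(report);  /* firmware writes the report here */
            data.len = *len;

            ret = sev_issue_cmd(SEV_CMD_ATTESTATION_REPORT, &data, &fw_error);

            /* If the buffer was too small, the firmware is expected to return
             * the required size through data.len so the caller can retry. */
            *len = data.len;
            return ret;
    }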
From 26128cb6c7e6731fe644c687af97733adfdb5ee9 Mon Sep 17 00:00:00 2001
From: Ben Gardon
Date: Tue, 2 Feb 2021 10:57:12 -0800
Subject: locking/rwlocks: Add contention detection for rwlocks

rwlocks do not currently have any facility to detect contention like
spinlocks do. In order to allow users of rwlocks to better manage
latency, add contention detection for queued rwlocks.

CC: Ingo Molnar
CC: Will Deacon
Acked-by: Peter Zijlstra
Acked-by: Davidlohr Bueso
Acked-by: Waiman Long
Acked-by: Paolo Bonzini
Signed-off-by: Ben Gardon
Message-Id: <20210202185734.1680553-7-bgardon@google.com>
Signed-off-by: Paolo Bonzini
---
 include/linux/rwlock.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 3dcd617e65ae..7ce9a51ae5c0 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -128,4 +128,11 @@ do {						\
 	1 : ({ local_irq_restore(flags); 0; }); \
 })
 
+#ifdef arch_rwlock_is_contended
+#define rwlock_is_contended(lock) \
+	arch_rwlock_is_contended(&(lock)->raw_lock)
+#else
+#define rwlock_is_contended(lock)	((void)(lock), 0)
+#endif /* arch_rwlock_is_contended */
+
 #endif /* __LINUX_RWLOCK_H */
--
cgit v1.2.3

From a09a689a534183c48f200bc2de1ae61ae9c462ad Mon Sep 17 00:00:00 2001
From: Ben Gardon
Date: Tue, 2 Feb 2021 10:57:13 -0800
Subject: sched: Add needbreak for rwlocks

Contention awareness while holding a spin lock is essential for reducing
latency when long running kernel operations can hold that lock. Add the
same contention detection interface for read/write spin locks.

CC: Ingo Molnar
CC: Will Deacon
Acked-by: Peter Zijlstra
Acked-by: Davidlohr Bueso
Acked-by: Waiman Long
Acked-by: Paolo Bonzini
Signed-off-by: Ben Gardon
Message-Id: <20210202185734.1680553-8-bgardon@google.com>
Signed-off-by: Paolo Bonzini
---
 include/linux/sched.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e3a5eeec509..5d1378e5a040 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1912,6 +1912,23 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+/*
+ * Check if a rwlock is contended.
+ * Returns non-zero if there is another task waiting on the rwlock.
+ * Returns zero if the lock is not contended or the system / underlying
+ * rwlock implementation does not support contention detection.
+ * Technically does not depend on CONFIG_PREEMPTION, but a general need
+ * for low latency.
+ */
+static inline int rwlock_needbreak(rwlock_t *lock)
+{
+#ifdef CONFIG_PREEMPTION
+	return rwlock_is_contended(lock);
+#else
+	return 0;
+#endif
+}
+
 static __always_inline bool need_resched(void)
 {
 	return unlikely(tif_need_resched());
--
cgit v1.2.3
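To make the intended usage concrete, here is a hedged sketch of a
long-running write-side loop that uses the new rwlock_needbreak() to back
off when another task is waiting on the lock. Everything except the rwlock
primitives — struct my_table, process_entry() — is an illustrative name,
not kernel code.

    #include <linux/rwlock.h>
    #include <linux/sched.h>

    struct my_table {               /* illustrative container */
            rwlock_t lock;
            int nr_entries;
    };

    /* Hypothetical per-entry work, provided elsewhere. */
    static void process_entry(struct my_table *t, int i);

    static void update_table(struct my_table *t)
    {
            int i;

            write_lock(&t->lock);
            for (i = 0; i < t->nr_entries; i++) {
                    process_entry(t, i);

                    if (rwlock_needbreak(&t->lock)) {
                            write_unlock(&t->lock); /* let the waiter in */
                            cpu_relax();
                            write_lock(&t->lock);
                            /* entries may have changed while unlocked */
                    }
            }
            write_unlock(&t->lock);
    }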
From f3d4b4b1dc1c5fb9ea17cac14133463bfe72f170 Mon Sep 17 00:00:00 2001
From: Ben Gardon
Date: Tue, 2 Feb 2021 10:57:14 -0800
Subject: sched: Add cond_resched_rwlock

Safely rescheduling while holding a spin lock is essential for keeping
long running kernel operations running smoothly. Add the facility to
cond_resched rwlocks.

CC: Ingo Molnar
CC: Will Deacon
Acked-by: Peter Zijlstra
Acked-by: Davidlohr Bueso
Acked-by: Waiman Long
Acked-by: Paolo Bonzini
Signed-off-by: Ben Gardon
Message-Id: <20210202185734.1680553-9-bgardon@google.com>
Signed-off-by: Paolo Bonzini
---
 include/linux/sched.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5d1378e5a040..3052d16da3cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1883,12 +1883,24 @@ static inline int _cond_resched(void) { return 0; }
 })
 
 extern int __cond_resched_lock(spinlock_t *lock);
+extern int __cond_resched_rwlock_read(rwlock_t *lock);
+extern int __cond_resched_rwlock_write(rwlock_t *lock);
 
 #define cond_resched_lock(lock) ({				\
 	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
 	__cond_resched_lock(lock);				\
 })
 
+#define cond_resched_rwlock_read(lock) ({			\
+	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	__cond_resched_rwlock_read(lock);			\
+})
+
+#define cond_resched_rwlock_write(lock) ({			\
+	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	__cond_resched_rwlock_write(lock);			\
+})
+
 static inline void cond_resched_rcu(void)
 {
 #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
--
cgit v1.2.3

From 531810caa9f4bc99ffbb90e09256792c56a6b07a Mon Sep 17 00:00:00 2001
From: Ben Gardon
Date: Tue, 2 Feb 2021 10:57:24 -0800
Subject: KVM: x86/mmu: Use an rwlock for the x86 MMU

Add a read/write lock to be used in place of the MMU spinlock on x86.
The rwlock will enable the TDP MMU to handle page faults and other
operations in parallel in future commits.

Reviewed-by: Peter Feiner
Signed-off-by: Ben Gardon
Message-Id: <20210202185734.1680553-19-bgardon@google.com>
[Introduce virt/kvm/mmu_lock.h - Paolo]
Signed-off-by: Paolo Bonzini
---
 include/linux/kvm_host.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f3b1013fb22c..f417447129b9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -451,7 +451,12 @@ struct kvm_memslots {
 };
 
 struct kvm {
+#ifdef KVM_HAVE_MMU_RWLOCK
+	rwlock_t mmu_lock;
+#else
 	spinlock_t mmu_lock;
+#endif /* KVM_HAVE_MMU_RWLOCK */
+
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
--
cgit v1.2.3
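The pieces above come together in loops like the following hedged sketch:
a long-running operation under the write-held mmu_lock (an rwlock_t when
KVM_HAVE_MMU_RWLOCK is defined, per the hunk above) that reschedules with
the new cond_resched_rwlock_write() under contention or a pending
reschedule. zap_one() and the gfn loop are illustrative, not actual TDP
MMU code.

    #include <linux/kvm_host.h>
    #include <linux/sched.h>

    /* Hypothetical per-page teardown, provided elsewhere. */
    static void zap_one(struct kvm *kvm, gfn_t gfn);

    static void zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
    {
            gfn_t gfn;

            write_lock(&kvm->mmu_lock);
            for (gfn = start; gfn < end; gfn++) {
                    zap_one(kvm, gfn);

                    /* Drops mmu_lock, reschedules if needed, reacquires it. */
                    if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
                            cond_resched_rwlock_write(&kvm->mmu_lock);
            }
            write_unlock(&kvm->mmu_lock);
    }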
From 9fd6dad1261a541b3f5fa7dc5b152222306e6702 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Fri, 5 Feb 2021 05:07:11 -0500
Subject: mm: provide a saner PTE walking API for modules

Currently, the follow_pfn function is exported for modules but follow_pte
is not. However, follow_pfn is very easy to misuse, because it does not
provide protections (so most of its callers assume the page is writable!)
and because it returns after having already unlocked the page table lock.

Provide instead a simplified version of follow_pte that does not have the
pmdpp and range arguments. The older version survives as
follow_invalidate_pte() for use by fs/dax.c.

Reviewed-by: Jason Gunthorpe
Signed-off-by: Paolo Bonzini
---
 include/linux/mm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ecdf8a8cd6ae..24b292fce8e5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1658,9 +1658,11 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct vm_area_struct *dst_vma,
 		    struct vm_area_struct *src_vma);
+int follow_invalidate_pte(struct mm_struct *mm, unsigned long address,
+			  struct mmu_notifier_range *range, pte_t **ptepp,
+			  pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pte(struct mm_struct *mm, unsigned long address,
-	       struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp,
-	       spinlock_t **ptlp);
+	       pte_t **ptepp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	       unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
--
cgit v1.2.3

From 4fc096a99e01dd06dc55bef76ade7f8d76653245 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Thu, 28 Jan 2021 13:01:31 -0500
Subject: KVM: Raise the maximum number of user memslots

The current KVM_USER_MEM_SLOTS limits are arch specific (512 on Power,
509 on x86, 32 on s390, 16 on MIPS), but they don't really need to be.
Memory slots are allocated dynamically in KVM when added, so the only
real limitation is the 'id_to_index' array, which is 'short'. We don't
have any other KVM_MEM_SLOTS_NUM/KVM_USER_MEM_SLOTS-sized statically
defined structures.

A low KVM_USER_MEM_SLOTS can be a limiting factor for some
configurations. In particular, when QEMU tries to start a Windows guest
with Hyper-V SynIC enabled and e.g. 256 vCPUs, the limit is hit, as SynIC
requires two pages per vCPU and the guest is free to pick any GFN for
each of them; this fragments memslots, as QEMU wants to have a separate
memslot for each of these pages (which are supposed to act as 'overlay'
pages).

Signed-off-by: Vitaly Kuznetsov
Message-Id: <20210127175731.2020089-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini
---
 include/linux/kvm_host.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f417447129b9..e126ebda36d0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -425,9 +425,8 @@ struct kvm_irq_routing_table {
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
 
-#ifndef KVM_MEM_SLOTS_NUM
-#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
-#endif
+#define KVM_MEM_SLOTS_NUM SHRT_MAX
+#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS)
 
 #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
 static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
--
cgit v1.2.3
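Looping back to the PTE-walking change two commits up, this is the calling
pattern the simplified follow_pte() enables for modules, sketched with
hedged, illustrative names (modeled loosely on how KVM resolves PFNs
behind user addresses; the caller is assumed to hold mmap_read_lock() on
the target mm):

    #include <linux/mm.h>

    static int addr_to_pfn(struct mm_struct *mm, unsigned long addr,
                           bool write, unsigned long *pfn)
    {
            pte_t *ptep;
            spinlock_t *ptl;
            int ret;

            ret = follow_pte(mm, addr, &ptep, &ptl);
            if (ret)
                    return ret;

            /* The writability check that bare follow_pfn() callers skipped. */
            if (write && !pte_write(*ptep)) {
                    pte_unmap_unlock(ptep, ptl);
                    return -EFAULT;
            }

            *pfn = pte_pfn(*ptep);
            pte_unmap_unlock(ptep, ptl);    /* drop the page table lock last */
            return 0;
    }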