From 07a368b3f55a79d33cad113d506b279e04fd2a00 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 25 Oct 2022 15:47:27 +0300 Subject: bug: introduce ASSERT_STRUCT_OFFSET ASSERT_STRUCT_OFFSET allows to assert during the build of the kernel that a field in a struct have an expected offset. KVM used to have such macro, but there is almost nothing KVM specific in it so move it to build_bug.h, so that it can be used in other places in KVM. Signed-off-by: Maxim Levitsky Message-Id: <20221025124741.228045-10-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/build_bug.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index e3a0be2c90ad..3aa3640f8c18 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,4 +77,13 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) + +/* + * Compile time check that field has an expected offset + */ +#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \ + BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset), \ + "Offset of " #field " in " #type " has changed.") + + #endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.2.3 From 93c5c61d9e58a9ea423439d358c198be5b674a58 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Oct 2022 15:58:06 -0400 Subject: mm/gup: Add FOLL_INTERRUPTIBLE We have had FAULT_FLAG_INTERRUPTIBLE but it was never applied to GUPs. One issue with it is that not all GUP paths are able to handle signal delivers besides SIGKILL. That's not ideal for the GUP users who are actually able to handle these cases, like KVM. KVM uses GUP extensively on faulting guest pages, during which we've got existing infrastructures to retry a page fault at a later time. Allowing the GUP to be interrupted by generic signals can make KVM related threads to be more responsive. For examples: (1) SIGUSR1: which QEMU/KVM uses to deliver an inter-process IPI, e.g. when the admin issues a vm_stop QMP command, SIGUSR1 can be generated to kick the vcpus out of kernel context immediately, (2) SIGINT: which can be used with interactive hypervisor users to stop a virtual machine with Ctrl-C without any delays/hangs, (3) SIGTRAP: which grants GDB capability even during page faults that are stuck for a long time. Normally hypervisor will be able to receive these signals properly, but not if we're stuck in a GUP for a long time for whatever reason. It happens easily with a stucked postcopy migration when e.g. a network temp failure happens, then some vcpu threads can hang death waiting for the pages. With the new FOLL_INTERRUPTIBLE, we can allow GUP users like KVM to selectively enable the ability to trap these signals. Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Signed-off-by: Peter Xu Message-Id: <20221011195809.557016-2-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..3c84f4e48cd7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2958,6 +2958,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ #define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ +#define FOLL_INTERRUPTIBLE 0x100000 /* allow interrupts from generic signals */ /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each -- cgit v1.2.3 From fe5ed56c79733b7808f968567c581118ab79552e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Oct 2022 15:58:07 -0400 Subject: kvm: Add KVM_PFN_ERR_SIGPENDING Add a new pfn error to show that we've got a pending signal to handle during hva_to_pfn_slow() procedure (of -EINTR retval). Signed-off-by: Peter Xu Reviewed-by: Sean Christopherson Message-Id: <20221011195809.557016-3-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18592bdf4c1b..911b064878df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -96,6 +96,7 @@ #define KVM_PFN_ERR_FAULT (KVM_PFN_ERR_MASK) #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) +#define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) /* * error pfns indicate that the gfn is in slot but faild to @@ -106,6 +107,15 @@ static inline bool is_error_pfn(kvm_pfn_t pfn) return !!(pfn & KVM_PFN_ERR_MASK); } +/* + * KVM_PFN_ERR_SIGPENDING indicates that fetching the PFN was interrupted + * by a pending signal. Note, the signal may or may not be fatal. + */ +static inline bool is_sigpending_pfn(kvm_pfn_t pfn) +{ + return pfn == KVM_PFN_ERR_SIGPENDING; +} + /* * error_noslot pfns indicate that the gfn can not be * translated to pfn - it is not in slot or failed to -- cgit v1.2.3 From c8b88b332bedf47a9aa008dfb69998c90623375c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Oct 2022 15:58:08 -0400 Subject: kvm: Add interruptible flag to __gfn_to_pfn_memslot() Add a new "interruptible" flag showing that the caller is willing to be interrupted by signals during the __gfn_to_pfn_memslot() request. Wire it up with a FOLL_INTERRUPTIBLE flag that we've just introduced. This prepares KVM to be able to respond to SIGUSR1 (for QEMU that's the SIGIPI) even during e.g. handling an userfaultfd page fault. No functional change intended. Signed-off-by: Peter Xu Reviewed-by: Sean Christopherson Message-Id: <20221011195809.557016-4-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 911b064878df..8fe4665bd020 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1150,8 +1150,8 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool atomic, bool *async, bool write_fault, - bool *writable, hva_t *hva); + bool atomic, bool interruptible, bool *async, + bool write_fault, bool *writable, hva_t *hva); void kvm_release_pfn_clean(kvm_pfn_t pfn); void kvm_release_pfn_dirty(kvm_pfn_t pfn); -- cgit v1.2.3 From d663b8a285986072428a6a145e5994bc275df994 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Nov 2022 10:44:10 -0400 Subject: KVM: replace direct irq.h inclusion virt/kvm/irqchip.c is including "irq.h" from the arch-specific KVM source directory (i.e. not from arch/*/include) for the sole purpose of retrieving irqchip_in_kernel. Making the function inline in a header that is already included, such as asm/kvm_host.h, is not possible because it needs to look at struct kvm which is defined after asm/kvm_host.h is included. So add a kvm_arch_irqchip_in_kernel non-inline function; irqchip_in_kernel() is only performance critical on arm64 and x86, and the non-inline function is enough on all other architectures. irq.h can then be deleted from all architectures except x86. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8fe4665bd020..e6e66c5e56f2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -663,6 +663,8 @@ struct kvm_irq_routing_table { */ struct hlist_head map[]; }; + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); #endif #ifndef KVM_INTERNAL_MEM_SLOTS -- cgit v1.2.3 From cf87ac739e488055a6046a410caa8f4da108948f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:08 +0800 Subject: KVM: x86: Introduce KVM_REQ_DIRTY_RING_SOFT_FULL The VCPU isn't expected to be runnable when the dirty ring becomes soft full, until the dirty pages are harvested and the dirty ring is reset from userspace. So there is a check in each guest's entrace to see if the dirty ring is soft full or not. The VCPU is stopped from running if its dirty ring has been soft full. The similar check will be needed when the feature is going to be supported on ARM64. As Marc Zyngier suggested, a new event will avoid pointless overhead to check the size of the dirty ring ('vcpu->kvm->dirty_ring_size') in each guest's entrance. Add KVM_REQ_DIRTY_RING_SOFT_FULL. The event is raised when the dirty ring becomes soft full in kvm_dirty_ring_push(). The event is only cleared in the check, done in the newly added helper kvm_dirty_ring_check_request(). Since the VCPU is not runnable when the dirty ring becomes soft full, the KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent the VCPU from running until the dirty pages are harvested and the dirty ring is reset by userspace. kvm_dirty_ring_soft_full() becomes a private function with the newly added helper kvm_dirty_ring_check_request(). The alignment for the various event definitions in kvm_host.h is changed to tab character by the way. In order to avoid using 'container_of()', the argument @ring is replaced by @vcpu in kvm_dirty_ring_push(). Link: https://lore.kernel.org/kvmarm/87lerkwtm5.wl-maz@kernel.org Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Peter Xu Reviewed-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-2-gshan@redhat.com --- include/linux/kvm_dirty_ring.h | 12 ++++-------- include/linux/kvm_host.h | 9 +++++---- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 906f899813dc..9c13c4c3d30c 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -49,7 +49,7 @@ static inline int kvm_dirty_ring_reset(struct kvm *kvm, return 0; } -static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, +static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset) { } @@ -64,11 +64,6 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) { } -static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) -{ - return true; -} - #else /* CONFIG_HAVE_KVM_DIRTY_RING */ u32 kvm_dirty_ring_get_rsvd_entries(void); @@ -84,13 +79,14 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); * returns =0: successfully pushed * <0: unable to push, need to wait */ -void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset); +void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset); + +bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu); /* for use in vm_operations_struct */ struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); -bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); #endif /* CONFIG_HAVE_KVM_DIRTY_RING */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 00c3448ba7f8..648d663f32c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -153,10 +153,11 @@ static inline bool is_error_page(struct page *page) * Architecture-independent vcpu->requests bit members * Bits 3-7 are reserved for more arch-independent bits. */ -#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_UNBLOCK 2 -#define KVM_REQUEST_ARCH_BASE 8 +#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_UNBLOCK 2 +#define KVM_REQ_DIRTY_RING_SOFT_FULL 3 +#define KVM_REQUEST_ARCH_BASE 8 /* * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to -- cgit v1.2.3 From e8a18565e59303ac12c626a161d72bd890bd2062 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:09 +0800 Subject: KVM: Move declaration of kvm_cpu_dirty_log_size() to kvm_dirty_ring.h Not all architectures like ARM64 need to override the function. Move its declaration to kvm_dirty_ring.h to avoid the following compiling warning on ARM64 when the feature is enabled. arch/arm64/kvm/../../../virt/kvm/dirty_ring.c:14:12: \ warning: no previous prototype for 'kvm_cpu_dirty_log_size' \ [-Wmissing-prototypes] \ int __weak kvm_cpu_dirty_log_size(void) Reported-by: kernel test robot Signed-off-by: Gavin Shan Reviewed-by: Peter Xu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-3-gshan@redhat.com --- include/linux/kvm_dirty_ring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 9c13c4c3d30c..199ead37b104 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -66,6 +66,7 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) #else /* CONFIG_HAVE_KVM_DIRTY_RING */ +int kvm_cpu_dirty_log_size(void); u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); -- cgit v1.2.3 From 86bdf3ebcfe1ded055282536fecce13001874740 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:10 +0800 Subject: KVM: Support dirty ring in conjunction with bitmap ARM64 needs to dirty memory outside of a VCPU context when VGIC/ITS is enabled. It's conflicting with that ring-based dirty page tracking always requires a running VCPU context. Introduce a new flavor of dirty ring that requires the use of both VCPU dirty rings and a dirty bitmap. The expectation is that for non-VCPU sources of dirty memory (such as the VGIC/ITS on arm64), KVM writes to the dirty bitmap. Userspace should scan the dirty bitmap before migrating the VM to the target. Use an additional capability to advertise this behavior. The newly added capability (KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP) can't be enabled before KVM_CAP_DIRTY_LOG_RING_ACQ_REL on ARM64. In this way, the newly added capability is treated as an extension of KVM_CAP_DIRTY_LOG_RING_ACQ_REL. Suggested-by: Marc Zyngier Suggested-by: Peter Xu Co-developed-by: Oliver Upton Signed-off-by: Oliver Upton Signed-off-by: Gavin Shan Acked-by: Peter Xu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-4-gshan@redhat.com --- include/linux/kvm_dirty_ring.h | 7 +++++++ include/linux/kvm_host.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 199ead37b104..4862c98d80d3 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -37,6 +37,11 @@ static inline u32 kvm_dirty_ring_get_rsvd_entries(void) return 0; } +static inline bool kvm_use_dirty_bitmap(struct kvm *kvm) +{ + return true; +} + static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size) { @@ -67,6 +72,8 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) #else /* CONFIG_HAVE_KVM_DIRTY_RING */ int kvm_cpu_dirty_log_size(void); +bool kvm_use_dirty_bitmap(struct kvm *kvm); +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm); u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 648d663f32c4..db83f63f4e61 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -779,6 +779,7 @@ struct kvm { pid_t userspace_pid; unsigned int max_halt_poll_ns; u32 dirty_ring_size; + bool dirty_ring_with_bitmap; bool vm_bugged; bool vm_dead; -- cgit v1.2.3 From 6c7b2202e4d11572ab23a89aeec49005b94bb966 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2022 12:25:02 -0500 Subject: KVM: x86: avoid memslot check in NX hugepage recovery if it cannot succeed Since gfn_to_memslot() is relatively expensive, it helps to skip it if it the memslot cannot possibly have dirty logging enabled. In order to do this, add to struct kvm a counter of the number of log-page memslots. While the correct value can only be read with slots_lock taken, the NX recovery thread is content with using an approximate value. Therefore, the counter is an atomic_t. Based on https://lore.kernel.org/kvm/20221027200316.2221027-2-dmatlack@google.com/ by David Matlack. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e6e66c5e56f2..6f0f389f5f9c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -722,6 +722,11 @@ struct kvm { /* The current active memslot set for each address space */ struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; struct xarray vcpu_array; + /* + * Protected by slots_lock, but can be read outside if an + * incorrect answer is acceptable. + */ + atomic_t nr_memslots_dirty_logging; /* Used to wait for completion of MMU notifiers. */ spinlock_t mn_invalidate_lock; -- cgit v1.2.3 From 3ca9d84e722e8044c09e80992aa7b15bd904d3ce Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Nov 2022 19:40:01 -0500 Subject: KVM: always declare prototype for kvm_arch_irqchip_in_kernel Architecture code might want to use it even if CONFIG_HAVE_KVM_IRQ_ROUTING is false; for example PPC XICS has KVM_IRQ_LINE and wants to use kvm_arch_irqchip_in_kernel from there, but it does not have KVM_SET_GSI_ROUTING so the prototype was not provided. Fixes: d663b8a28598 ("KVM: replace direct irq.h inclusion") Reported-by: kernel test robot Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6f0f389f5f9c..b8d12356f015 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -663,9 +663,9 @@ struct kvm_irq_routing_table { */ struct hlist_head map[]; }; +#endif bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); -#endif #ifndef KVM_INTERNAL_MEM_SLOTS #define KVM_INTERNAL_MEM_SLOTS 0 -- cgit v1.2.3 From b0284cd29a957e62d60c2886fd663be93c56f9c0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2022 18:10:34 -0700 Subject: mm: Do not enable PG_arch_2 for all 64-bit architectures Commit 4beba9486abd ("mm: Add PG_arch_2 page flag") introduced a new page flag for all 64-bit architectures. However, even if an architecture is 64-bit, it may still have limited spare bits in the 'flags' member of 'struct page'. This may happen if an architecture enables SPARSEMEM without SPARSEMEM_VMEMMAP as is the case with the newly added loongarch. This architecture port needs 19 more bits for the sparsemem section information and, while it is currently fine with PG_arch_2, adding any more PG_arch_* flags will trigger build-time warnings. Add a new CONFIG_ARCH_USES_PG_ARCH_X option which can be selected by architectures that need more PG_arch_* flags beyond PG_arch_1. Select it on arm64. Signed-off-by: Catalin Marinas [pcc@google.com: fix build with CONFIG_ARM64_MTE disabled] Signed-off-by: Peter Collingbourne Reported-by: kernel test robot Cc: Andrew Morton Cc: Steven Price Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-2-pcc@google.com --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0b0ae5084e60..5dc7977edf9d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -132,7 +132,7 @@ enum pageflags { PG_young, PG_idle, #endif -#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, #endif #ifdef CONFIG_KASAN_HW_TAGS -- cgit v1.2.3 From ef6458b1b6ca3fdb991ce4182e981a88d4c58c0f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 3 Nov 2022 18:10:37 -0700 Subject: mm: Add PG_arch_3 page flag As with PG_arch_2, this flag is only allowed on 64-bit architectures due to the shortage of bits available. It will be used by the arm64 MTE code in subsequent patches. Signed-off-by: Peter Collingbourne Cc: Will Deacon Cc: Marc Zyngier Cc: Steven Price [catalin.marinas@arm.com: added flag preserving in __split_huge_page_tail()] Signed-off-by: Catalin Marinas Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104011041.290951-5-pcc@google.com --- include/linux/kernel-page-flags.h | 1 + include/linux/page-flags.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index eee1877a354e..859f4b0c1b2b 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -18,5 +18,6 @@ #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 +#define KPF_ARCH_3 42 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5dc7977edf9d..c50ce2812f17 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -134,6 +134,7 @@ enum pageflags { #endif #ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, + PG_arch_3, #endif #ifdef CONFIG_KASAN_HW_TAGS PG_skip_kasan_poison, -- cgit v1.2.3 From aba3caef58626f09b629085440eec5dd1368669a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:22 +0000 Subject: KVM: Shorten gfn_to_pfn_cache function names Formalize "gpc" as the acronym and use it in function names. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8d12356f015..8f874a964313 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1288,16 +1288,15 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); * -EFAULT for an untranslatable guest physical address. * * This primes a gfn_to_pfn_cache and links it into the @kvm's list for - * invalidations to be processed. Callers are required to use - * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before - * accessing the target page. + * invalidations to be processed. Callers are required to use kvm_gpc_check() + * to ensure that the cache is valid before accessing the target page. */ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, gpa_t gpa, unsigned long len); /** - * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. + * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. @@ -1314,11 +1313,11 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->scru for the memslot checks. */ -bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len); +bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long len); /** - * kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache. + * kvm_gpc_refresh - update a previously initialized cache. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. @@ -1335,11 +1334,11 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ -int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - gpa_t gpa, unsigned long len); +int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long len); /** - * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache. + * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. @@ -1348,7 +1347,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, * but at least the mapping from GPA to userspace HVA will remain cached * and can be reused on a subsequent refresh. */ -void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); +void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. -- cgit v1.2.3 From 8c82a0b3ba1a411b84af5d43a4cc5994efa897ec Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:24 +0000 Subject: KVM: Store immutable gfn_to_pfn_cache properties Move the assignment of immutable properties @kvm, @vcpu, and @usage to the initializer. Make _activate() and _deactivate() use stored values. Note, @len is also effectively immutable for most cases, but not in the case of the Xen runstate cache, which may be split across two pages and the length of the first segment will depend on its address. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj [sean: handle @len in a separate patch] Signed-off-by: Sean Christopherson [dwmw2: acknowledge that @len can actually change for some use cases] Signed-off-by: David Woodhouse --- include/linux/kvm_host.h | 37 ++++++++++++++++++------------------- include/linux/kvm_types.h | 1 + 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8f874a964313..73ded328f9dc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1260,18 +1260,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * kvm_gpc_init - initialize gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. - * - * This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must - * be zero-allocated (or zeroed by the caller before init). - */ -void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); - -/** - * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest - * physical address. - * * @kvm: pointer to kvm instance. - * @gpc: struct gfn_to_pfn_cache object. * @vcpu: vCPU to be used for marking pages dirty and to be woken on * invalidation. * @usage: indicates if the resulting host physical PFN is used while @@ -1280,20 +1269,31 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); * changes!---will also force @vcpu to exit the guest and * refresh the cache); and/or if the PFN used directly * by KVM (and thus needs a kernel virtual mapping). + * + * This sets up a gfn_to_pfn_cache by initializing locks and assigning the + * immutable attributes. Note, the cache must be zero-allocated (or zeroed by + * the caller before init). + */ +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, + struct kvm_vcpu *vcpu, enum pfn_cache_usage usage); + +/** + * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest + * physical address. + * + * @gpc: struct gfn_to_pfn_cache object. * @gpa: guest physical address to map. * @len: sanity check; the range being access must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. - * -EFAULT for an untranslatable guest physical address. + * -EFAULT for an untranslatable guest physical address. * - * This primes a gfn_to_pfn_cache and links it into the @kvm's list for + * This primes a gfn_to_pfn_cache and links it into the @gpc->kvm's list for * invalidations to be processed. Callers are required to use kvm_gpc_check() * to ensure that the cache is valid before accessing the target page. */ -int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, - gpa_t gpa, unsigned long len); +int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. @@ -1352,13 +1352,12 @@ void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. * - * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * - * This removes a cache from the @kvm's list to be processed on MMU notifier + * This removes a cache from the VM's list to be processed on MMU notifier * invocation. */ -void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); +void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 3ca3db020e0e..76de36e56cdf 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -67,6 +67,7 @@ struct gfn_to_pfn_cache { gpa_t gpa; unsigned long uhva; struct kvm_memory_slot *memslot; + struct kvm *kvm; struct kvm_vcpu *vcpu; struct list_head list; rwlock_t lock; -- cgit v1.2.3 From e308c24a358d1e79951b16c387cbc6c6593639a5 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:26 +0000 Subject: KVM: Use gfn_to_pfn_cache's immutable "kvm" in kvm_gpc_check() Make kvm_gpc_check() use kvm instance cached in gfn_to_pfn_cache. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- include/linux/kvm_host.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 73ded328f9dc..befc8114ed0d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1298,7 +1298,6 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) /** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * - * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * @gpa: current guest physical address to map. * @len: sanity check; the range being access must fit a single page. @@ -1313,8 +1312,7 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->scru for the memslot checks. */ -bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, - unsigned long len); +bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. -- cgit v1.2.3 From 0318f207d1c2e297d1ec1c6e145bb8bd053236f9 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Oct 2022 21:12:28 +0000 Subject: KVM: Use gfn_to_pfn_cache's immutable "kvm" in kvm_gpc_refresh() Make kvm_gpc_refresh() use kvm instance cached in gfn_to_pfn_cache. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Michal Luczaj [sean: leave kvm_gpc_unmap() as-is] Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- include/linux/kvm_host.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index befc8114ed0d..3ce4650776b8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1317,23 +1317,21 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. * - * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. * @gpa: updated guest physical address to map. * @len: sanity check; the range being access must fit a single page. - * + * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. - * -EFAULT for an untranslatable guest physical address. + * -EFAULT for an untranslatable guest physical address. * * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful - * returm from this function does not mean the page can be immediately + * return from this function does not mean the page can be immediately * accessed because it may have raced with an invalidation. Callers must * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ -int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa, - unsigned long len); +int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache. -- cgit v1.2.3 From 9f87791d686d85614584438d4f249eb32ef7964c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 13 Oct 2022 21:12:29 +0000 Subject: KVM: Drop KVM's API to allow temporarily unmapping gfn=>pfn cache Drop kvm_gpc_unmap() as it has no users and unclear requirements. The API was added as part of the original gfn_to_pfn_cache support, but its sole usage[*] was never merged. Fold the guts of kvm_gpc_unmap() into the deactivate path and drop the API. Omit acquiring refresh_lock as as concurrent calls to kvm_gpc_deactivate() are not allowed (this is not enforced, e.g. via lockdep. due to it being called during vCPU destruction). If/when temporary unmapping makes a comeback, the desirable behavior is likely to restrict temporary unmapping to vCPU-exclusive mappings and require the vcpu->mutex be held to serialize unmap. Use of the refresh_lock to protect unmapping was somewhat specuatively added by commit 93984f19e7bc ("KVM: Fully serialize gfn=>pfn cache refresh via mutex") to guard against concurrent unmaps, but the primary use case of the temporary unmap, nested virtualization[*], doesn't actually need or want concurrent unmaps. [*] https://lore.kernel.org/all/20211210163625.2886-7-dwmw2@infradead.org Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- include/linux/kvm_host.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3ce4650776b8..eac76965cf44 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1333,18 +1333,6 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); */ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); -/** - * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache. - * - * @kvm: pointer to kvm instance. - * @gpc: struct gfn_to_pfn_cache object. - * - * This unmaps the referenced page. The cache is left in the invalid state - * but at least the mapping from GPA to userspace HVA will remain cached - * and can be reused on a subsequent refresh. - */ -void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); - /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. * -- cgit v1.2.3 From 58f5ee5fedd981e05cb086cba4e8f923c3727a04 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 13 Oct 2022 21:12:31 +0000 Subject: KVM: Drop @gpa from exported gfn=>pfn cache check() and refresh() helpers Drop the @gpa param from the exported check()+refresh() helpers and limit changing the cache's GPA to the activate path. All external users just feed in gpc->gpa, i.e. this is a fancy nop. Allowing users to change the GPA at check()+refresh() is dangerous as those helpers explicitly allow concurrent calls, e.g. KVM could get into a livelock scenario. It's also unclear as to what the expected behavior should be if multiple tasks attempt to refresh with different GPAs. Signed-off-by: Sean Christopherson Signed-off-by: David Woodhouse --- include/linux/kvm_host.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eac76965cf44..7008846fd3dd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1299,7 +1299,6 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. - * @gpa: current guest physical address to map. * @len: sanity check; the range being access must fit a single page. * * @return: %true if the cache is still valid and the address matches. @@ -1312,15 +1311,14 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len) * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->scru for the memslot checks. */ -bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); +bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. * * @gpc: struct gfn_to_pfn_cache object. - * @gpa: updated guest physical address to map. * @len: sanity check; the range being access must fit a single page. - + * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. @@ -1331,7 +1329,7 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ -int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); +int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. -- cgit v1.2.3 From 30ee198ce42d60101620d33f8bc70c3234798365 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:10 +0100 Subject: KVM: Reference to kvm_userspace_memory_region in doc and comments There are still references to the removed kvm_memory_region data structure but the doc and comments should mention struct kvm_userspace_memory_region instead, since that is what's used by the ioctl that replaced the old one and this data structure support the same set of flags. Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-4-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8f874a964313..a5a82b536774 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -50,8 +50,8 @@ #endif /* - * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used - * in kvm, other bits are visible for userspace which are defined in + * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally + * used in kvm, other bits are visible for userspace which are defined in * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) -- cgit v1.2.3