From 2b2d7ca7ce25fbec8389e7d85e57742caa47c97d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 10 Dec 2024 10:08:42 +0100
Subject: dma-buf: fix incorrect dma-fence documentation v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There isn't much worse than documentation giving an incorrect advise.
Grabbing a spinlock while interrupts are disabled usually means that you
must also disable interrupts for all other uses of this spinlock.

Otherwise really hard to debug issues can occur. So fix that invalid
documentation.

v2: use Dmitry's suggestion on the documentation

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Simona Vetter <simona.vetter@ffwll.ch> (v1)
Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-2-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index e7ad819962e3..52587d390aca 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -169,8 +169,8 @@ struct dma_fence_ops {
 	 * implementation know that there is another driver waiting on the
 	 * signal (ie. hw->sw case).
 	 *
-	 * This function can be called from atomic context, but not
-	 * from irq context, so normal spinlocks can be used.
+	 * This is called with irq's disabled, so only spinlocks which disable
+	 * IRQ's can be used in the code outside of this callback.
 	 *
 	 * A return value of false indicates the fence already passed,
 	 * or some failure occurred that made it impossible to enable
-- 
cgit v1.2.3


From 2ce07fea3cc8b866f7955a7ce1d62b0cc1f74819 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Wed, 18 Sep 2024 08:16:57 +0200
Subject: dma-buf/dma-fence: remove unnecessary callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fence_value_str and timeline_value_str callbacks were just an
unnecessary abstraction in the SW sync implementation.

The only caller of those callbacks already knew that the fence in
questions is a timeline_fence. So print the values directly instead
of using a redirection.

Additional to that remove the implementations from virtgpu and vgem.
As far as I can see those were never used in the first place.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-3-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 52587d390aca..b12776883d14 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -238,27 +238,6 @@ struct dma_fence_ops {
 	 */
 	void (*release)(struct dma_fence *fence);
 
-	/**
-	 * @fence_value_str:
-	 *
-	 * Callback to fill in free-form debug info specific to this fence, like
-	 * the sequence number.
-	 *
-	 * This callback is optional.
-	 */
-	void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
-
-	/**
-	 * @timeline_value_str:
-	 *
-	 * Fills in the current value of the timeline as a string, like the
-	 * sequence number. Note that the specific fence passed to this function
-	 * should not matter, drivers should only use it to look up the
-	 * corresponding timeline structures.
-	 */
-	void (*timeline_value_str)(struct dma_fence *fence,
-				   char *str, int size);
-
 	/**
 	 * @set_deadline:
 	 *
-- 
cgit v1.2.3


From de68b17d5d0716c9a02b8a6ffa34f47c8f2f7690 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 11 Feb 2025 15:26:16 +0100
Subject: dma-buf: dma-buf: stop mapping sg_tables on attach v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a workaround to smoothly transit from static to dynamic DMA-buf
handling we cached the sg_table on attach if dynamic handling mismatched
between exporter and importer.

Since Dmitry and Thomas cleaned that up and also documented the lock
handling we can drop this workaround now.

V2: implement Sima's comments

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-4-christian.koenig@amd.com
---
 include/linux/dma-buf.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 36216d28d8bd..c54ff2dda8cb 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -583,20 +583,6 @@ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
 	return !!dmabuf->ops->pin;
 }
 
-/**
- * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic
- * mappings
- * @attach: the DMA-buf attachment to check
- *
- * Returns true if a DMA-buf importer wants to call the map/unmap functions with
- * the dma_resv lock held.
- */
-static inline bool
-dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach)
-{
-	return !!attach->importer_ops;
-}
-
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 					  struct device *dev);
 struct dma_buf_attachment *
-- 
cgit v1.2.3


From b72f66f22c0e39ae6684c43fead774c13db24e73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 11 Feb 2025 17:20:53 +0100
Subject: dma-buf: drop caching of sg_tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That was purely for the transition from static to dynamic dma-buf
handling and can be removed again now.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Simona Vetter <simona.vetter@ffwll.ch>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-5-christian.koenig@amd.com
---
 include/linux/dma-buf.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index c54ff2dda8cb..544f8f8c3f44 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -34,15 +34,6 @@ struct dma_buf_attachment;
  * @vunmap: [optional] unmaps a vmap from the buffer
  */
 struct dma_buf_ops {
-	/**
-	  * @cache_sgt_mapping:
-	  *
-	  * If true the framework will cache the first mapping made for each
-	  * attachment. This avoids creating mappings for attachments multiple
-	  * times.
-	  */
-	bool cache_sgt_mapping;
-
 	/**
 	 * @attach:
 	 *
@@ -493,8 +484,6 @@ struct dma_buf_attach_ops {
  * @dmabuf: buffer for this attachment.
  * @dev: device attached to the buffer.
  * @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf.
- * @sgt: cached mapping.
- * @dir: direction of cached mapping.
  * @peer2peer: true if the importer can handle peer resources without pages.
  * @priv: exporter specific attachment data.
  * @importer_ops: importer operations for this attachment, if provided
@@ -514,8 +503,6 @@ struct dma_buf_attachment {
 	struct dma_buf *dmabuf;
 	struct device *dev;
 	struct list_head node;
-	struct sg_table *sgt;
-	enum dma_data_direction dir;
 	bool peer2peer;
 	const struct dma_buf_attach_ops *importer_ops;
 	void *importer_priv;
-- 
cgit v1.2.3


From 62b1fa69f31969e11d03529d3a80599ddda2d043 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Fri, 15 Nov 2024 22:52:40 +1300
Subject: KVM: Export hardware virtualization enabling/disabling functions

To support TDX, KVM will need to enable TDX during KVM module loading
time.  Enabling TDX requires enabling hardware virtualization first so
that all online CPUs (and the new CPU going online) are in post-VMXON
state.

KVM by default enables hardware virtualization but that is done in
kvm_init(), which must be the last step after all initialization is done
thus is too late for enabling TDX.

Export functions to enable/disable hardware virtualization so that TDX
code can use them to handle hardware virtualization enabling before
kvm_init().

Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-ID: <dfe17314c0d9978b7bc3b0833dff6f167fbd28f5.1731664295.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f34f4cfaa513..1e75fa114f34 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2571,4 +2571,12 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 				    struct kvm_pre_fault_memory *range);
 #endif
 
+#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+int kvm_enable_virtualization(void);
+void kvm_disable_virtualization(void);
+#else
+static inline int kvm_enable_virtualization(void) { return 0; }
+static inline void kvm_disable_virtualization(void) { }
+#endif
+
 #endif
-- 
cgit v1.2.3


From fcdbdf63431c9faf639bffc957ea2ce9b545432e Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Fri, 15 Nov 2024 22:52:41 +1300
Subject: KVM: VMX: Initialize TDX during KVM module load

Before KVM can use TDX to create and run TDX guests, TDX needs to be
initialized from two perspectives: 1) TDX module must be initialized
properly to a working state; 2) A per-cpu TDX initialization, a.k.a the
TDH.SYS.LP.INIT SEAMCALL must be done on any logical cpu before it can
run any other TDX SEAMCALLs.

The TDX host core-kernel provides two functions to do the above two
respectively: tdx_enable() and tdx_cpu_enable().

There are two options in terms of when to initialize TDX: initialize TDX
at KVM module loading time, or when creating the first TDX guest.

Choose to initialize TDX during KVM module loading time:

Initializing TDX module is both memory and CPU time consuming: 1) the
kernel needs to allocate a non-trivial size(~1/256) of system memory
as metadata used by TDX module to track each TDX-usable memory page's
status; 2) the TDX module needs to initialize this metadata, one entry
for each TDX-usable memory page.

Also, the kernel uses alloc_contig_pages() to allocate those metadata
chunks, because they are large and need to be physically contiguous.
alloc_contig_pages() can fail.  If initializing TDX when creating the
first TDX guest, then there's chance that KVM won't be able to run any
TDX guests albeit KVM _declares_ to be able to support TDX.

This isn't good for the user.

On the other hand, initializing TDX at KVM module loading time can make
sure KVM is providing a consistent view of whether KVM can support TDX
to the user.

Always only try to initialize TDX after VMX has been initialized.  TDX
is based on VMX, and if VMX fails to initialize then TDX is likely to be
broken anyway.  Also, in practice, supporting TDX will require part of
VMX and common x86 infrastructure in working order, so TDX cannot be
enabled alone w/o VMX support.

There are two cases that can result in failure to initialize TDX: 1) TDX
cannot be supported (e.g., because of TDX is not supported or enabled by
hardware, or module is not loaded, or missing some dependency in KVM's
configuration); 2) Any unexpected error during TDX bring-up.  For the
first case only mark TDX is disabled but still allow KVM module to be
loaded.  For the second case just fail to load the KVM module so that
the user can be aware.

Because TDX costs additional memory, don't enable TDX by default.  Add a
new module parameter 'enable_tdx' to allow the user to opt-in.

Note, the name tdx_init() has already been taken by the early boot code.
Use tdx_bringup() for initializing TDX (and tdx_cleanup() since KVM
doesn't actually teardown TDX).  They don't match vt_init()/vt_exit(),
vmx_init()/vmx_exit() etc but it's not end of the world.

Also, once initialized, the TDX module cannot be disabled and enabled
again w/o the TDX module runtime update, which isn't supported by the
kernel.  After TDX is enabled, nothing needs to be done when KVM
disables hardware virtualization, e.g., when offlining CPU, or during
suspend/resume.  TDX host core-kernel code internally tracks TDX status
and can handle "multiple enabling" scenario.

Similar to KVM_AMD_SEV, add a new KVM_INTEL_TDX Kconfig to guide KVM TDX
code.  Make it depend on INTEL_TDX_HOST but not replace INTEL_TDX_HOST
because in the longer term there's a use case that requires making
SEAMCALLs w/o KVM as mentioned by Dan [1].

Link: https://lore.kernel.org/6723fc2070a96_60c3294dc@dwillia2-mobl3.amr.corp.intel.com.notmuch/ [1]
Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-ID: <162f9dee05c729203b9ad6688db1ca2960b4b502.1731664295.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1e75fa114f34..3bfe3140f444 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2284,6 +2284,7 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 }
 
 #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+extern bool enable_virt_at_load;
 extern bool kvm_rebooting;
 #endif
 
-- 
cgit v1.2.3


From 7c035bea94074b19ed560a4f23a76c5a6c8e594f Mon Sep 17 00:00:00 2001
From: Zhiming Hu <zhiming.hu@intel.com>
Date: Wed, 19 Feb 2025 09:02:51 -0500
Subject: KVM: TDX: Register TDX host key IDs to cgroup misc controller

TDX host key IDs (HKID) are limit resources in a machine, and the misc
cgroup lets the machine owner track their usage and limits the possibility
of abusing them outside the owner's control.

The cgroup v2 miscellaneous subsystem was introduced to control the
resource of AMD SEV & SEV-ES ASIDs.  Likewise introduce HKIDs as a misc
resource.

Signed-off-by: Zhiming Hu <zhiming.hu@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/misc_cgroup.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index 49eef10c8e59..8c0e4f4d71be 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -17,6 +17,10 @@ enum misc_res_type {
 	MISC_CG_RES_SEV,
 	/** @MISC_CG_RES_SEV_ES: AMD SEV-ES ASIDs resource */
 	MISC_CG_RES_SEV_ES,
+#endif
+#ifdef CONFIG_INTEL_TDX_HOST
+	/* Intel TDX HKIDs resource */
+	MISC_CG_RES_TDX,
 #endif
 	/** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */
 	MISC_CG_RES_TYPES
-- 
cgit v1.2.3


From c4a92f12cf35b83ce81757f6e5e8eb6223b87388 Mon Sep 17 00:00:00 2001
From: Yan Zhao <yan.y.zhao@intel.com>
Date: Mon, 13 Jan 2025 11:08:41 +0800
Subject: KVM: Add parameter "kvm" to kvm_cpu_dirty_log_size() and its callers

Add a parameter "kvm" to kvm_cpu_dirty_log_size() and down to its callers:
kvm_dirty_ring_get_rsvd_entries(), kvm_dirty_ring_alloc().

This is a preparation to make cpu_dirty_log_size a per-VM value rather than
a system-wide value.

No function changes expected.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_dirty_ring.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 4862c98d80d3..da4d9b5f58f1 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -32,7 +32,7 @@ struct kvm_dirty_ring {
  * If CONFIG_HAVE_HVM_DIRTY_RING not defined, kvm_dirty_ring.o should
  * not be included as well, so define these nop functions for the arch.
  */
-static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
+static inline u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm)
 {
 	return 0;
 }
@@ -42,7 +42,7 @@ static inline bool kvm_use_dirty_bitmap(struct kvm *kvm)
 	return true;
 }
 
-static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
+static inline int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring,
 				       int index, u32 size)
 {
 	return 0;
@@ -71,11 +71,12 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
 
 #else /* CONFIG_HAVE_KVM_DIRTY_RING */
 
-int kvm_cpu_dirty_log_size(void);
+int kvm_cpu_dirty_log_size(struct kvm *kvm);
 bool kvm_use_dirty_bitmap(struct kvm *kvm);
 bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
-u32 kvm_dirty_ring_get_rsvd_entries(void);
-int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
+u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm);
+int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring,
+			 int index, u32 size);
 
 /*
  * called with kvm->slots_lock held, returns the number of
-- 
cgit v1.2.3


From 44428e4936022a7a31743017849b167e64f33a32 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Sat, 22 Feb 2025 09:42:18 +0800
Subject: KVM: x86: Move pv_unhalted check out of kvm_vcpu_has_events()

Move pv_unhalted check out of kvm_vcpu_has_events(), check pv_unhalted
explicitly when handling PV unhalt and expose kvm_vcpu_has_events().

kvm_vcpu_has_events() returns true if pv_unhalted is set, and pv_unhalted
is only cleared on transitions to KVM_MP_STATE_RUNNABLE.  If the guest
initiates a spurious wakeup, pv_unhalted could be left set in perpetuity.
Currently, this is not problematic because kvm_vcpu_has_events() is only
called when handling PV unhalt.  However, if kvm_vcpu_has_events() is used
for other purposes in the future, it could return the unexpected results.

Export kvm_vcpu_has_events() for its usage in broader contexts.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Message-ID: <20250222014225.897298-3-binbin.wu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3bfe3140f444..ed1968f6f841 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1609,6 +1609,7 @@ void kvm_arch_disable_virtualization(void);
 int kvm_arch_enable_virtualization_cpu(void);
 void kvm_arch_disable_virtualization_cpu(void);
 #endif
+bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 0b28b7080ef5a323c3afa3860c3d45d627629830 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
Date: Tue, 4 Feb 2025 14:14:12 +0100
Subject: platform: cznic: Add keyctl helpers for Turris platform
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some Turris devices support signing messages with a per-device unique
asymmetric key that was created on the device at manufacture time.

Add helper module that helps to expose this ability via the keyctl()
syscall.

A device-specific driver can register a signing key by calling
devm_turris_signing_key_create().

Both the `.turris-signing-keys` keyring and the signing key are created
with only the VIEW, READ and SEARCH permissions for userspace - it is
impossible to link / unlink / move them, set their attributes, or unlink
the keyring from userspace.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/turris-signing-key.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 include/linux/turris-signing-key.h

(limited to 'include/linux')

diff --git a/include/linux/turris-signing-key.h b/include/linux/turris-signing-key.h
new file mode 100644
index 000000000000..032ca8cbf636
--- /dev/null
+++ b/include/linux/turris-signing-key.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * 2025 by Marek Behún <kabel@kernel.org>
+ */
+
+#ifndef __TURRIS_SIGNING_KEY_H
+#define __TURRIS_SIGNING_KEY_H
+
+#include <linux/key.h>
+#include <linux/types.h>
+
+struct device;
+
+struct turris_signing_key_subtype {
+	u16 key_size;
+	u8 data_size;
+	u8 sig_size;
+	u8 public_key_size;
+	const char *hash_algo;
+	const void *(*get_public_key)(const struct key *key);
+	int (*sign)(const struct key *key, const void *msg, void *signature);
+};
+
+static inline struct device *turris_signing_key_get_dev(const struct key *key)
+{
+	return key->payload.data[1];
+}
+
+int
+devm_turris_signing_key_create(struct device *dev, const struct turris_signing_key_subtype *subtype,
+			       const char *desc);
+
+#endif /* __TURRIS_SIGNING_KEY_H */
-- 
cgit v1.2.3


From 78a0323506f01e8017a5826cd7e91951c13184fa Mon Sep 17 00:00:00 2001
From: Sohil Mehta <sohil.mehta@intel.com>
Date: Thu, 27 Mar 2025 23:46:22 +0000
Subject: x86/nmi: Consolidate NMI panic variables

Commit:

  c305a4e98378 ("x86: Move sysctls into arch/x86")

recently moved the sysctl handling of panic_on_unrecovered_nmi and
panic_on_io_nmi to x86-specific code. These variables no longer need to
be declared in the generic header file.

Relocate the variable definitions and declarations closer to where they
are used. This makes all the NMI panic options consistent and easier to
track.

[ mingo: Fixed up the SHA1 of the commit reference. ]

Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Cc: Joel Granados <joel.granados@kernel.org>
Link: https://lore.kernel.org/r/20250327234629.3953536-3-sohil.mehta@intel.com
---
 include/linux/panic.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/panic.h b/include/linux/panic.h
index 2494d51707ef..4adc65766935 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -20,8 +20,6 @@ extern bool panic_triggering_all_cpu_backtrace;
 extern int panic_timeout;
 extern unsigned long panic_print;
 extern int panic_on_oops;
-extern int panic_on_unrecovered_nmi;
-extern int panic_on_io_nmi;
 extern int panic_on_warn;
 
 extern unsigned long panic_on_taint;
-- 
cgit v1.2.3


From 642989287350fa4964c37df0f3769094072421c3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 24 Mar 2025 17:59:59 +0100
Subject: firmware: turris-mox-rwtm: fix building without CONFIG_KEYS

"struct key" is defined conditionally, so the code referencing it
must be made conditional as well:

In file included from drivers/firmware/turris-mox-rwtm.c:29:
include/linux/turris-signing-key.h: In function 'turris_signing_key_get_dev':
include/linux/turris-signing-key.h:26:19: error: invalid use of undefined type 'const struct key'
   26 |         return key->payload.data[1];
      |                   ^~

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/turris-signing-key.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/turris-signing-key.h b/include/linux/turris-signing-key.h
index 032ca8cbf636..8a435b73c3a9 100644
--- a/include/linux/turris-signing-key.h
+++ b/include/linux/turris-signing-key.h
@@ -11,6 +11,7 @@
 
 struct device;
 
+#ifdef CONFIG_KEYS
 struct turris_signing_key_subtype {
 	u16 key_size;
 	u8 data_size;
@@ -29,5 +30,6 @@ static inline struct device *turris_signing_key_get_dev(const struct key *key)
 int
 devm_turris_signing_key_create(struct device *dev, const struct turris_signing_key_subtype *subtype,
 			       const char *desc);
+#endif
 
 #endif /* __TURRIS_SIGNING_KEY_H */
-- 
cgit v1.2.3


From f6e9a26e2d488c743757d66898ae91c53ffbe528 Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Thu, 3 Apr 2025 18:10:46 -0700
Subject: cgroup: move rstat base stat objects into their own struct

This non-functional change serves as preparation for moving to
subsystem-based rstat trees. The base stats are not an actual subsystem,
but in future commits they will have exclusive rstat trees just as other
subsystems will.

Moving the base stat objects into a new struct allows the cgroup_rstat_cpu
struct to become more compact since it now only contains the minimum amount
of pointers needed for rstat participation. Subsystems will (in future
commits) make use of the compact cgroup_rstat_cpu struct while avoiding the
memory overhead of the base stat objects which they will not use.

An instance of the new struct cgroup_rstat_base_cpu was placed on the
cgroup struct so it can retain ownership of these base stats common to all
cgroups. A helper function was added for looking up the cpu-specific base
stats of a given cgroup. Finally, initialization and variable names were
adjusted where applicable.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5bc8f55c8cca..1bf2e8db6dac 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -344,10 +344,29 @@ struct cgroup_base_stat {
  * frequency decreases the cost of each read.
  *
  * This struct hosts both the fields which implement the above -
- * updated_children and updated_next - and the fields which track basic
- * resource statistics on top of it - bsync, bstat and last_bstat.
+ * updated_children and updated_next.
  */
 struct cgroup_rstat_cpu {
+	/*
+	 * Child cgroups with stat updates on this cpu since the last read
+	 * are linked on the parent's ->updated_children through
+	 * ->updated_next.
+	 *
+	 * In addition to being more compact, singly-linked list pointing
+	 * to the cgroup makes it unnecessary for each per-cpu struct to
+	 * point back to the associated cgroup.
+	 *
+	 * Protected by per-cpu cgroup_rstat_cpu_lock.
+	 */
+	struct cgroup *updated_children;	/* terminated by self cgroup */
+	struct cgroup *updated_next;		/* NULL iff not on the list */
+};
+
+/*
+ * This struct hosts the fields which track basic resource statistics on
+ * top of it - bsync, bstat and last_bstat.
+ */
+struct cgroup_rstat_base_cpu {
 	/*
 	 * ->bsync protects ->bstat.  These are the only fields which get
 	 * updated in the hot path.
@@ -374,20 +393,6 @@ struct cgroup_rstat_cpu {
 	 * deltas to propagate to the per-cpu subtree_bstat.
 	 */
 	struct cgroup_base_stat last_subtree_bstat;
-
-	/*
-	 * Child cgroups with stat updates on this cpu since the last read
-	 * are linked on the parent's ->updated_children through
-	 * ->updated_next.
-	 *
-	 * In addition to being more compact, singly-linked list pointing
-	 * to the cgroup makes it unnecessary for each per-cpu struct to
-	 * point back to the associated cgroup.
-	 *
-	 * Protected by per-cpu cgroup_rstat_cpu_lock.
-	 */
-	struct cgroup *updated_children;	/* terminated by self cgroup */
-	struct cgroup *updated_next;		/* NULL iff not on the list */
 };
 
 struct cgroup_freezer_state {
@@ -518,6 +523,7 @@ struct cgroup {
 
 	/* per-cpu recursive resource statistics */
 	struct cgroup_rstat_cpu __percpu *rstat_cpu;
+	struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;
 	struct list_head rstat_css_list;
 
 	/*
-- 
cgit v1.2.3


From 845a7245801142bfff411bc84afa8cdbc789562f Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Thu, 3 Apr 2025 18:10:47 -0700
Subject: cgroup: add helper for checking when css is cgroup::self

The cgroup struct has a css field called "self". The main difference
between this css and the others found in the cgroup::subsys array is that
cgroup::self has a NULL subsystem pointer. There are several places where
checks are performed to determine whether the css in question is
cgroup::self or not. Instead of accessing css->ss directly, introduce a
helper function that shows the intent and use where applicable.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e7da3c3b098b..65d95bb2199f 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -347,6 +347,11 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
 	return css->flags & CSS_DYING;
 }
 
+static inline bool css_is_cgroup(struct cgroup_subsys_state *css)
+{
+	return css->ss == NULL;
+}
+
 static inline void cgroup_get(struct cgroup *cgrp)
 {
 	css_get(&cgrp->self);
-- 
cgit v1.2.3


From a97915559f5c5ff1972d678b94fd460c72a3b5f2 Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Thu, 3 Apr 2025 18:10:48 -0700
Subject: cgroup: change rstat function signatures from cgroup-based to
 css-based

This non-functional change serves as preparation for moving to
subsystem-based rstat trees. To simplify future commits, change the
signatures of existing cgroup-based rstat functions to become css-based and
rename them to reflect that.

Though the signatures have changed, the implementations have not. Within
these functions use the css->cgroup pointer to obtain the associated cgroup
and allow code to function the same just as it did before this patch. At
applicable call sites, pass the subsystem-specific css pointer as an
argument or pass a pointer to cgroup::self if not in subsystem context.

Note that cgroup_rstat_updated_list() and cgroup_rstat_push_children()
are not altered yet since there would be a larger amount of css to
cgroup conversions which may overcomplicate the code at this
intermediate phase.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 2 +-
 include/linux/cgroup.h      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1bf2e8db6dac..e58bfb880111 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -536,7 +536,7 @@ struct cgroup {
 	/*
 	 * A singly-linked list of cgroup structures to be rstat flushed.
 	 * This is a scratch field to be used exclusively by
-	 * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
+	 * css_rstat_flush_locked() and protected by cgroup_rstat_lock.
 	 */
 	struct cgroup	*rstat_flush_next;
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 65d95bb2199f..1f5b0a4a3356 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -693,8 +693,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
 /*
  * cgroup scalable recursive statistics.
  */
-void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
-void cgroup_rstat_flush(struct cgroup *cgrp);
+void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
+void css_rstat_flush(struct cgroup_subsys_state *css);
 
 /*
  * Basic resource stats.
-- 
cgit v1.2.3


From dd8a9807fa03666bff52cb28472fb227eaac36c9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 31 Mar 2025 13:35:59 +0300
Subject: spi: Group CS related fields in struct spi_device

The CS related fields are sparse in the struct spi_device. Group them.
While at it, fix the comment style of cs_index_mask.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20250331103609.4160281-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0ba5e49bace4..e10f0ebb8250 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -136,13 +136,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
  * @max_speed_hz: Maximum clock rate to be used with this chip
  *	(on this board); may be changed by the device's driver.
  *	The spi_transfer.speed_hz can override this for each transfer.
- * @chip_select: Array of physical chipselect, spi->chipselect[i] gives
- *	the corresponding physical CS for logical CS i.
- * @mode: The spi mode defines how data is clocked out and in.
- *	This may be changed by the device's driver.
- *	The "active low" default for chipselect mode can be overridden
- *	(by specifying SPI_CS_HIGH) as can the "MSB first" default for
- *	each word in a transfer (by specifying SPI_LSB_FIRST).
  * @bits_per_word: Data transfers involve one or more words; word sizes
  *	like eight or 12 bits are common.  In-memory wordsizes are
  *	powers of two bytes (e.g. 20 bit samples use 32 bits).
@@ -150,6 +143,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
  *	default (0) indicating protocol words are eight bit bytes.
  *	The spi_transfer.bits_per_word can override this for each transfer.
  * @rt: Make the pump thread real time priority.
+ * @mode: The spi mode defines how data is clocked out and in.
+ *	This may be changed by the device's driver.
+ *	The "active low" default for chipselect mode can be overridden
+ *	(by specifying SPI_CS_HIGH) as can the "MSB first" default for
+ *	each word in a transfer (by specifying SPI_LSB_FIRST).
  * @irq: Negative, or the number passed to request_irq() to receive
  *	interrupts from this device.
  * @controller_state: Controller's runtime state
@@ -162,8 +160,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
  *	the device will bind to the named driver and only the named driver.
  *	Do not set directly, because core frees it; use driver_set_override() to
  *	set or clear it.
- * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines
- *	(optional, NULL when not using a GPIO line)
+ * @pcpu_statistics: statistics for the spi_device
  * @word_delay: delay to be inserted between consecutive
  *	words of a transfer
  * @cs_setup: delay to be introduced by the controller after CS is asserted
@@ -171,8 +168,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
  * @cs_inactive: delay to be introduced by the controller after CS is
  *	deasserted. If @cs_change_delay is used from @spi_transfer, then the
  *	two delays will be added up.
- * @pcpu_statistics: statistics for the spi_device
+ * @chip_select: Array of physical chipselect, spi->chipselect[i] gives
+ *	the corresponding physical CS for logical CS i.
  * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array
+ * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines
+ *	(optional, NULL when not using a GPIO line)
  *
  * A @spi_device is used to interchange data between an SPI target device
  * (usually a discrete chip) and CPU memory.
@@ -187,7 +187,6 @@ struct spi_device {
 	struct device		dev;
 	struct spi_controller	*controller;
 	u32			max_speed_hz;
-	u8			chip_select[SPI_CS_CNT_MAX];
 	u8			bits_per_word;
 	bool			rt;
 #define SPI_NO_TX		BIT(31)		/* No transmit wire */
@@ -218,23 +217,29 @@ struct spi_device {
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
 	const char		*driver_override;
-	struct gpio_desc	*cs_gpiod[SPI_CS_CNT_MAX];	/* Chip select gpio desc */
+
+	/* The statistics */
+	struct spi_statistics __percpu	*pcpu_statistics;
+
 	struct spi_delay	word_delay; /* Inter-word delay */
+
 	/* CS delays */
 	struct spi_delay	cs_setup;
 	struct spi_delay	cs_hold;
 	struct spi_delay	cs_inactive;
 
-	/* The statistics */
-	struct spi_statistics __percpu	*pcpu_statistics;
+	u8			chip_select[SPI_CS_CNT_MAX];
 
-	/* Bit mask of the chipselect(s) that the driver need to use from
-	 * the chipselect array.When the controller is capable to handle
+	/*
+	 * Bit mask of the chipselect(s) that the driver need to use from
+	 * the chipselect array. When the controller is capable to handle
 	 * multiple chip selects & memories are connected in parallel
 	 * then more than one bit need to be set in cs_index_mask.
 	 */
 	u32			cs_index_mask : SPI_CS_CNT_MAX;
 
+	struct gpio_desc	*cs_gpiod[SPI_CS_CNT_MAX];	/* Chip select gpio desc */
+
 	/*
 	 * Likely need more hooks for more protocol options affecting how
 	 * the controller talks to each chip, like:
-- 
cgit v1.2.3


From f7b86e0e75bc234751cb7a82d888083a57ef28b2 Mon Sep 17 00:00:00 2001
From: Ashish Kalra <ashish.kalra@amd.com>
Date: Mon, 24 Mar 2025 21:15:17 +0000
Subject: crypto: ccp - Add new SEV/SNP platform shutdown API

Add new API interface to do SEV/SNP platform shutdown when KVM module
is unloaded.

Reviewed-by: Dionna Glaze <dionnaglaze@google.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/psp-sev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index f3cad182d4ef..0b3a36bdaa90 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -954,6 +954,7 @@ int sev_do_cmd(int cmd, void *data, int *psp_ret);
 void *psp_copy_user_blob(u64 uaddr, u32 len);
 void *snp_alloc_firmware_page(gfp_t mask);
 void snp_free_firmware_page(void *addr);
+void sev_platform_shutdown(void);
 
 #else	/* !CONFIG_CRYPTO_DEV_SP_PSP */
 
@@ -988,6 +989,8 @@ static inline void *snp_alloc_firmware_page(gfp_t mask)
 
 static inline void snp_free_firmware_page(void *addr) { }
 
+static inline void sev_platform_shutdown(void) { }
+
 #endif	/* CONFIG_CRYPTO_DEV_SP_PSP */
 
 #endif	/* __PSP_SEV_H__ */
-- 
cgit v1.2.3


From 1be1cd03a93339f14c8f4fe300bca321fddc6478 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 3 Apr 2025 18:59:14 +0300
Subject: gpiolib: acpi: Reduce memory footprint for struct acpi_gpio_params

The line_index member in the struct acpi_gpio_params replicates
what is covered in the ACPI GpioIo() or GpioInt() resource.
The value there is limited to 16-bit one, so we don't really need
to have a full 32-bit storage for it. Together with followed
boolean the structure will be smaller.

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3 (-3)
Function                                     old     new   delta
acpi_gpio_property_lookup                    417     414      -3
Total: Before=15361, After=15358, chg -0.02%

`pahole` difference before and after:

-	/* size: 12, cachelines: 1, members: 3 */
-	/* padding: 3 */

+	/* size: 8, cachelines: 1, members: 3 */
+	/* padding: 1 */

Acked-by: Mika Westerberg <westeri@kernel.org>
Link: https://lore.kernel.org/r/20250403160034.2680485-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/gpio/consumer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 45b651c05b9c..899179972bec 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -587,7 +587,7 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev,
 
 struct acpi_gpio_params {
 	unsigned int crs_entry_index;
-	unsigned int line_index;
+	unsigned short line_index;
 	bool active_low;
 };
 
-- 
cgit v1.2.3


From 5741909697a31cfb08e45d56b4211959fb791487 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Wed, 19 Mar 2025 14:01:32 +1100
Subject: VFS: improve interface for lookup_one functions

The family of functions:
  lookup_one()
  lookup_one_unlocked()
  lookup_one_positive_unlocked()

appear designed to be used by external clients of the filesystem rather
than by filesystems acting on themselves as the lookup_one_len family
are used.

They are used by:
   btrfs/ioctl - which is a user-space interface rather than an internal
     activity
   exportfs - i.e. from nfsd or the open_by_handle_at interface
   overlayfs - at access the underlying filesystems
   smb/server - for file service

They should be used by nfsd (more than just the exportfs path) and
cachefs but aren't.

It would help if the documentation didn't claim they should "not be
called by generic code".

Also the path component name is passed as "name" and "len" which are
(confusingly?) separate by the "base".  In some cases the len in simply
"strlen" and so passing a qstr using QSTR() would make the calling
clearer.
Other callers do pass separate name and len which are stored in a
struct.  Sometimes these are already stored in a qstr, other times it
easily could be.

So this patch changes these three functions to receive a 'struct qstr *',
and improves the documentation.

QSTR_LEN() is added to make it easy to pass a QSTR containing a known
len.

[brauner@kernel.org: take a struct qstr pointer]
Signed-off-by: NeilBrown <neil@brown.name>
Link: https://lore.kernel.org/r/20250319031545.2999807-2-neil@brown.name
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/dcache.h | 3 ++-
 include/linux/namei.h  | 9 ++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8d1395f945bf..e974e63bcdbc 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -57,7 +57,8 @@ struct qstr {
 };
 
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
-#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n))
+#define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l)
+#define QSTR(n) QSTR_LEN(n, strlen(n))
 
 extern const struct qstr empty_name;
 extern const struct qstr slash_name;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index e3042176cdf4..ba02304a2a8a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -73,13 +73,12 @@ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
 extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
-struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int);
+struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *);
 struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
-				   const char *name, struct dentry *base,
-				   int len);
+				   struct qstr *name, struct dentry *base);
 struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
-					    const char *name,
-					    struct dentry *base, int len);
+					    struct qstr *name,
+					    struct dentry *base);
 
 extern int follow_down_one(struct path *);
 extern int follow_down(struct path *path, unsigned int flags);
-- 
cgit v1.2.3


From 0a02e1f4a54ace747304687ced3b76d159e58914 Mon Sep 17 00:00:00 2001
From: Yixun Lan <dlan@gentoo.org>
Date: Wed, 26 Mar 2025 06:06:19 +0800
Subject: irqdomain: Support three-cell scheme interrupts

Add new function *_twothreecell() to extend support to parse three-cell
interrupts which encoded as <instance hwirq irqflag>, the translate
function will retrieve irq number and flag from last two cells.

This API will be used in gpio irq driver which need to work with
two or three cells cases.

Signed-off-by: Yixun Lan <dlan@gentoo.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250326-04-gpio-irq-threecell-v3-1-aab006ab0e00@gentoo.org
---
 include/linux/irqdomain.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index bb7111105296..df7e9278c8ac 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -571,16 +571,16 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
 int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr,
 			const u32 *intspec, unsigned int intsize,
 			irq_hw_number_t *out_hwirq, unsigned int *out_type);
-
-int irq_domain_translate_twocell(struct irq_domain *d,
-				 struct irq_fwspec *fwspec,
-				 unsigned long *out_hwirq,
-				 unsigned int *out_type);
-
-int irq_domain_translate_onecell(struct irq_domain *d,
-				 struct irq_fwspec *fwspec,
-				 unsigned long *out_hwirq,
-				 unsigned int *out_type);
+int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr,
+				  const u32 *intspec, unsigned int intsize,
+				  irq_hw_number_t *out_hwirq, unsigned int *out_type);
+
+int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec,
+				 unsigned long *out_hwirq, unsigned int *out_type);
+int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec,
+				 unsigned long *out_hwirq, unsigned int *out_type);
+int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec,
+				      unsigned long *out_hwirq, unsigned int *out_type);
 
 /* IPI functions */
 int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest);
-- 
cgit v1.2.3


From 7b73c12c6ebf006ad496f0e38a605d92dfe05157 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 15:59:59 +0100
Subject: shmem: Add shmem_writeout()

This will be the replacement for shmem_writepage().

Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Link: https://lore.kernel.org/r/20250402150005.2309458-6-willy@infradead.org
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/shmem_fs.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 0b273a7b9f01..5f03a39a26f7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -104,10 +104,11 @@ static inline bool shmem_mapping(struct address_space *mapping)
 	return false;
 }
 #endif /* CONFIG_SHMEM */
-extern void shmem_unlock_mapping(struct address_space *mapping);
-extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+void shmem_unlock_mapping(struct address_space *mapping);
+struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
-extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
+int shmem_writeout(struct folio *folio, struct writeback_control *wbc);
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 int shmem_unuse(unsigned int type);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-- 
cgit v1.2.3


From 6b0dfabb35550cb7b0808585dea6c24971d685d3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 16:00:03 +0100
Subject: fs: Remove aops->writepage

All callers and implementations are now removed, so remove the operation
and update the documentation to match.

Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Link: https://lore.kernel.org/r/20250402150005.2309458-10-willy@infradead.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..f7beefb917a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -433,7 +433,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
 }
 
 struct address_space_operations {
-	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*read_folio)(struct file *, struct folio *);
 
 	/* Write back some dirty pages from this mapping. */
-- 
cgit v1.2.3


From 559b3bbfa978ce3b23dc9c52d09a0eddca52c439 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 27 Mar 2025 10:06:10 -0400
Subject: locking/percpu-rwsem: add freezable alternative to down_read

Percpu-rwsems are used for superblock locking.  However, we know the
read percpu-rwsem we take for sb_start_write() on a frozen filesystem
needs not to inhibit system from suspending or hibernating.  That
means it needs to wait with TASK_UNINTERRUPTIBLE | TASK_FREEZABLE.

Introduce a new percpu_down_read_freezable() that allows us to control
whether TASK_FREEZABLE is added to the wait flags.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Link: https://lore.kernel.org/r/20250327140613.25178-2-James.Bottomley@HansenPartnership.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/percpu-rwsem.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index af7d75ede619..288f5235649a 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -43,9 +43,10 @@ is_static struct percpu_rw_semaphore name = {				\
 #define DEFINE_STATIC_PERCPU_RWSEM(name)	\
 	__DEFINE_PERCPU_RWSEM(name, static)
 
-extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem,
+					     bool freezable)
 {
 	might_sleep();
 
@@ -63,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 	if (likely(rcu_sync_is_idle(&sem->rss)))
 		this_cpu_inc(*sem->read_count);
 	else
-		__percpu_down_read(sem, false); /* Unconditional memory barrier */
+		__percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */
 	/*
 	 * The preempt_enable() prevents the compiler from
 	 * bleeding the critical section out.
@@ -71,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 	preempt_enable();
 }
 
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+	percpu_down_read_internal(sem, false);
+}
+
+static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem,
+					      bool freeze)
+{
+	percpu_down_read_internal(sem, freeze);
+}
+
 static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
 	bool ret = true;
@@ -82,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 	if (likely(rcu_sync_is_idle(&sem->rss)))
 		this_cpu_inc(*sem->read_count);
 	else
-		ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+		ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */
 	preempt_enable();
 	/*
 	 * The barrier() from preempt_enable() prevents the compiler from
-- 
cgit v1.2.3


From f73bae83675b860cae9f58dba233b6be92e750ca Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 27 Mar 2025 10:06:11 -0400
Subject: fs: allow all writers to be frozen

During freeze/thaw we need to be able to freeze all writers during
suspend/hibernate. Otherwise tasks such as systemd-journald that mmap a
file and write to it will not be frozen after we've already frozen the
filesystem.

This has some risk of not being able to freeze processes in case a
process has acquired SB_FREEZE_PAGEFAULT under mmap_sem or
SB_FREEZE_INTERNAL under some other filesytem specific lock. If the
filesystem is frozen, a task can block on the frozen filesystem with
e.g., mmap_sem held. If some other task then blocks on grabbing that
mmap_sem, hibernation ill fail because it is unable to hibernate a task
holding mmap_sem. This could be fixed by making a range of filesystem
related locks use freezable sleeping. That's impractical and not
warranted just for suspend/hibernate. Assume that this is an infrequent
problem and we've given userspace a way to skip filesystem freezing
through a sysfs file.

Link: https://lore.kernel.org/r/20250402-work-freeze-v2-2-6719a97b52ac@kernel.org
Link: https://lore.kernel.org/r/20250327140613.25178-3-James.Bottomley@HansenPartnership.com
[brauner: make all freeze levels set TASK_FREEZABLE and rewrite commit message]
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..35b81f497904 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1780,7 +1780,7 @@ static inline void __sb_end_write(struct super_block *sb, int level)
 
 static inline void __sb_start_write(struct super_block *sb, int level)
 {
-	percpu_down_read(sb->s_writers.rw_sem + level - 1);
+	percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true);
 }
 
 static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
-- 
cgit v1.2.3


From 2992476528aeecbaee17ba0a6396a817481205a3 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Sat, 29 Mar 2025 09:42:17 +0100
Subject: super: use a common iterator (Part 1)

Use a common iterator for all callbacks.

Link: https://lore.kernel.org/r/20250329-work-freeze-v2-4-a47af37ecc3d@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35b81f497904..5e007bffd00e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3515,7 +3515,11 @@ extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern void drop_super(struct super_block *sb);
 extern void drop_super_exclusive(struct super_block *sb);
-extern void iterate_supers(void (*)(struct super_block *, void *), void *);
+void __iterate_supers(void (*f)(struct super_block *, void *), void *arg, bool excl);
+static inline void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
+{
+	__iterate_supers(f, arg, false);
+}
 extern void iterate_supers_type(struct file_system_type *,
 			        void (*)(struct super_block *, void *), void *);
 
-- 
cgit v1.2.3


From b47e42d10e8c20525febccbd6e0dc8528861aea4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Sat, 29 Mar 2025 09:42:18 +0100
Subject: super: use common iterator (Part 2)

Use a common iterator for all callbacks. We could go for something even
more elaborate (advance step-by-step similar to iov_iter) but I really
don't think this is warranted.

Link: https://lore.kernel.org/r/20250329-work-freeze-v2-5-a47af37ecc3d@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5e007bffd00e..96f8c8a794ea 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3515,11 +3515,7 @@ extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern void drop_super(struct super_block *sb);
 extern void drop_super_exclusive(struct super_block *sb);
-void __iterate_supers(void (*f)(struct super_block *, void *), void *arg, bool excl);
-static inline void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
-{
-	__iterate_supers(f, arg, false);
-}
+extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg);
 extern void iterate_supers_type(struct file_system_type *,
 			        void (*)(struct super_block *, void *), void *);
 
-- 
cgit v1.2.3


From 06f2f68a670aae28b825065439301831e74da880 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 13 Mar 2025 15:31:15 +0100
Subject: genirq/generic-chip: Make locking unconditional

The SMP conditional wrappers around raw_spin_[un]lock() have no real
value. On !SMP kernels the lock operations are NOOPs except for a
preempt_disable/enable() pair on PREEMPT enabled kernels, which are not
really worth to optimize for. Aside of that this evades lockdep on !SMP
kernels.

Remove the !SMP stubs and make it unconditional.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250313142524.011345765@linutronix.de
---
 include/linux/irq.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index dd5df1e2d032..500772943207 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1222,7 +1222,6 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
 
 #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX)
 
-#ifdef CONFIG_SMP
 static inline void irq_gc_lock(struct irq_chip_generic *gc)
 {
 	raw_spin_lock(&gc->lock);
@@ -1232,10 +1231,6 @@ static inline void irq_gc_unlock(struct irq_chip_generic *gc)
 {
 	raw_spin_unlock(&gc->lock);
 }
-#else
-static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
-static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
-#endif
 
 /*
  * The irqsave variants are for usage in non interrupt code. Do not use
-- 
cgit v1.2.3


From 7ae844a6650c5c15ccfbf76ed767e7f2cc61ec1d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 13 Mar 2025 15:31:28 +0100
Subject: genirq/generic-chip: Remove unused lock wrappers

All users are converted to lock guards.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250313142524.388478168@linutronix.de
---
 include/linux/irq.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 500772943207..d896d3a471ec 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1222,26 +1222,6 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
 
 #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX)
 
-static inline void irq_gc_lock(struct irq_chip_generic *gc)
-{
-	raw_spin_lock(&gc->lock);
-}
-
-static inline void irq_gc_unlock(struct irq_chip_generic *gc)
-{
-	raw_spin_unlock(&gc->lock);
-}
-
-/*
- * The irqsave variants are for usage in non interrupt code. Do not use
- * them in irq_chip callbacks. Use irq_gc_lock() instead.
- */
-#define irq_gc_lock_irqsave(gc, flags)	\
-	raw_spin_lock_irqsave(&(gc)->lock, flags)
-
-#define irq_gc_unlock_irqrestore(gc, flags)	\
-	raw_spin_unlock_irqrestore(&(gc)->lock, flags)
-
 static inline void irq_reg_writel(struct irq_chip_generic *gc,
 				  u32 val, int reg_offset)
 {
-- 
cgit v1.2.3


From 1ce4c3aeef333be1e6290ec6d1f7891c2bfc7a1f Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 1 Apr 2025 11:37:16 +0200
Subject: firmware: sysfb: Move bpp-depth calculation into screen_info helper

Move the calculation of the bits per pixels for screen_info into a
helper function. This will make it available to other callers besides
the firmware code.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://lore.kernel.org/r/20250401094056.32904-14-tzimmermann@suse.de
---
 include/linux/screen_info.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 6a4a3cec4638..ab3cffbb58b7 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -128,6 +128,8 @@ static inline unsigned int screen_info_video_type(const struct screen_info *si)
 
 ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num);
 
+u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si);
+
 #if defined(CONFIG_PCI)
 void screen_info_apply_fixups(void);
 struct pci_dev *screen_info_pci_dev(const struct screen_info *si);
-- 
cgit v1.2.3


From 814d270b31d27b6ea4f722b8ae2db9802fe332ff Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 1 Apr 2025 11:37:21 +0200
Subject: drm/sysfb: vesadrm: Add gamma correction

Add palette support and export GAMMA properties via sysfs. User-space
compositors can use this interface for programming gamma ramps or night
mode.

Vesadrm supports palette updates via VGA DAC registers or VESA palette
calls. Up to 256 palette entries are available. Userspace always supplies
gamma ramps of 256 entries. If the native color format does not match
this because pixel component have less then 8 bits, vesadrm interpolates
among the palette entries.

The code uses CamelCase style in a few places to match the VESA manuals.

v3:
- fix coding style
v2:
- use CONFIG_X86_32 instead of __i386__ (checkpatch)
- protect struct vesadrm.pmi with CONFIG_X86_32

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://lore.kernel.org/r/20250401094056.32904-19-tzimmermann@suse.de
---
 include/linux/screen_info.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index ab3cffbb58b7..923d68e07679 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -126,6 +126,13 @@ static inline unsigned int screen_info_video_type(const struct screen_info *si)
 	return VIDEO_TYPE_CGA;
 }
 
+static inline u32 __screen_info_vesapm_info_base(const struct screen_info *si)
+{
+	if (si->vesapm_seg < 0xc000)
+		return 0;
+	return (si->vesapm_seg << 4) + si->vesapm_off;
+}
+
 ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num);
 
 u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si);
-- 
cgit v1.2.3


From af1c968d25c7c44cd2738349c479f8b610a3fc40 Mon Sep 17 00:00:00 2001
From: Cezary Rojewski <cezary.rojewski@intel.com>
Date: Mon, 7 Apr 2025 13:23:47 +0200
Subject: ASoC: Intel: avs: PTL-based platforms support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define handlers specific to ACE platforms, that Frisco Lake (FCL), a
PantherLake (PTL)-based platform, is founded upon. Most operations are
still inherited from their predecessors with the major difference being
AudioDSP cores management - replaced by DSP-domain power management.

Software has to ensure the DSP domain is both powered on and its
power-gating disabled before it can be utilized for streaming.

Reviewed-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
Acked-by: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Link: https://patch.msgid.link/20250407112352.3720779-6-cezary.rojewski@intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2e28182c3af0..981ed45cc45e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3070,6 +3070,7 @@
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
 #define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
 #define PCI_DEVICE_ID_INTEL_IOAT_SCNB	0x65ff
+#define PCI_DEVICE_ID_INTEL_HDA_FCL	0x67a8
 #define PCI_DEVICE_ID_INTEL_82371SB_0	0x7000
 #define PCI_DEVICE_ID_INTEL_82371SB_1	0x7010
 #define PCI_DEVICE_ID_INTEL_82371SB_2	0x7020
-- 
cgit v1.2.3


From 83b9ae77f06607d19f7d3dcc6008742051137b27 Mon Sep 17 00:00:00 2001
From: Cezary Rojewski <cezary.rojewski@intel.com>
Date: Fri, 4 Apr 2025 11:03:30 +0200
Subject: lib/string_helpers: Introduce parse_int_array()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Existing parse_inte_array_user() works with __user buffers only.
Separate array parsing from __user bits so the functionality can be
utilized with kernel buffers too.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20250404090337.3564117-2-cezary.rojewski@intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/string_helpers.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index e93fbb5b0c01..3fb88a1e9898 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -31,6 +31,7 @@ enum string_size_units {
 int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
 		    char *buf, int len);
 
+int parse_int_array(const char *buf, size_t count, int **array);
 int parse_int_array_user(const char __user *from, size_t count, int **array);
 
 #define UNESCAPE_SPACE		BIT(0)
-- 
cgit v1.2.3


From 5f3e0b4a1f59ab616a09a45fddddefa6ef756229 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Mon, 7 Apr 2025 01:58:05 +0200
Subject: fs: predict not having to do anything in fdput()

This matches the annotation in fdget().

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/20250406235806.1637000-2-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/file.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/file.h b/include/linux/file.h
index 302f11355b10..af1768d934a0 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -59,7 +59,7 @@ static inline struct fd CLONED_FD(struct file *f)
 
 static inline void fdput(struct fd fd)
 {
-	if (fd.word & FDPUT_FPUT)
+	if (unlikely(fd.word & FDPUT_FPUT))
 		fput(fd_file(fd));
 }
 
-- 
cgit v1.2.3


From fa6fe07d1536361a227d655e69ca270faf28fdbe Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Wed, 19 Mar 2025 14:01:35 +1100
Subject: VFS: rename lookup_one_len family to lookup_noperm and remove
 permission check

The lookup_one_len family of functions is (now) only used internally by
a filesystem on itself either
- in a context where permission checking is irrelevant such as by a
  virtual filesystem populating itself, or xfs accessing its ORPHANAGE
  or dquota accessing the quota file; or
- in a context where a permission check (MAY_EXEC on the parent) has just
  been performed such as a network filesystem finding in "silly-rename"
  file in the same directory.  This is also the context after the
  _parentat() functions where currently lookup_one_qstr_excl() is used.

So the permission check is pointless.

The name "one_len" is unhelpful in understanding the purpose of these
functions and should be changed.  Most of the callers pass the len as
"strlen()" so using a qstr and QSTR() can simplify the code.

This patch renames these functions (include lookup_positive_unlocked()
which is part of the family despite the name) to have a name based on
"lookup_noperm".  They are changed to receive a 'struct qstr' instead
of separate name and len.  In a few cases the use of QSTR() results in a
new call to strlen().

try_lookup_noperm() takes a pointer to a qstr instead of the whole
qstr.  This is consistent with d_hash_and_lookup() (which is nearly
identical) and useful for lookup_noperm_unlocked().

The new lookup_noperm_common() doesn't take a qstr yet.  That will be
tidied up in a subsequent patch.

Signed-off-by: NeilBrown <neil@brown.name>
Link: https://lore.kernel.org/r/20250319031545.2999807-5-neil@brown.name
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/namei.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index ba02304a2a8a..cb6ce97782e0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -69,10 +69,10 @@ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
 int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *,
 		    unsigned int, struct path *);
 
-extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
-extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
-extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
-extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
+extern struct dentry *try_lookup_noperm(struct qstr *, struct dentry *);
+extern struct dentry *lookup_noperm(struct qstr *, struct dentry *);
+extern struct dentry *lookup_noperm_unlocked(struct qstr *, struct dentry *);
+extern struct dentry *lookup_noperm_positive_unlocked(struct qstr *, struct dentry *);
 struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *);
 struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
 				   struct qstr *name, struct dentry *base);
-- 
cgit v1.2.3


From 06c567403ae5a0b56005c2d4a184c903f572c844 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 19 Mar 2025 14:01:36 +1100
Subject: Use try_lookup_noperm() instead of d_hash_and_lookup() outside of VFS

try_lookup_noperm() and d_hash_and_lookup() are nearly identical.  The
former does some validation of the name where the latter doesn't.
Outside of the VFS that validation is likely valuable, and having only
one exported function for this task is certainly a good idea.

So make d_hash_and_lookup() local to VFS files and change all other
callers to try_lookup_noperm().  Note that the arguments are swapped.

Signed-off-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20250319031545.2999807-6-neil@brown.name
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/dcache.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e974e63bcdbc..a324f82df562 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -288,7 +288,6 @@ extern void d_exchange(struct dentry *, struct dentry *);
 extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 
 extern struct dentry *d_lookup(const struct dentry *, const struct qstr *);
-extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
 
 static inline unsigned d_count(const struct dentry *dentry)
 {
-- 
cgit v1.2.3


From da916e96e2dedcb2d40de77a7def833d315b81a6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 25 Oct 2024 10:21:41 +0200
Subject: perf: Make perf_pmu_unregister() useable

Previously it was only safe to call perf_pmu_unregister() if there
were no active events of that pmu around -- which was impossible to
guarantee since it races all sorts against perf_init_event().

Rework the whole thing by:

 - keeping track of all events for a given pmu

 - 'hiding' the pmu from perf_init_event()

 - waiting for the appropriate (s)rcu grace periods such that all
   prior references to the PMU will be completed

 - detaching all still existing events of that pmu (see first point)
   and moving them to a new REVOKED state.

 - actually freeing the pmu data.

Where notably the new REVOKED state must inhibit all event actions
from reaching code that wants to use event->pmu.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lkml.kernel.org/r/20250307193723.525402029@infradead.org
---
 include/linux/perf_event.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0069ba6866a4..7f49a58b271d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -325,6 +325,9 @@ struct perf_output_handle;
 struct pmu {
 	struct list_head		entry;
 
+	spinlock_t			events_lock;
+	struct list_head		events;
+
 	struct module			*module;
 	struct device			*dev;
 	struct device			*parent;
@@ -622,9 +625,10 @@ struct perf_addr_filter_range {
  * enum perf_event_state - the states of an event:
  */
 enum perf_event_state {
-	PERF_EVENT_STATE_DEAD		= -4,
-	PERF_EVENT_STATE_EXIT		= -3,
-	PERF_EVENT_STATE_ERROR		= -2,
+	PERF_EVENT_STATE_DEAD		= -5,
+	PERF_EVENT_STATE_REVOKED	= -4, /* pmu gone, must not touch */
+	PERF_EVENT_STATE_EXIT		= -3, /* task died, still inherit */
+	PERF_EVENT_STATE_ERROR		= -2, /* scheduling error, can enable */
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
 	PERF_EVENT_STATE_ACTIVE		=  1,
@@ -865,6 +869,7 @@ struct perf_event {
 	void *security;
 #endif
 	struct list_head		sb_list;
+	struct list_head		pmu_list;
 
 	/*
 	 * Certain events gets forwarded to another pmu internally by over-
@@ -1155,7 +1160,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
 extern void perf_event_itrace_started(struct perf_event *event);
 
 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
-extern void perf_pmu_unregister(struct pmu *pmu);
+extern int perf_pmu_unregister(struct pmu *pmu);
 
 extern void __perf_event_task_sched_in(struct task_struct *prev,
 				       struct task_struct *task);
@@ -1760,7 +1765,7 @@ static inline bool needs_branch_stack(struct perf_event *event)
 
 static inline bool has_aux(struct perf_event *event)
 {
-	return event->pmu->setup_aux;
+	return event->pmu && event->pmu->setup_aux;
 }
 
 static inline bool has_aux_action(struct perf_event *event)
-- 
cgit v1.2.3


From 4dfe3232cc04325a09e96f6c7f9546ba6c0b132b Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Thu, 27 Mar 2025 12:52:13 -0700
Subject: perf/x86: Add dynamic constraint

More and more features require a dynamic event constraint, e.g., branch
counter logging, auto counter reload, Arch PEBS, etc.

Add a generic flag, PMU_FL_DYN_CONSTRAINT, to indicate the case. It
avoids keeping adding the individual flag in intel_cpuc_prepare().

Add a variable dyn_constraint in the struct hw_perf_event to track the
dynamic constraint of the event. Apply it if it's updated.

Apply the generic dynamic constraint for branch counter logging.
Many features on and after V6 require dynamic constraint. So
unconditionally set the flag for V6+.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Thomas Falcon <thomas.falcon@intel.com>
Link: https://lkml.kernel.org/r/20250327195217.2683619-2-kan.liang@linux.intel.com
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7f49a58b271d..54dad174ed7a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -158,6 +158,7 @@ struct hw_perf_event {
 		struct { /* hardware */
 			u64		config;
 			u64		last_tag;
+			u64		dyn_constraint;
 			unsigned long	config_base;
 			unsigned long	event_base;
 			int		event_base_rdpmc;
-- 
cgit v1.2.3


From c9449c8506a5df5052ef4d17867699517b10b55a Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Thu, 27 Mar 2025 12:52:15 -0700
Subject: perf: Extend the bit width of the arch-specific flag

The auto counter reload feature requires an event flag to indicate an
auto counter reload group, which can only be scheduled on specific
counters that enumerated in CPUID. However, the hw_perf_event.flags has
run out on X86.

Two solutions were considered to address the issue.
- Currently, 20 bits are reserved for the architecture-specific flags.
  Only the bit 31 is used for the generic flag. There is still plenty
  of space left. Reserve 8 more bits for the arch-specific flags.
- Add a new X86 specific hw_perf_event.flags1 to support more flags.

The former is implemented. Enough room is still left in the global
generic flag.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Thomas Falcon <thomas.falcon@intel.com>
Link: https://lkml.kernel.org/r/20250327195217.2683619-4-kan.liang@linux.intel.com
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 54dad174ed7a..5c547329cf02 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -144,7 +144,7 @@ struct hw_perf_event_extra {
  * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
  * usage.
  */
-#define PERF_EVENT_FLAG_ARCH			0x000fffff
+#define PERF_EVENT_FLAG_ARCH			0x0fffffff
 #define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000
 
 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
-- 
cgit v1.2.3


From ec980e4facef8110f6fce27e5b6344660117f01f Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Thu, 27 Mar 2025 12:52:17 -0700
Subject: perf/x86/intel: Support auto counter reload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The relative rates among two or more events are useful for performance
analysis, e.g., a high branch miss rate may indicate a performance
issue. Usually, the samples with a relative rate that exceeds some
threshold are more useful. However, the traditional sampling takes
samples of events separately. To get the relative rates among two or
more events, a high sample rate is required, which can bring high
overhead. Many samples taken in the non-hotspot area are also dropped
(useless) in the post-process.

The auto counter reload (ACR) feature takes samples when the relative
rate of two or more events exceeds some threshold, which provides the
fine-grained information at a low cost.
To support the feature, two sets of MSRs are introduced. For a given
counter IA32_PMC_GPn_CTR/IA32_PMC_FXm_CTR, bit fields in the
IA32_PMC_GPn_CFG_B/IA32_PMC_FXm_CFG_B MSR indicate which counter(s)
can cause a reload of that counter. The reload value is stored in the
IA32_PMC_GPn_CFG_C/IA32_PMC_FXm_CFG_C.
The details can be found at Intel SDM (085), Volume 3, 21.9.11 Auto
Counter Reload.

In the hw_config(), an ACR event is specially configured, because the
cause/reloadable counter mask has to be applied to the dyn_constraint.
Besides the HW limit, e.g., not support perf metrics, PDist and etc, a
SW limit is applied as well. ACR events in a group must be contiguous.
It facilitates the later conversion from the event idx to the counter
idx. Otherwise, the intel_pmu_acr_late_setup() has to traverse the whole
event list again to find the "cause" event.
Also, add a new flag PERF_X86_EVENT_ACR to indicate an ACR group, which
is set to the group leader.

The late setup() is also required for an ACR group. It's to convert the
event idx to the counter idx, and saved it in hw.config1.

The ACR configuration MSRs are only updated in the enable_event().
The disable_event() doesn't clear the ACR CFG register.
Add acr_cfg_b/acr_cfg_c in the struct cpu_hw_events to cache the MSR
values. It can avoid a MSR write if the value is not changed.

Expose an acr_mask to the sysfs. The perf tool can utilize the new
format to configure the relation of events in the group. The bit
sequence of the acr_mask follows the events enabled order of the group.

Example:

Here is the snippet of the mispredict.c. Since the array has a random
numbers, jumps are random and often mispredicted.
The mispredicted rate depends on the compared value.

For the Loop1, ~11% of all branches are mispredicted.
For the Loop2, ~21% of all branches are mispredicted.

main()
{
...
        for (i = 0; i < N; i++)
                data[i] = rand() % 256;
...
        /* Loop 1 */
        for (k = 0; k < 50; k++)
                for (i = 0; i < N; i++)
                        if (data[i] >= 64)
                                sum += data[i];
...

...
        /* Loop 2 */
        for (k = 0; k < 50; k++)
                for (i = 0; i < N; i++)
                        if (data[i] >= 128)
                                sum += data[i];
...
}

Usually, a code with a high branch miss rate means a bad performance.
To understand the branch miss rate of the codes, the traditional method
usually samples both branches and branch-misses events. E.g.,
perf record -e "{cpu_atom/branch-misses/ppu, cpu_atom/branch-instructions/u}"
               -c 1000000 -- ./mispredict

[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.925 MB perf.data (5106 samples) ]
The 5106 samples are from both events and spread in both Loops.
In the post-process stage, a user can know that the Loop 2 has a 21%
branch miss rate. Then they can focus on the samples of branch-misses
events for the Loop 2.

With this patch, the user can generate the samples only when the branch
miss rate > 20%. For example,
perf record -e "{cpu_atom/branch-misses,period=200000,acr_mask=0x2/ppu,
                 cpu_atom/branch-instructions,period=1000000,acr_mask=0x3/u}"
                -- ./mispredict

(Two different periods are applied to branch-misses and
branch-instructions. The ratio is set to 20%.
If the branch-instructions is overflowed first, the branch-miss
rate < 20%. No samples should be generated. All counters should be
automatically reloaded.
If the branch-misses is overflowed first, the branch-miss rate > 20%.
A sample triggered by the branch-misses event should be
generated. Just the counter of the branch-instructions should be
automatically reloaded.

The branch-misses event should only be automatically reloaded when
the branch-instructions is overflowed. So the "cause" event is the
branch-instructions event. The acr_mask is set to 0x2, since the
event index in the group of branch-instructions is 1.

The branch-instructions event is automatically reloaded no matter which
events are overflowed. So the "cause" events are the branch-misses
and the branch-instructions event. The acr_mask should be set to 0x3.)

[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.098 MB perf.data (2498 samples) ]

 $perf report

Percent       │154:   movl    $0x0,-0x14(%rbp)
              │     ↓ jmp     1af
              │     for (i = j; i < N; i++)
              │15d:   mov     -0x10(%rbp),%eax
              │       mov     %eax,-0x18(%rbp)
              │     ↓ jmp     1a2
              │     if (data[i] >= 128)
              │165:   mov     -0x18(%rbp),%eax
              │       cltq
              │       lea     0x0(,%rax,4),%rdx
              │       mov     -0x8(%rbp),%rax
              │       add     %rdx,%rax
              │       mov     (%rax),%eax
              │    ┌──cmp     $0x7f,%eax
100.00   0.00 │    ├──jle     19e
              │    │sum += data[i];

The 2498 samples are all from the branch-misses events for the Loop 2.

The number of samples and overhead is significantly reduced without
losing any information.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Thomas Falcon <thomas.falcon@intel.com>
Link: https://lkml.kernel.org/r/20250327195217.2683619-6-kan.liang@linux.intel.com
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5c547329cf02..947ad12dfdbe 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -157,6 +157,7 @@ struct hw_perf_event {
 	union {
 		struct { /* hardware */
 			u64		config;
+			u64		config1;
 			u64		last_tag;
 			u64		dyn_constraint;
 			unsigned long	config_base;
-- 
cgit v1.2.3


From a36283e2b683f172aa1760c77325e50b16c0f792 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 7 Apr 2025 17:45:41 +0200
Subject: udp_tunnel: create a fastpath GRO lookup.

Most UDP tunnels bind a socket to a local port, with ANY address, no
peer and no interface index specified.
Additionally it's quite common to have a single tunnel device per
namespace.

Track in each namespace the UDP tunnel socket respecting the above.
When only a single one is present, store a reference in the netns.

When such reference is not NULL, UDP tunnel GRO lookup just need to
match the incoming packet destination port vs the socket local port.

The tunnel socket never sets the reuse[port] flag[s]. When bound to no
address and interface, no other socket can exist in the same netns
matching the specified local port.

Matching packets with non-local destination addresses will be
aggregated, and eventually segmented as needed - no behavior changes
intended.

Restrict the optimization to kernel sockets only: it covers all the
relevant use-cases, and user-space owned sockets could be disconnected
and rebound after setup_udp_tunnel_sock(), breaking the uniqueness
assumption

Note that the UDP tunnel socket reference is stored into struct
netns_ipv4 for both IPv4 and IPv6 tunnels. That is intentional to keep
all the fastpath-related netns fields in the same struct and allow
cacheline-based optimization. Currently both the IPv4 and IPv6 socket
pointer share the same cacheline as the `udp_table` field.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/41d16bc8d1257d567f9344c445b4ae0b4a91ede4.1744040675.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/udp.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0807e21cfec9..895240177f4f 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -101,6 +101,13 @@ struct udp_sock {
 
 	/* Cache friendly copy of sk->sk_peek_off >= 0 */
 	bool		peeking_with_offset;
+
+	/*
+	 * Accounting for the tunnel GRO fastpath.
+	 * Unprotected by compilers guard, as it uses space available in
+	 * the last UDP socket cacheline.
+	 */
+	struct hlist_node	tunnel_list;
 };
 
 #define udp_test_bit(nr, sk)			\
@@ -219,4 +226,13 @@ static inline void udp_allow_gso(struct sock *sk)
 
 #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
 
+static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6)
+{
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+	return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk);
+#else
+	return NULL;
+#endif
+}
+
 #endif	/* _LINUX_UDP_H */
-- 
cgit v1.2.3


From 420aabef3ab5fa743afb4d3d391f03ef0e777ca8 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Mon, 7 Apr 2025 21:01:02 +0200
Subject: net: Drop unused @sk of __skb_try_recv_from_queue()

__skb_try_recv_from_queue() deals with a queue, @sk is not used
since commit  e427cad6eee4 ("net: datagram: drop 'destructor'
argument from several helpers"). Remove sk from function parameters,
adapt callers.

No functional change intended.

Signed-off-by: Michal Luczaj <mhal@rbox.co>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://patch.msgid.link/20250407-cleanup-drop-param-sk-v1-1-cd076979afac@rbox.co
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b974a277975a..f1381aff0f89 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4105,8 +4105,7 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
 				int *err, long *timeo_p,
 				const struct sk_buff *skb);
-struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
-					  struct sk_buff_head *queue,
+struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue,
 					  unsigned int flags,
 					  int *off, int *err,
 					  struct sk_buff **last);
-- 
cgit v1.2.3


From 265daffe788aa1cc5925d0afcde4fe6e99c66638 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 7 Apr 2025 09:08:14 +0200
Subject: gpio: provide gpiod_is_equal()

There are users in the kernel that directly compare raw GPIO descriptor
pointers in order to determine whether they refer to the same physical
GPIO pin. This accidentally works like this but is not guaranteed by any
API contract. Let's provide a comparator function that hides the actual
logic.

Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20250407-gpiod-is-equal-v1-1-7d85f568ae6e@linaro.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/gpio/consumer.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 45b651c05b9c..7355abadaef4 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -180,6 +180,8 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev,
 					      enum gpiod_flags flags,
 					      const char *label);
 
+bool gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other);
+
 #else /* CONFIG_GPIOLIB */
 
 #include <linux/bug.h>
@@ -547,6 +549,13 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev,
 	return ERR_PTR(-ENOSYS);
 }
 
+static inline bool
+gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other)
+{
+	WARN_ON(desc || other);
+	return false;
+}
+
 #endif /* CONFIG_GPIOLIB */
 
 #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_HTE)
-- 
cgit v1.2.3


From dd293df6395a2c9e0fc4faa8defeceaa907e7717 Mon Sep 17 00:00:00 2001
From: Joel Granados <joel.granados@kernel.org>
Date: Sun, 29 Dec 2024 16:11:25 +0100
Subject: tracing: Move trace sysctls into trace.c

Move trace ctl tables into their own const array in
kernel/trace/trace.c. The sysctl table register is called with
subsys_initcall placing if after its original place in proc_root_init.
This is part of a greater effort to move ctl tables into their
respective subsystems which will reduce the merge conflicts in
kernel/sysctl.c.

Signed-off-by: Joel Granados <joel.granados@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fbabc3d848b3..59774513ae45 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1298,16 +1298,9 @@ static inline void unpause_graph_tracing(void) { }
 #ifdef CONFIG_TRACING
 enum ftrace_dump_mode;
 
-#define MAX_TRACER_SIZE		100
-extern char ftrace_dump_on_oops[];
 extern int ftrace_dump_on_oops_enabled(void);
-extern int tracepoint_printk;
 
 extern void disable_trace_on_warning(void);
-extern int __disable_trace_on_warning;
-
-int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
-			     void *buffer, size_t *lenp, loff_t *ppos);
 
 #else /* CONFIG_TRACING */
 static inline void  disable_trace_on_warning(void) { }
-- 
cgit v1.2.3


From 67049b53e06fa1758df1463789f286a7cba67c50 Mon Sep 17 00:00:00 2001
From: Joel Granados <joel.granados@kernel.org>
Date: Wed, 8 Jan 2025 12:55:58 +0100
Subject: stack_tracer: move sysctl registration to kernel/trace/trace_stack.c

Move stack_tracer_enabled into trace_stack_sysctl_table. This is part of
a greater effort to move ctl tables into their respective subsystems
which will reduce the merge conflicts in kernel/sysctl.c.

Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Joel Granados <joel.granados@kernel.org>
---
 include/linux/ftrace.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 59774513ae45..95851a6fb942 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -569,8 +569,6 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
 
 #ifdef CONFIG_STACK_TRACER
 
-extern int stack_tracer_enabled;
-
 int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
 		       size_t *lenp, loff_t *ppos);
 
-- 
cgit v1.2.3


From e1bdbbc98279164d910d2de82a745f090a8b249f Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Mon, 7 Apr 2025 13:36:55 -0500
Subject: ACPI: Add missing prototype for non CONFIG_SUSPEND/CONFIG_X86 case

acpi_register_lps0_dev() and acpi_unregister_lps0_dev() may be used
in drivers that don't require CONFIG_SUSPEND or compile on !X86.

Add prototypes for those cases.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202502191627.fRgoBwcZ-lkp@intel.com/
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/20250407183656.1503446-1-superm1@kernel.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3f2e93ed9730..fc372bbaa547 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1125,13 +1125,13 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state,
 
 acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
 					   u32 val_a, u32 val_b);
-#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
 struct acpi_s2idle_dev_ops {
 	struct list_head list_node;
 	void (*prepare)(void);
 	void (*check)(void);
 	void (*restore)(void);
 };
+#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
 int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
 void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
 int acpi_get_lps0_constraint(struct acpi_device *adev);
@@ -1140,6 +1140,13 @@ static inline int acpi_get_lps0_constraint(struct device *dev)
 {
 	return ACPI_STATE_UNKNOWN;
 }
+static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg)
+{
+	return -ENODEV;
+}
+static inline void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg)
+{
+}
 #endif /* CONFIG_SUSPEND && CONFIG_X86 */
 void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
 #else
-- 
cgit v1.2.3


From 092d00ead733563f6d278295e0b5c5f97558b726 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 19 Mar 2025 11:56:38 +0100
Subject: cleanup: Provide retain_and_null_ptr()

In cases where an allocation is consumed by another function, the
allocation needs to be retained on success or freed on failure. The code
pattern is usually:

	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);
	struct bar *b;

	,,,
	// Initialize f
	...
	if (ret)
		goto free;
        ...
	bar = bar_create(f);
	if (!bar) {
		ret = -ENOMEM;
	   	goto free;
	}
	...
	return 0;
free:
	kfree(f);
	return ret;

This prevents using __free(kfree) on @f because there is no canonical way
to tell the cleanup code that the allocation should not be freed.

Abusing no_free_ptr() by force ignoring the return value is not really a
sensible option either.

Provide an explicit macro retain_and_null_ptr(), which NULLs the cleanup
pointer. That makes it easy to analyze and reason about.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Link: https://lore.kernel.org/all/20250319105506.083538907@linutronix.de
---
 include/linux/cleanup.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 7e57047e1564..7093e1d08af0 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -216,6 +216,25 @@ const volatile void * __must_check_fn(const volatile void *val)
 
 #define return_ptr(p)	return no_free_ptr(p)
 
+/*
+ * Only for situations where an allocation is handed in to another function
+ * and consumed by that function on success.
+ *
+ *	struct foo *f __free(kfree) = kzalloc(sizeof(*f), GFP_KERNEL);
+ *
+ *	setup(f);
+ *	if (some_condition)
+ *		return -EINVAL;
+ *	....
+ *	ret = bar(f);
+ *	if (!ret)
+ *		retain_and_null_ptr(f);
+ *	return ret;
+ *
+ * After retain_and_null_ptr(f) the variable f is NULL and cannot be
+ * dereferenced anymore.
+ */
+#define retain_and_null_ptr(p)		((void)__get_and_null(p, NULL))
 
 /*
  * DEFINE_CLASS(name, type, exit, init, init_args...):
-- 
cgit v1.2.3


From 0dac2b09303c89ffb21d25d40bf6aefd39f214b0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 19 Mar 2025 11:56:39 +0100
Subject: genirq/msi: Use lock guards for MSI descriptor locking

Provide a lock guard for MSI descriptor locking and update the core code
accordingly.

No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/all/20250319105506.144672678@linutronix.de
---
 include/linux/irqdomain.h | 2 ++
 include/linux/msi.h       | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index bb7111105296..2f955716cf82 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -281,6 +281,8 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
 
 void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
 
+DEFINE_FREE(irq_domain_free_fwnode, struct fwnode_handle *, if (_T) irq_domain_free_fwnode(_T))
+
 struct irq_domain_chip_generic_info;
 
 /**
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 86e42742fd0f..4e439916dbcf 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -232,6 +232,9 @@ int msi_setup_device_data(struct device *dev);
 void msi_lock_descs(struct device *dev);
 void msi_unlock_descs(struct device *dev);
 
+DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, msi_lock_descs(_T->lock),
+		    msi_unlock_descs(_T->lock));
+
 struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
 				       enum msi_desc_filter filter);
 
-- 
cgit v1.2.3


From 9357e329cdeb6f2d27b431a22d4965700bec478a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 19 Mar 2025 11:57:01 +0100
Subject: genirq/msi: Rename msi_[un]lock_descs()

Now that all abuse is gone and the legit users are converted to
guard(msi_descs_lock), rename the lock functions and document them as
internal.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huwei.com>
Link: https://lore.kernel.org/all/20250319105506.864699741@linutronix.de
---
 include/linux/msi.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 4e439916dbcf..8c0ec9fc05a3 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -229,11 +229,11 @@ struct msi_dev_domain {
 
 int msi_setup_device_data(struct device *dev);
 
-void msi_lock_descs(struct device *dev);
-void msi_unlock_descs(struct device *dev);
+void __msi_lock_descs(struct device *dev);
+void __msi_unlock_descs(struct device *dev);
 
-DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, msi_lock_descs(_T->lock),
-		    msi_unlock_descs(_T->lock));
+DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, __msi_lock_descs(_T->lock),
+		    __msi_unlock_descs(_T->lock));
 
 struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
 				       enum msi_desc_filter filter);
-- 
cgit v1.2.3


From 6fec833b9d70c54ceacbf7d07665215fbd0cddef Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 28 Mar 2025 21:42:48 +0100
Subject: cpufreq: Add and use cpufreq policy locking guards

Introduce "read" and "write" locking guards for cpufreq policies and use
them where applicable in the cpufreq core.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://patch.msgid.link/8518682.T7Z3S40VBb@rjwysocki.net
---
 include/linux/cpufreq.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 400fee6427a5..cb972d2aa8df 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -170,6 +170,12 @@ struct cpufreq_policy {
 	struct notifier_block nb_max;
 };
 
+DEFINE_GUARD(cpufreq_policy_write, struct cpufreq_policy *,
+	     down_write(&_T->rwsem), up_write(&_T->rwsem))
+
+DEFINE_GUARD(cpufreq_policy_read, struct cpufreq_policy *,
+	     down_read(&_T->rwsem), up_read(&_T->rwsem))
+
 /*
  * Used for passing new cpufreq policy data to the cpufreq driver's ->verify()
  * callback for sanitization.  That callback is only expected to modify the min
-- 
cgit v1.2.3


From c7282dce257480f0f22ed3db69cfb400a18709f4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 28 Mar 2025 21:45:38 +0100
Subject: cpufreq: Drop cpufreq_cpu_acquire() and cpufreq_cpu_release()

Since cpufreq_cpu_acquire() and cpufreq_cpu_release() have no more
users in the tree, remove them.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://patch.msgid.link/3880470.kQq0lBPeGt@rjwysocki.net
---
 include/linux/cpufreq.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index cb972d2aa8df..a33a094ef755 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -241,8 +241,6 @@ void disable_cpufreq(void);
 
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
 
-struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu);
-void cpufreq_cpu_release(struct cpufreq_policy *policy);
 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
 void refresh_frequency_limits(struct cpufreq_policy *policy);
 void cpufreq_update_policy(unsigned int cpu);
-- 
cgit v1.2.3


From eaff6b62d3439ca6ee00dba4f77673a8c37dac20 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 28 Mar 2025 21:48:54 +0100
Subject: cpufreq: Pass policy pointer to ->update_limits()

Since cpufreq_update_limits() obtains a cpufreq policy pointer for the
given CPU and reference counts the corresponding policy object, it may
as well pass the policy pointer to the cpufreq driver's ->update_limits()
callback which allows that callback to avoid invoking cpufreq_cpu_get()
for the same CPU.

Accordingly, redefine ->update_limits() to take a policy pointer instead
of a CPU number and update both drivers implementing it, intel_pstate
and amd-pstate, as needed.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://patch.msgid.link/8560367.NyiUUSuA9g@rjwysocki.net
---
 include/linux/cpufreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a33a094ef755..f3cf2adea18f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -399,7 +399,7 @@ struct cpufreq_driver {
 	unsigned int	(*get)(unsigned int cpu);
 
 	/* Called to update policy limits on firmware notifications. */
-	void		(*update_limits)(unsigned int cpu);
+	void		(*update_limits)(struct cpufreq_policy *policy);
 
 	/* optional */
 	int		(*bios_limit)(int cpu, unsigned int *limit);
-- 
cgit v1.2.3


From 855c634930f04c21434fae9c41a7a7372b6ac879 Mon Sep 17 00:00:00 2001
From: Philipp Stanner <pstanner@redhat.com>
Date: Thu, 27 Mar 2025 12:07:08 +0100
Subject: PCI: Remove pcim_iounmap_regions()

All users of the deprecated function pcim_iounmap_regions() have been
ported by now. Remove it.

Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Zijun Hu <quic_zijuhu@quicinc.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20250327110707.20025-4-phasta@kernel.org
---
 include/linux/pci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e8e3fd77e96..a60fdb344d9e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2322,7 +2322,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
 void __iomem * const *pcim_iomap_table(struct pci_dev *pdev);
 int pcim_request_region(struct pci_dev *pdev, int bar, const char *name);
 int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name);
-void pcim_iounmap_regions(struct pci_dev *pdev, int mask);
 void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar,
 				unsigned long offset, unsigned long len);
 
-- 
cgit v1.2.3


From a82dc19db13649aa4232ce37cb6f4ceff851e2fe Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 8 Apr 2025 12:59:48 -0700
Subject: net: avoid potential race between netdev_get_by_index_lock() and
 netns switch

netdev_get_by_index_lock() performs following steps:

  rcu_lock();
  dev = lookup(netns, ifindex);
  dev_get(dev);
  rcu_unlock();
  [... lock & validate the dev ...]
  return dev

Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.

We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.

AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250408195956.412733-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf3b6445817b..8e9be80bc167 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1952,6 +1952,7 @@ enum netdev_reg_state {
  *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
  * 	@nd_net:		Network namespace this network device is inside
+ *				protected by @lock
  *
  * 	@ml_priv:	Mid-layer private
  *	@ml_priv_type:  Mid-layer private type
@@ -2359,6 +2360,9 @@ struct net_device {
 
 	bool dismantle;
 
+	/** @moving_ns: device is changing netns, protected by @lock */
+	bool moving_ns;
+
 	enum {
 		RTNL_LINK_INITIALIZED,
 		RTNL_LINK_INITIALIZING,
@@ -2521,7 +2525,7 @@ struct net_device {
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
 	 * Double protects:
-	 *	@up
+	 *	@up, @moving_ns, @nd_net
 	 *
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
-- 
cgit v1.2.3


From 606048cbd8346e616cfaee01b0143d072534136d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 8 Apr 2025 12:59:49 -0700
Subject: net: designate XSK pool pointers in queues as "ops protected"

Read accesses go via xsk_get_pool_from_qid(), the call coming
from the core and gve look safe (other "ops locked" drivers
don't support XSK).

Write accesses go via xsk_reg_pool_at_qid() and xsk_clear_pool_at_qid().
Former is already under the ops lock, latter is not (both coming from
the workqueue via xp_clear_dev() and NETDEV_UNREGISTER via xsk_notifier()).

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250408195956.412733-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8e9be80bc167..7242fb8a22fc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -688,6 +688,7 @@ struct netdev_queue {
 	/* Subordinate device that the queue has been assigned to */
 	struct net_device	*sb_dev;
 #ifdef CONFIG_XDP_SOCKETS
+	/* "ops protected", see comment about net_device::lock */
 	struct xsk_buff_pool    *pool;
 #endif
 
-- 
cgit v1.2.3


From 03df156dd3a6d5992f17682cd5c3b11e5ffdae02 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 8 Apr 2025 12:59:52 -0700
Subject: xdp: double protect netdev->xdp_flags with netdev->lock

Protect xdp_features with netdev->lock. This way pure readers
no longer have to take rtnl_lock to access the field.

This includes calling NETDEV_XDP_FEAT_CHANGE under the lock.
Looks like that's fine for bonding, the only "real" listener,
it's the same as ethtool feature change.

In terms of normal drivers - only GVE need special consideration
(other drivers don't use instance lock or don't support XDP).
It calls xdp_set_features_flag() helper from gve_init_priv() which
in turn is called from gve_reset_recovery() (locked), or prior
to netdev registration. So switch to _locked.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Acked-by: Harshitha Ramamurthy <hramamurthy@google.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20250408195956.412733-6-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7242fb8a22fc..dece2ae396a1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2526,7 +2526,7 @@ struct net_device {
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
 	 * Double protects:
-	 *	@up, @moving_ns, @nd_net
+	 *	@up, @moving_ns, @nd_net, @xdp_flags
 	 *
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
-- 
cgit v1.2.3


From 229671ac60e298b85c2644f52d7e487e9f487d06 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Apr 2025 20:27:42 +0000
Subject: net: remove cpu stall in txq_trans_update()

txq_trans_update() currently uses txq->xmit_lock_owner
to conditionally update txq->trans_start.

For regular devices, txq->xmit_lock_owner is updated
from HARD_TX_LOCK() and HARD_TX_UNLOCK(), and this apparently
causes cpu stalls.

Using dev->lltx, which sits in a read-mostly cache-line,
and already used in HARD_TX_LOCK() and HARD_TX_UNLOCK()
helps cpu prediction.

On an AMD EPYC 7B12 dual socket server, tcp_rr with 128 threads
and 30,000 flows gets a 5 % increase in throughput.

As explained in commit 95ecba62e2fd ("net: fix races in
netdev_tx_sent_queue()/dev_watchdog()") I am planning
to no longer update txq->trans_start in the fast path
in a followup patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250408202742.2145516-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dece2ae396a1..a28a08046615 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4693,9 +4693,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 /*
  * txq->trans_start can be read locklessly from dev_watchdog()
  */
-static inline void txq_trans_update(struct netdev_queue *txq)
+static inline void txq_trans_update(const struct net_device *dev,
+				    struct netdev_queue *txq)
 {
-	if (txq->xmit_lock_owner != -1)
+	if (!dev->lltx)
 		WRITE_ONCE(txq->trans_start, jiffies);
 }
 
@@ -5214,7 +5215,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi
 
 	rc = __netdev_start_xmit(ops, skb, dev, more);
 	if (rc == NETDEV_TX_OK)
-		txq_trans_update(txq);
+		txq_trans_update(dev, txq);
 
 	return rc;
 }
-- 
cgit v1.2.3


From 8702048bb8313a21ec9179e9ce7ad3c5cb2ef072 Mon Sep 17 00:00:00 2001
From: Jocelyn Falempe <jfalempe@redhat.com>
Date: Mon, 7 Apr 2025 15:42:25 +0200
Subject: mm/kmap: Add kmap_local_page_try_from_panic()

kmap_local_page() can be unsafe to call from a panic handler, if
CONFIG_HIGHMEM is set, and the page is in the highmem zone.
So add kmap_local_page_try_from_panic() to handle this case.

Suggested-by: Simona Vetter <simona.vetter@ffwll.ch>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://lore.kernel.org/r/20250407140138.162383-2-jfalempe@redhat.com
---
 include/linux/highmem-internal.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index dd100e849f5e..9a7683d79a4b 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -73,6 +73,14 @@ static inline void *kmap_local_page(struct page *page)
 	return __kmap_local_page_prot(page, kmap_prot);
 }
 
+static inline void *kmap_local_page_try_from_panic(struct page *page)
+{
+	if (!PageHighMem(page))
+		return page_address(page);
+	/* If the page is in HighMem, it's not safe to kmap it.*/
+	return NULL;
+}
+
 static inline void *kmap_local_folio(struct folio *folio, size_t offset)
 {
 	struct page *page = folio_page(folio, offset / PAGE_SIZE);
@@ -180,6 +188,11 @@ static inline void *kmap_local_page(struct page *page)
 	return page_address(page);
 }
 
+static inline void *kmap_local_page_try_from_panic(struct page *page)
+{
+	return page_address(page);
+}
+
 static inline void *kmap_local_folio(struct folio *folio, size_t offset)
 {
 	return page_address(&folio->page) + offset;
-- 
cgit v1.2.3


From 1490cbb9dbfd0eabfe45f9b674097aea7e6760fc Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Mar 2025 16:54:51 +0200
Subject: device property: Split fwnode_get_child_node_count()

The new helper is introduced to allow counting the child firmware nodes
of their parent without requiring a device to be passed. This also makes
the fwnode and device property API more symmetrical with the rest.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: "Rafael J. Wysocki" <rafael@kernel.org>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20250310150835.3139322-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/property.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/property.h b/include/linux/property.h
index e214ecd241eb..bc5bfc98176b 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -208,7 +208,12 @@ DEFINE_FREE(fwnode_handle, struct fwnode_handle *, fwnode_handle_put(_T))
 int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index);
 int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name);
 
-unsigned int device_get_child_node_count(const struct device *dev);
+unsigned int fwnode_get_child_node_count(const struct fwnode_handle *fwnode);
+
+static inline unsigned int device_get_child_node_count(const struct device *dev)
+{
+	return fwnode_get_child_node_count(dev_fwnode(dev));
+}
 
 static inline int device_property_read_u8(const struct device *dev,
 					  const char *propname, u8 *val)
-- 
cgit v1.2.3


From 7e3711eb87c584ed224a7ad7000eba36e6fa3a51 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 21 Mar 2025 10:53:55 +0100
Subject: fbdev: Track display blanking state

Store the display's blank status in struct fb_info.blank and track
it in fb_blank(). As an extra, the status is now available from the
sysfs blank attribute.

Support for blanking is optional. Therefore framebuffer_alloc()
initializes the state to FB_BLANK_UNBLANK (i.e., the display is
on). If the fb_blank callback has been set, register_framebuffer()
sets the state to FB_BLANK_POWERDOWN. On the first modeset, the
call to fb_blank() will update it to _UNBLANK. This is important,
as listeners to FB_EVENT_BLANK will now see the display being
switched on.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://lore.kernel.org/r/20250321095517.313713-3-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/fb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index cd653862ab99..348aa2e146e2 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -472,6 +472,8 @@ struct fb_info {
 	struct list_head modelist;      /* mode list */
 	struct fb_videomode *mode;	/* current mode */
 
+	int blank; /* current blanking; see FB_BLANK_ constants */
+
 #if IS_ENABLED(CONFIG_FB_BACKLIGHT)
 	/* assigned backlight device */
 	/* set before framebuffer registration,
-- 
cgit v1.2.3


From 726491f2038ec71122d45700f3abf36fdb277aaa Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 21 Mar 2025 10:53:57 +0100
Subject: backlight: Implement fbdev tracking with blank state from event

Look at the blank state provided by FB_EVENT_BLANK to determine
whether to enable or disable a backlight. Remove the tracking fields
from struct backlight_device.

Tracking requires three variables, fb_on, prev_fb_on and the
backlight's use_count. If fb_on is true, the display has been
unblanked. The backlight needs to be enabled if the display was
blanked before (i.e., prev_fb_on is false) or if use_count is still
at 0. If fb_on is false, the display has been blanked. In this case,
the backlight has to be disabled was unblanked before and the
backlight's use_count is greater than 0.

This change removes fbdev state tracking from blacklight. All the
backlight requires it its own use counter and information about
changes to the display. Removing fbdev internals makes  backlight
drivers easier to integrate into other display drivers, such as DRM.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: "Daniel Thompson (RISCstar)" <danielt@kernel.org>
Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://lore.kernel.org/r/20250321095517.313713-5-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/backlight.h | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index f5652e5a9060..03723a5478f8 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -294,15 +294,7 @@ struct backlight_device {
 	struct device dev;
 
 	/**
-	 * @fb_bl_on: The state of individual fbdev's.
-	 *
-	 * Multiple fbdev's may share one backlight device. The fb_bl_on
-	 * records the state of the individual fbdev.
-	 */
-	bool fb_bl_on[FB_MAX];
-
-	/**
-	 * @use_count: The number of uses of fb_bl_on.
+	 * @use_count: The number of unblanked displays.
 	 */
 	int use_count;
 };
-- 
cgit v1.2.3


From b01beb2f1f6bcda17634a5b529865ffc5a9b795f Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 21 Mar 2025 10:53:59 +0100
Subject: backlight: Replace fb events with a dedicated function call

Remove support for fb events from backlight subsystem. Provide the
helper backlight_notify_blank_all() instead. Also export the existing
helper backlight_notify_blank() to update a single backlight device.

In fbdev, call either helper to inform the backlight subsystem of
changes to a display's blank state. If the framebuffer device has a
specific backlight, only update this one; otherwise update all.

v4:
- protect blacklight declarations with IS_REACHABLE() (kernel test robot)
v3:
- declare empty fb_bl_notify_blank() as static inline (kernel test robot)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
Reviewed-by: "Daniel Thompson (RISCstar)" <danielt@kernel.org>
Link: https://lore.kernel.org/r/20250321095517.313713-7-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/backlight.h | 22 ++++++++++++++++------
 include/linux/fb.h        |  4 ++++
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 03723a5478f8..10e626db7eee 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -12,7 +12,6 @@
 #include <linux/device.h>
 #include <linux/fb.h>
 #include <linux/mutex.h>
-#include <linux/notifier.h>
 #include <linux/types.h>
 
 /**
@@ -278,11 +277,6 @@ struct backlight_device {
 	 */
 	const struct backlight_ops *ops;
 
-	/**
-	 * @fb_notif: The framebuffer notifier block
-	 */
-	struct notifier_block fb_notif;
-
 	/**
 	 * @entry: List entry of all registered backlight devices
 	 */
@@ -400,6 +394,22 @@ struct backlight_device *backlight_device_get_by_type(enum backlight_type type);
 int backlight_device_set_brightness(struct backlight_device *bd,
 				    unsigned long brightness);
 
+#if IS_REACHABLE(CONFIG_BACKLIGHT_CLASS_DEVICE)
+void backlight_notify_blank(struct backlight_device *bd,
+			    struct device *display_dev,
+			    bool fb_on, bool prev_fb_on);
+void backlight_notify_blank_all(struct device *display_dev,
+				bool fb_on, bool prev_fb_on);
+#else
+static inline void backlight_notify_blank(struct backlight_device *bd,
+					  struct device *display_dev,
+					  bool fb_on, bool prev_fb_on)
+{ }
+static inline void backlight_notify_blank_all(struct device *display_dev,
+					      bool fb_on, bool prev_fb_on)
+{ }
+#endif
+
 #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev)
 
 /**
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 348aa2e146e2..835e13d6eba8 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -758,11 +758,15 @@ extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max)
 
 #if IS_ENABLED(CONFIG_FB_BACKLIGHT)
 struct backlight_device *fb_bl_device(struct fb_info *info);
+void fb_bl_notify_blank(struct fb_info *info, int old_blank);
 #else
 static inline struct backlight_device *fb_bl_device(struct fb_info *info)
 {
 	return NULL;
 }
+
+static inline void fb_bl_notify_blank(struct fb_info *info, int old_blank)
+{ }
 #endif
 
 static inline struct lcd_device *fb_lcd_device(struct fb_info *info)
-- 
cgit v1.2.3


From bc70cc84f5a2ebfd7e7112e9b8837e0c7954fc65 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 21 Mar 2025 10:54:01 +0100
Subject: backlight: lcd: Replace fb events with a dedicated function call

Remove support for fb events from the lcd subsystem. Provide the
helper lcd_notify_blank_all() instead. In fbdev, call
lcd_notify_blank_all() to inform the lcd subsystem of changes
to a display's blank state.

Fbdev maintains a list of all installed notifiers. Instead of fbdev
notifiers, maintain an internal list of lcd devices.

v3:
- export lcd_notify_mode_change_all() (kernel test robot)
v2:
- maintain global list of lcd devices
- avoid IS_REACHABLE() in source file
- use lock guards
- initialize lcd list and list mutex

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
Reviewed-by: "Daniel Thompson (RISCstar)" <danielt@kernel.org>
Link: https://lore.kernel.org/r/20250321095517.313713-9-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/lcd.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index c3ccdff4519a..d4fa03722b72 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -11,7 +11,6 @@
 
 #include <linux/device.h>
 #include <linux/mutex.h>
-#include <linux/notifier.h>
 
 #define LCD_POWER_ON			(0)
 #define LCD_POWER_REDUCED		(1) // deprecated; don't use in new code
@@ -79,8 +78,11 @@ struct lcd_device {
 	const struct lcd_ops *ops;
 	/* Serialise access to set_power method */
 	struct mutex update_lock;
-	/* The framebuffer notifier block */
-	struct notifier_block fb_notif;
+
+	/**
+	 * @entry: List entry of all registered lcd devices
+	 */
+	struct list_head entry;
 
 	struct device dev;
 };
@@ -125,6 +127,19 @@ extern void lcd_device_unregister(struct lcd_device *ld);
 extern void devm_lcd_device_unregister(struct device *dev,
 	struct lcd_device *ld);
 
+#if IS_REACHABLE(CONFIG_LCD_CLASS_DEVICE)
+void lcd_notify_blank_all(struct device *display_dev, int power);
+void lcd_notify_mode_change_all(struct device *display_dev,
+				unsigned int width, unsigned int height);
+#else
+static inline void lcd_notify_blank_all(struct device *display_dev, int power)
+{}
+
+static inline void lcd_notify_mode_change_all(struct device *display_dev,
+					      unsigned int width, unsigned int height)
+{}
+#endif
+
 #define to_lcd_device(obj) container_of(obj, struct lcd_device, dev)
 
 static inline void * lcd_get_data(struct lcd_device *ld_dev)
-- 
cgit v1.2.3


From dc2139c0aa3283e5749109641af1878ed7bf7371 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 21 Mar 2025 10:54:03 +0100
Subject: leds: backlight trigger: Replace fb events with a dedicated function
 call

Remove support for fb events from the led backlight trigger. Provide
the helper ledtrig_backlight_blank() instead. Call it from fbdev to
inform the trigger of changes to a display's blank state.

Fbdev maintains a list of all installed notifiers. Instead of the fbdev
notifiers, maintain an internal list of led backlight triggers.

v3:
- export ledtrig_backlight_blank()
v2:
- maintain global list of led backlight triggers (Lee)
- avoid IS_REACHABLE() in source file (Lee)
- notify on changes to blank state instead of display state
- use lock guards
- initialize led list and list mutex

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://lore.kernel.org/r/20250321095517.313713-11-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/leds.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 98f9719c924c..b3f0aa081064 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -640,6 +640,12 @@ static inline void ledtrig_flash_ctrl(bool on) {}
 static inline void ledtrig_torch_ctrl(bool on) {}
 #endif
 
+#if IS_REACHABLE(CONFIG_LEDS_TRIGGER_BACKLIGHT)
+void ledtrig_backlight_blank(bool blank);
+#else
+static inline void ledtrig_backlight_blank(bool blank) {}
+#endif
+
 /*
  * Generic LED platform data for describing LED names and default triggers.
  */
-- 
cgit v1.2.3


From d32a0b567a8a8b6e677d35c4f8eb8bd32b7029a0 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 21 Mar 2025 10:54:04 +0100
Subject: fbdev: Remove constants of unused events

The constants FB_EVENT_MODE_CHANGE and FB_EVENT_BLANK are unused.
Remove them from the header file.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
Link: https://lore.kernel.org/r/20250321095517.313713-12-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/fb.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 835e13d6eba8..05cc251035da 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -129,18 +129,12 @@ struct fb_cursor_user {
  * Register/unregister for framebuffer events
  */
 
-/*	The resolution of the passed in fb_info about to change */
-#define FB_EVENT_MODE_CHANGE		0x01
-
 #ifdef CONFIG_GUMSTIX_AM200EPD
 /* only used by mach-pxa/am200epd.c */
 #define FB_EVENT_FB_REGISTERED          0x05
 #define FB_EVENT_FB_UNREGISTERED        0x06
 #endif
 
-/*      A display blank is requested       */
-#define FB_EVENT_BLANK                  0x09
-
 struct fb_event {
 	struct fb_info *info;
 	void *data;
-- 
cgit v1.2.3


From 589a7c406a721f5d3a818ad0003799180f027dfa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Apr 2025 12:20:43 +0200
Subject: cpufreq: Drop unused cpufreq_get_policy()

A recent change has introduced a bug into cpufreq_get_policy(), but this
function is not used, so it's better to drop it altogether.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://patch.msgid.link/2802770.mvXUDI8C0e@rjwysocki.net
---
 include/linux/cpufreq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index f3cf2adea18f..850fe7371cb1 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -241,7 +241,6 @@ void disable_cpufreq(void);
 
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
 
-int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
 void refresh_frequency_limits(struct cpufreq_policy *policy);
 void cpufreq_update_policy(unsigned int cpu);
 void cpufreq_update_limits(unsigned int cpu);
-- 
cgit v1.2.3


From 588d5d20ca8defa5ba5d1b536ff3695f6ab7aa87 Mon Sep 17 00:00:00 2001
From: Kaustabh Chakraborty <kauschluss@disroot.org>
Date: Thu, 10 Apr 2025 14:01:14 +0530
Subject: phy: exynos5-usbdrd: add exynos7870 USBDRD support

Implement support for Exynos7870 USB DRD on top of the existing
exynos5-usbdrd driver.

Exynos7870 has a single USB 2.0 DRD PHY controller and no 3.0 PHYs. Thus,
it only supports the UTMI interface.

Moreover, the PMU register offset for enabling the PHY controller is
different for SoCs such as Exynos7870, where BIT(0) is for the 3.0 PHY and
BIT(1) is for the 2.0 PHY. The phy_isol function for Exynos7870 uses the
appropriate register offsets.

Signed-off-by: Kaustabh Chakraborty <kauschluss@disroot.org>
Link: https://lore.kernel.org/r/20250410-exynos7870-usbphy-v2-3-2eb005987455@disroot.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index ce1a3790d6fb..cde299a85384 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -55,6 +55,8 @@
 #define EXYNOS4_MIPI_PHY_SRESETN		(1 << 1)
 #define EXYNOS4_MIPI_PHY_MRESETN		(1 << 2)
 #define EXYNOS4_MIPI_PHY_RESET_MASK		(3 << 1)
+/* USB PHY enable bit, valid for Exynos7870 */
+#define EXYNOS7870_USB2PHY_ENABLE		(1 << 1)
 
 #define S5P_INFORM0				0x0800
 #define S5P_INFORM1				0x0804
-- 
cgit v1.2.3


From b2849b0723668ae3e0739b2c9c066eafe8bc0961 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 3 Apr 2025 12:09:40 +0200
Subject: svsm: Add header with SVSM_VTPM_CMD helpers

Add helpers for the SVSM_VTPM_CMD calls used by the vTPM protocol defined by
the AMD SVSM spec [1].

The vTPM protocol follows the Official TPM 2.0 Reference Implementation
(originally by Microsoft, now part of the TCG) simulator protocol.

  [1] "Secure VM Service Module for SEV-SNP Guests"
      Publication # 58019 Revision: 1.00

Co-developed-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Co-developed-by: Claudio Carvalho <cclaudio@linux.ibm.com>
Signed-off-by: Claudio Carvalho <cclaudio@linux.ibm.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Link: https://lore.kernel.org/r/20250403100943.120738-3-sgarzare@redhat.com
---
 include/linux/tpm_svsm.h | 149 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 include/linux/tpm_svsm.h

(limited to 'include/linux')

diff --git a/include/linux/tpm_svsm.h b/include/linux/tpm_svsm.h
new file mode 100644
index 000000000000..38e341f9761a
--- /dev/null
+++ b/include/linux/tpm_svsm.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 James.Bottomley@HansenPartnership.com
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ *
+ * Helpers for the SVSM_VTPM_CMD calls used by the vTPM protocol defined by the
+ * AMD SVSM spec [1].
+ *
+ * The vTPM protocol follows the Official TPM 2.0 Reference Implementation
+ * (originally by Microsoft, now part of the TCG) simulator protocol.
+ *
+ * [1] "Secure VM Service Module for SEV-SNP Guests"
+ *     Publication # 58019 Revision: 1.00
+ */
+#ifndef _TPM_SVSM_H_
+#define _TPM_SVSM_H_
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define SVSM_VTPM_MAX_BUFFER		4096 /* max req/resp buffer size */
+
+/**
+ * struct svsm_vtpm_request - Generic request for single word command
+ * @cmd:	The command to send
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 15: vTPM Common Request/Response Structure
+ *     Byte      Size       In/Out    Description
+ *     Offset    (Bytes)
+ *     0x000     4          In        Platform command
+ *                          Out       Platform command response size
+ */
+struct svsm_vtpm_request {
+	u32 cmd;
+};
+
+/**
+ * struct svsm_vtpm_response - Generic response
+ * @size:	The response size (zero if nothing follows)
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 15: vTPM Common Request/Response Structure
+ *     Byte      Size       In/Out    Description
+ *     Offset    (Bytes)
+ *     0x000     4          In        Platform command
+ *                          Out       Platform command response size
+ *
+ * Note: most TCG Simulator commands simply return zero here with no indication
+ * of success or failure.
+ */
+struct svsm_vtpm_response {
+	u32 size;
+};
+
+/**
+ * struct svsm_vtpm_cmd_request - Structure for a TPM_SEND_COMMAND request
+ * @cmd:	The command to send (must be TPM_SEND_COMMAND)
+ * @locality:	The locality
+ * @buf_size:	The size of the input buffer following
+ * @buf:	A buffer of size buf_size
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 16: TPM_SEND_COMMAND Request Structure
+ *     Byte      Size       Meaning
+ *     Offset    (Bytes)
+ *     0x000     4          Platform command (8)
+ *     0x004     1          Locality (must-be-0)
+ *     0x005     4          TPM Command size (in bytes)
+ *     0x009     Variable   TPM Command
+ *
+ * Note: the TCG Simulator expects @buf_size to be equal to the size of the
+ * specific TPM command, otherwise an TPM_RC_COMMAND_SIZE error is returned.
+ */
+struct svsm_vtpm_cmd_request {
+	u32 cmd;
+	u8 locality;
+	u32 buf_size;
+	u8 buf[];
+} __packed;
+
+/**
+ * struct svsm_vtpm_cmd_response - Structure for a TPM_SEND_COMMAND response
+ * @buf_size:	The size of the output buffer following
+ * @buf:	A buffer of size buf_size
+ *
+ * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" -
+ * Table 17: TPM_SEND_COMMAND Response Structure
+ *     Byte      Size       Meaning
+ *     Offset    (Bytes)
+ *     0x000     4          Response size (in bytes)
+ *     0x004     Variable   Response
+ */
+struct svsm_vtpm_cmd_response {
+	u32 buf_size;
+	u8 buf[];
+};
+
+/**
+ * svsm_vtpm_cmd_request_fill() - Fill a TPM_SEND_COMMAND request to be sent to SVSM
+ * @req: The struct svsm_vtpm_cmd_request to fill
+ * @locality: The locality
+ * @buf: The buffer from where to copy the payload of the command
+ * @len: The size of the buffer
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static inline int
+svsm_vtpm_cmd_request_fill(struct svsm_vtpm_cmd_request *req, u8 locality,
+			   const u8 *buf, size_t len)
+{
+	if (len > SVSM_VTPM_MAX_BUFFER - sizeof(*req))
+		return -EINVAL;
+
+	req->cmd = 8; /* TPM_SEND_COMMAND */
+	req->locality = locality;
+	req->buf_size = len;
+
+	memcpy(req->buf, buf, len);
+
+	return 0;
+}
+
+/**
+ * svsm_vtpm_cmd_response_parse() - Parse a TPM_SEND_COMMAND response received from SVSM
+ * @resp: The struct svsm_vtpm_cmd_response to parse
+ * @buf: The buffer where to copy the response
+ * @len: The size of the buffer
+ *
+ * Return: buffer size filled with the response on success, negative error
+ * code on failure.
+ */
+static inline int
+svsm_vtpm_cmd_response_parse(const struct svsm_vtpm_cmd_response *resp, u8 *buf,
+			     size_t len)
+{
+	if (len < resp->buf_size)
+		return -E2BIG;
+
+	if (resp->buf_size > SVSM_VTPM_MAX_BUFFER - sizeof(*resp))
+		return -EINVAL;  // Invalid response from the platform TPM
+
+	memcpy(buf, resp->buf, resp->buf_size);
+
+	return resp->buf_size;
+}
+
+#endif /* _TPM_SVSM_H_ */
-- 
cgit v1.2.3


From 74a70e80daa993445a8f0be817f8152a3b9d3b41 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 9 Apr 2025 22:43:10 +0200
Subject: PCI: Remove pci_fixup_cardbus()

Since 1c7f4fe86f17 ("powerpc/pci: Remove pcibios_setup_bus_devices()")
there's no architecture left setting pci_fixup_cardbus. Therefore remove
support from PCI core.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/8de7da4c-2b16-4ee1-8c42-0d04f3c821c6@gmail.com
---
 include/linux/pci.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e8e3fd77e96..d26e6611bd00 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1139,9 +1139,6 @@ resource_size_t pcibios_align_resource(void *, const struct resource *,
 				resource_size_t,
 				resource_size_t);
 
-/* Weak but can be overridden by arch */
-void pci_fixup_cardbus(struct pci_bus *);
-
 /* Generic PCI functions used internally */
 
 void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
-- 
cgit v1.2.3


From b1e904999542ad6764eafa54545f1c55776006d1 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Tue, 8 Apr 2025 11:32:01 -0700
Subject: net: pass const to msg_data_left()

The msg_data_left() function doesn't modify the struct msghdr parameter,
so mark it as const. This allows the function to be used with const
references, improving type safety and making the API more flexible.

Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250408-tcpsendmsg-v3-1-208b87064c28@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/socket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index c3322eb3d686..3b262487ec06 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
 	return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
 }
 
-static inline size_t msg_data_left(struct msghdr *msg)
+static inline size_t msg_data_left(const struct msghdr *msg)
 {
 	return iov_iter_count(&msg->msg_iter);
 }
-- 
cgit v1.2.3


From de66754e9f8029f8ae955a588959b99cab56b506 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 9 Apr 2025 12:47:34 -0700
Subject: xhci: sideband: add initial api to register a secondary interrupter
 entity

Introduce XHCI sideband, which manages the USB endpoints being requested by
a client driver.  This is used for when client drivers are attempting to
offload USB endpoints to another entity for handling USB transfers.  XHCI
sec intr will allow for drivers to fetch the required information about the
transfer ring, so the user can submit transfers independently.  Expose the
required APIs for drivers to register and request for a USB endpoint and to
manage XHCI secondary interrupters.

Driver renaming, multiple ring segment page linking, proper endpoint clean
up, and allowing module compilation added by Wesley Cheng to complete
original concept code by Mathias Nyman.

Tested-by: Puma Hsu <pumahsu@google.com>
Tested-by: Daehwan Jung <dh10.jung@samsung.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Co-developed-by: Wesley Cheng <quic_wcheng@quicinc.com>
Signed-off-by: Wesley Cheng <quic_wcheng@quicinc.com>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20250409194804.3773260-2-quic_wcheng@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/xhci-sideband.h | 74 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 include/linux/usb/xhci-sideband.h

(limited to 'include/linux')

diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h
new file mode 100644
index 000000000000..4b382af892fa
--- /dev/null
+++ b/include/linux/usb/xhci-sideband.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * xHCI host controller sideband support
+ *
+ * Copyright (c) 2023-2025, Intel Corporation.
+ *
+ * Author: Mathias Nyman <mathias.nyman@linux.intel.com>
+ */
+#ifndef __LINUX_XHCI_SIDEBAND_H
+#define __LINUX_XHCI_SIDEBAND_H
+
+#include <linux/scatterlist.h>
+#include <linux/usb.h>
+
+#define	EP_CTX_PER_DEV		31	/* FIXME defined twice, from xhci.h */
+
+struct xhci_sideband;
+
+enum xhci_sideband_type {
+	XHCI_SIDEBAND_AUDIO,
+	XHCI_SIDEBAND_VENDOR,
+};
+
+/**
+ * struct xhci_sideband - representation of a sideband accessed usb device.
+ * @xhci: The xhci host controller the usb device is connected to
+ * @vdev: the usb device accessed via sideband
+ * @eps: array of endpoints controlled via sideband
+ * @ir: event handling and buffer for sideband accessed device
+ * @type: xHCI sideband type
+ * @mutex: mutex for sideband operations
+ * @intf: USB sideband client interface
+ *
+ * FIXME usb device accessed via sideband Keeping track of sideband accessed usb devices.
+ */
+struct xhci_sideband {
+	struct xhci_hcd                 *xhci;
+	struct xhci_virt_device         *vdev;
+	struct xhci_virt_ep             *eps[EP_CTX_PER_DEV];
+	struct xhci_interrupter         *ir;
+	enum xhci_sideband_type		type;
+
+	/* Synchronizing xHCI sideband operations with client drivers operations */
+	struct mutex			mutex;
+
+	struct usb_interface		*intf;
+};
+
+struct xhci_sideband *
+xhci_sideband_register(struct usb_interface *intf, enum xhci_sideband_type type);
+void
+xhci_sideband_unregister(struct xhci_sideband *sb);
+int
+xhci_sideband_add_endpoint(struct xhci_sideband *sb,
+			   struct usb_host_endpoint *host_ep);
+int
+xhci_sideband_remove_endpoint(struct xhci_sideband *sb,
+			      struct usb_host_endpoint *host_ep);
+int
+xhci_sideband_stop_endpoint(struct xhci_sideband *sb,
+			    struct usb_host_endpoint *host_ep);
+struct sg_table *
+xhci_sideband_get_endpoint_buffer(struct xhci_sideband *sb,
+				  struct usb_host_endpoint *host_ep);
+struct sg_table *
+xhci_sideband_get_event_buffer(struct xhci_sideband *sb);
+int
+xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg,
+				 bool ip_autoclear, u32 imod_interval);
+void
+xhci_sideband_remove_interrupter(struct xhci_sideband *sb);
+int
+xhci_sideband_interrupter_id(struct xhci_sideband *sb);
+#endif /* __LINUX_XHCI_SIDEBAND_H */
-- 
cgit v1.2.3


From fce57295497df70711d50ed01bf6d914de0ca647 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <quic_wcheng@quicinc.com>
Date: Wed, 9 Apr 2025 12:47:36 -0700
Subject: usb: host: xhci-mem: Allow for interrupter clients to choose specific
 index

Some clients may operate only on a specific XHCI interrupter instance.
Allow for the associated class driver to request for the interrupter that
it requires.

Tested-by: Puma Hsu <pumahsu@google.com>
Tested-by: Daehwan Jung <dh10.jung@samsung.com>
Signed-off-by: Wesley Cheng <quic_wcheng@quicinc.com>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20250409194804.3773260-4-quic_wcheng@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/xhci-sideband.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h
index 4b382af892fa..f8722afb8a2d 100644
--- a/include/linux/usb/xhci-sideband.h
+++ b/include/linux/usb/xhci-sideband.h
@@ -66,7 +66,7 @@ struct sg_table *
 xhci_sideband_get_event_buffer(struct xhci_sideband *sb);
 int
 xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg,
-				 bool ip_autoclear, u32 imod_interval);
+				 bool ip_autoclear, u32 imod_interval, int intr_num);
 void
 xhci_sideband_remove_interrupter(struct xhci_sideband *sb);
 int
-- 
cgit v1.2.3


From b85a2ebda1034881d09e6127726dc78950669474 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <quic_wcheng@quicinc.com>
Date: Wed, 9 Apr 2025 12:47:38 -0700
Subject: usb: host: xhci: Notify xHCI sideband on transfer ring free

In the case of handling a USB bus reset, the xhci_discover_or_reset_device
can run without first notifying the xHCI sideband client driver to stop or
prevent the use of the transfer ring.  It was seen that when a bus reset
situation happened, the USB offload driver was attempting to fetch the xHCI
transfer ring information, which was already freed.

Tested-by: Puma Hsu <pumahsu@google.com>
Tested-by: Daehwan Jung <dh10.jung@samsung.com>
Signed-off-by: Wesley Cheng <quic_wcheng@quicinc.com>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20250409194804.3773260-6-quic_wcheng@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/xhci-sideband.h | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h
index f8722afb8a2d..45288c392f6e 100644
--- a/include/linux/usb/xhci-sideband.h
+++ b/include/linux/usb/xhci-sideband.h
@@ -21,6 +21,20 @@ enum xhci_sideband_type {
 	XHCI_SIDEBAND_VENDOR,
 };
 
+enum xhci_sideband_notify_type {
+	XHCI_SIDEBAND_XFER_RING_FREE,
+};
+
+/**
+ * struct xhci_sideband_event - sideband event
+ * @type: notifier type
+ * @evt_data: event data
+ */
+struct xhci_sideband_event {
+	enum xhci_sideband_notify_type type;
+	void *evt_data;
+};
+
 /**
  * struct xhci_sideband - representation of a sideband accessed usb device.
  * @xhci: The xhci host controller the usb device is connected to
@@ -30,6 +44,7 @@ enum xhci_sideband_type {
  * @type: xHCI sideband type
  * @mutex: mutex for sideband operations
  * @intf: USB sideband client interface
+ * @notify_client: callback for xHCI sideband sequences
  *
  * FIXME usb device accessed via sideband Keeping track of sideband accessed usb devices.
  */
@@ -44,10 +59,14 @@ struct xhci_sideband {
 	struct mutex			mutex;
 
 	struct usb_interface		*intf;
+	int (*notify_client)(struct usb_interface *intf,
+			     struct xhci_sideband_event *evt);
 };
 
 struct xhci_sideband *
-xhci_sideband_register(struct usb_interface *intf, enum xhci_sideband_type type);
+xhci_sideband_register(struct usb_interface *intf, enum xhci_sideband_type type,
+		       int (*notify_client)(struct usb_interface *intf,
+				    struct xhci_sideband_event *evt));
 void
 xhci_sideband_unregister(struct xhci_sideband *sb);
 int
@@ -71,4 +90,13 @@ void
 xhci_sideband_remove_interrupter(struct xhci_sideband *sb);
 int
 xhci_sideband_interrupter_id(struct xhci_sideband *sb);
+
+#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND)
+void xhci_sideband_notify_ep_ring_free(struct xhci_sideband *sb,
+				       unsigned int ep_index);
+#else
+static inline void xhci_sideband_notify_ep_ring_free(struct xhci_sideband *sb,
+						     unsigned int ep_index)
+{ }
+#endif /* IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) */
 #endif /* __LINUX_XHCI_SIDEBAND_H */
-- 
cgit v1.2.3


From 67890d579402804b1d32b3280d9860073542528e Mon Sep 17 00:00:00 2001
From: Wesley Cheng <quic_wcheng@quicinc.com>
Date: Wed, 9 Apr 2025 12:47:40 -0700
Subject: ALSA: Add USB audio device jack type

Add an USB jack type, in order to support notifying of a valid USB audio
device.  Since USB audio devices can have a slew of different
configurations that reach beyond the basic headset and headphone use cases,
classify these devices differently.

Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Wesley Cheng <quic_wcheng@quicinc.com>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20250409194804.3773260-8-quic_wcheng@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mod_devicetable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index bd7e60c0b72f..dde836875deb 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -340,7 +340,7 @@ struct pcmcia_device_id {
 #define INPUT_DEVICE_ID_LED_MAX		0x0f
 #define INPUT_DEVICE_ID_SND_MAX		0x07
 #define INPUT_DEVICE_ID_FF_MAX		0x7f
-#define INPUT_DEVICE_ID_SW_MAX		0x10
+#define INPUT_DEVICE_ID_SW_MAX		0x11
 #define INPUT_DEVICE_ID_PROP_MAX	0x1f
 
 #define INPUT_DEVICE_ID_MATCH_BUS	1
-- 
cgit v1.2.3


From 10ed34d6eaaf86e301a8f2dd190d26dfbc9799bd Mon Sep 17 00:00:00 2001
From: Sandor Yu <Sandor.yu@nxp.com>
Date: Tue, 18 Mar 2025 14:35:35 +0200
Subject: phy: Add HDMI configuration options

Allow HDMI PHYs to be configured through the generic
functions through a custom structure added to the generic union.

The parameters added here are based on HDMI PHY
implementation practices.  The current set of parameters
should cover the potential users.

Signed-off-by: Sandor Yu <Sandor.yu@nxp.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/d1cff6c03ec3732d2244022029245ab2d954d997.1734340233.git.Sandor.yu@nxp.com
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Link: https://lore.kernel.org/r/20250318-phy-sam-hdptx-bpc-v6-1-8cb1678e7663@collabora.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/phy/phy-hdmi.h | 19 +++++++++++++++++++
 include/linux/phy/phy.h      |  7 ++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/phy/phy-hdmi.h

(limited to 'include/linux')

diff --git a/include/linux/phy/phy-hdmi.h b/include/linux/phy/phy-hdmi.h
new file mode 100644
index 000000000000..6a696922bc7f
--- /dev/null
+++ b/include/linux/phy/phy-hdmi.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022,2024 NXP
+ */
+
+#ifndef __PHY_HDMI_H_
+#define __PHY_HDMI_H_
+
+/**
+ * struct phy_configure_opts_hdmi - HDMI configuration set
+ * @tmds_char_rate: HDMI TMDS Character Rate in Hertz.
+ *
+ * This structure is used to represent the configuration state of a HDMI phy.
+ */
+struct phy_configure_opts_hdmi {
+	unsigned long long tmds_char_rate;
+};
+
+#endif /* __PHY_HDMI_H_ */
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index e63e6e70e860..437769e061b7 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -17,6 +17,7 @@
 #include <linux/regulator/consumer.h>
 
 #include <linux/phy/phy-dp.h>
+#include <linux/phy/phy-hdmi.h>
 #include <linux/phy/phy-lvds.h>
 #include <linux/phy/phy-mipi-dphy.h>
 
@@ -42,7 +43,8 @@ enum phy_mode {
 	PHY_MODE_MIPI_DPHY,
 	PHY_MODE_SATA,
 	PHY_MODE_LVDS,
-	PHY_MODE_DP
+	PHY_MODE_DP,
+	PHY_MODE_HDMI,
 };
 
 enum phy_media {
@@ -60,11 +62,14 @@ enum phy_media {
  *		the DisplayPort protocol.
  * @lvds:	Configuration set applicable for phys supporting
  *		the LVDS phy mode.
+ * @hdmi:	Configuration set applicable for phys supporting
+ *		the HDMI phy mode.
  */
 union phy_configure_opts {
 	struct phy_configure_opts_mipi_dphy	mipi_dphy;
 	struct phy_configure_opts_dp		dp;
 	struct phy_configure_opts_lvds		lvds;
+	struct phy_configure_opts_hdmi		hdmi;
 };
 
 /**
-- 
cgit v1.2.3


From 3bb9286f4ece6acbc1fbaa9f192a82645d30efbf Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Tue, 18 Mar 2025 14:35:36 +0200
Subject: phy: hdmi: Add color depth configuration

Extend the HDMI configuration options to allow managing bits per color
channel.  This is required by some PHY drivers such as
rockchip-samsung-hdptx.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Link: https://lore.kernel.org/r/20250318-phy-sam-hdptx-bpc-v6-2-8cb1678e7663@collabora.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/phy/phy-hdmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy/phy-hdmi.h b/include/linux/phy/phy-hdmi.h
index 6a696922bc7f..f0ec963c6e84 100644
--- a/include/linux/phy/phy-hdmi.h
+++ b/include/linux/phy/phy-hdmi.h
@@ -9,11 +9,13 @@
 /**
  * struct phy_configure_opts_hdmi - HDMI configuration set
  * @tmds_char_rate: HDMI TMDS Character Rate in Hertz.
+ * @bpc: Bits per color channel.
  *
  * This structure is used to represent the configuration state of a HDMI phy.
  */
 struct phy_configure_opts_hdmi {
 	unsigned long long tmds_char_rate;
+	unsigned int bpc;
 };
 
 #endif /* __PHY_HDMI_H_ */
-- 
cgit v1.2.3


From 1692632146451184c4bcb68554098470a119fb01 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Tue, 8 Apr 2025 20:08:51 +0800
Subject: USB: core: Correct API usb_(enable|disable)_autosuspend() prototypes

API usb_(enable|disable)_autosuspend() have inconsistent prototypes
regarding if CONFIG_PM is defined.

Correct prototypes when the macro is undefined by referring to those
when the macro is defined.

Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/20250408-fix_usb_hdr-v1-1-e785c5b49481@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index b46738701f8d..1b2545b4363b 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -815,10 +815,10 @@ static inline void usb_mark_last_busy(struct usb_device *udev)
 
 #else
 
-static inline int usb_enable_autosuspend(struct usb_device *udev)
-{ return 0; }
-static inline int usb_disable_autosuspend(struct usb_device *udev)
-{ return 0; }
+static inline void usb_enable_autosuspend(struct usb_device *udev)
+{ }
+static inline void usb_disable_autosuspend(struct usb_device *udev)
+{ }
 
 static inline int usb_autopm_get_interface(struct usb_interface *intf)
 { return 0; }
-- 
cgit v1.2.3


From 2acaf27cd7f4f32bfe8bf7335690618e2417e744 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Wed, 9 Apr 2025 21:13:54 -0400
Subject: vt: move unicode processing to a separate file

This will make it easier to maintain. Also make it depend on
CONFIG_CONSOLE_TRANSLATIONS.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://lore.kernel.org/r/20250410011839.64418-3-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index c35db4896c37..caf079bcb8c9 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs);
 u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
+bool ucs_is_double_width(uint32_t cp);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni)
 }
 
 static inline void console_map_init(void) { }
+
+static inline bool ucs_is_double_width(uint32_t cp)
+{
+	return false;
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From e88391f730e46d208b7fb37b02611d24137af1ef Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Wed, 9 Apr 2025 21:13:55 -0400
Subject: vt: properly support zero-width Unicode code points

Zero-width Unicode code points are causing misalignment in vertically
aligned content, disrupting the visual layout. Let's handle zero-width
code points more intelligently.

Double-width code points are stored in the screen grid followed by a white
space code point to create the expected screen layout. When a double-width
code point is followed by a zero-width code point in the console incoming
bytestream (e.g., an emoji with a presentation selector) then we may
replace the white space padding by that zero-width code point instead of
dropping it. This maximize screen content information while preserving
proper layout.

If a zero-width code point is preceded by a single-width code point then
the above trick is not possible and such zero-width code point must
be dropped.

VS16 (Variation Selector 16, U+FE0F) is special as it doubles the width
of the preceding single-width code point. We handle that case by giving
VS16 a width of 1 when that happens.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://lore.kernel.org/r/20250410011839.64418-4-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index caf079bcb8c9..7d778752dcef 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
+static inline bool ucs_is_zero_width(uint32_t cp)
+{
+	/* coming soon */
+	return false;
+}
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp)
 {
 	return false;
 }
+
+static inline bool ucs_is_zero_width(uint32_t cp)
+{
+	return false;
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From 3a1ab63aa05b4736a7d30ae0a769385662f13def Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Wed, 9 Apr 2025 21:13:57 -0400
Subject: vt: update ucs_width.c using gen_ucs_width.py

This replaces ucs_width.c with the code generated by gen_ucs_width.py
providing comprehensive tables for double-width and zero-width Unicode
code points. Also make ucs_is_zero_width() effective.

Note: scripts/checkpatch.pl complains about "... exceeds 100 columns".
      Please ignore.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://lore.kernel.org/r/20250410011839.64418-6-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 7d778752dcef..b3a911866662 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
-static inline bool ucs_is_zero_width(uint32_t cp)
-{
-	/* coming soon */
-	return false;
-}
+bool ucs_is_zero_width(uint32_t cp);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
-- 
cgit v1.2.3


From 54af55b990eda5a6a0140a3cded8094b42c0c3b7 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Wed, 9 Apr 2025 21:13:59 -0400
Subject: vt: create ucs_recompose.c using gen_ucs_recompose.py

This provides ucs_recompose() to recompose two Unicode characters into
a single character if possible. This is needed for the VT to properly
display decomposed UTF8 sequences.

Note: scripts/checkpatch.pl complains about "... exceeds 100 columns".
      Please ignore.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://lore.kernel.org/r/20250410011839.64418-8-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index b3a911866662..4d3a34c288e5 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
 bool ucs_is_zero_width(uint32_t cp);
+uint32_t ucs_recompose(uint32_t base, uint32_t combining);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp)
 {
 	return false;
 }
+
+static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining)
+{
+	return 0;
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From 2a63dd0edf388802074f1d4d6b588a3b4c380688 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Wed, 9 Apr 2025 19:36:45 -0700
Subject: net: Retire DCCP socket.

DCCP was orphaned in 2021 by commit 054c4610bd05 ("MAINTAINERS: dccp:
move Gerrit Renker to CREDITS"), which noted that the last maintainer
had been inactive for five years.

In recent years, it has become a playground for syzbot, and most changes
to DCCP have been odd bug fixes triggered by syzbot.  Apart from that,
the only changes have been driven by treewide or networking API updates
or adjustments related to TCP.

Thus, in 2023, we announced we would remove DCCP in 2025 via commit
b144fcaf46d4 ("dccp: Print deprecation notice.").

Since then, only one individual has contacted the netdev mailing list. [0]

There is ongoing research for Multipath DCCP.  The repository is hosted
on GitHub [1], and development is not taking place through the upstream
community.  While the repository is published under the GPLv2 license,
the scheduling part remains proprietary, with a LICENSE file [2] stating:

  "This is not Open Source software."

The researcher mentioned a plan to address the licensing issue, upstream
the patches, and step up as a maintainer, but there has been no further
communication since then.

Maintaining DCCP for a decade without any real users has become a burden.

Therefore, it's time to remove it.

Removing DCCP will also provide significant benefits to TCP.  It allows
us to freely reorganize the layout of struct inet_connection_sock, which
is currently shared with DCCP, and optimize it to reduce the number of
cachelines accessed in the TCP fast path.

Note that we keep DCCP netfilter modules as requested.  [3]

Link: https://lore.kernel.org/netdev/20230710182253.81446-1-kuniyu@amazon.com/T/#u #[0]
Link: https://github.com/telekom/mp-dccp #[1]
Link: https://github.com/telekom/mp-dccp/blob/mpdccp_v03_k5.10/net/dccp/non_gpl_scheduler/LICENSE #[2]
Link: https://lore.kernel.org/netdev/Z_VQ0KlCRkqYWXa-@calendula/ #[3]
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Paul Moore <paul@paul-moore.com> (LSM and SELinux)
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Link: https://patch.msgid.link/20250410023921.11307-3-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dccp.h | 289 ---------------------------------------------------
 include/linux/tfrc.h |  51 ---------
 2 files changed, 340 deletions(-)
 delete mode 100644 include/linux/tfrc.h

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 325af611909f..0b61b8b996d4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -2,79 +2,8 @@
 #ifndef _LINUX_DCCP_H
 #define _LINUX_DCCP_H
 
-
-#include <linux/in.h>
-#include <linux/interrupt.h>
-#include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/uio.h>
-#include <linux/workqueue.h>
-
-#include <net/inet_connection_sock.h>
-#include <net/inet_sock.h>
-#include <net/inet_timewait_sock.h>
-#include <net/tcp_states.h>
 #include <uapi/linux/dccp.h>
 
-enum dccp_state {
-	DCCP_OPEN	     = TCP_ESTABLISHED,
-	DCCP_REQUESTING	     = TCP_SYN_SENT,
-	DCCP_LISTEN	     = TCP_LISTEN,
-	DCCP_RESPOND	     = TCP_SYN_RECV,
-	/*
-	 * States involved in closing a DCCP connection:
-	 * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq.
-	 *
-	 * 2) CLOSING can have three different meanings (RFC 4340, 8.3):
-	 *  a. Client has performed active-close, has sent a Close to the server
-	 *     from state OPEN or PARTOPEN, and is waiting for the final Reset
-	 *     (in this case, SOCK_DONE == 1).
-	 *  b. Client is asked to perform passive-close, by receiving a CloseReq
-	 *     in (PART)OPEN state. It sends a Close and waits for final Reset
-	 *     (in this case, SOCK_DONE == 0).
-	 *  c. Server performs an active-close as in (a), keeps TIMEWAIT state.
-	 *
-	 * 3) The following intermediate states are employed to give passively
-	 *    closing nodes a chance to process their unread data:
-	 *    - PASSIVE_CLOSE    (from OPEN => CLOSED) and
-	 *    - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above).
-	 */
-	DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1,
-	DCCP_PASSIVE_CLOSE   = TCP_CLOSE_WAIT,	/* any node receiving a Close */
-	DCCP_CLOSING	     = TCP_CLOSING,
-	DCCP_TIME_WAIT	     = TCP_TIME_WAIT,
-	DCCP_CLOSED	     = TCP_CLOSE,
-	DCCP_NEW_SYN_RECV    = TCP_NEW_SYN_RECV,
-	DCCP_PARTOPEN	     = TCP_MAX_STATES,
-	DCCP_PASSIVE_CLOSEREQ,			/* clients receiving CloseReq */
-	DCCP_MAX_STATES
-};
-
-enum {
-	DCCPF_OPEN	      = TCPF_ESTABLISHED,
-	DCCPF_REQUESTING      = TCPF_SYN_SENT,
-	DCCPF_LISTEN	      = TCPF_LISTEN,
-	DCCPF_RESPOND	      = TCPF_SYN_RECV,
-	DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1,
-	DCCPF_CLOSING	      = TCPF_CLOSING,
-	DCCPF_TIME_WAIT	      = TCPF_TIME_WAIT,
-	DCCPF_CLOSED	      = TCPF_CLOSE,
-	DCCPF_NEW_SYN_RECV    = TCPF_NEW_SYN_RECV,
-	DCCPF_PARTOPEN	      = (1 << DCCP_PARTOPEN),
-};
-
-static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
-{
-	return (struct dccp_hdr *)skb_transport_header(skb);
-}
-
-static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen)
-{
-	skb_push(skb, headlen);
-	skb_reset_transport_header(skb);
-	return memset(skb_transport_header(skb), 0, headlen);
-}
-
 static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh)
 {
 	return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh));
@@ -85,12 +14,6 @@ static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
 	return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
 }
 
-static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
-{
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	return __dccp_basic_hdr_len(dh);
-}
-
 static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh)
 {
 	__u64 seq_nr =  ntohs(dh->dccph_seq);
@@ -103,222 +26,10 @@ static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh)
 	return seq_nr;
 }
 
-static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
-{
-	return (struct dccp_hdr_request *)(skb_transport_header(skb) +
-					   dccp_basic_hdr_len(skb));
-}
-
-static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
-{
-	return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) +
-					    dccp_basic_hdr_len(skb));
-}
-
-static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
-{
-	const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
-	return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low);
-}
-
-static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
-{
-	return (struct dccp_hdr_response *)(skb_transport_header(skb) +
-					    dccp_basic_hdr_len(skb));
-}
-
-static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
-{
-	return (struct dccp_hdr_reset *)(skb_transport_header(skb) +
-					 dccp_basic_hdr_len(skb));
-}
-
 static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
 {
 	return __dccp_basic_hdr_len(dh) +
 	       dccp_packet_hdr_len(dh->dccph_type);
 }
 
-static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
-{
-	return __dccp_hdr_len(dccp_hdr(skb));
-}
-
-/**
- * struct dccp_request_sock  -  represent DCCP-specific connection request
- * @dreq_inet_rsk: structure inherited from
- * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1)
- * @dreq_gss: greatest sequence number sent (for retransmitted Responses)
- * @dreq_isr: initial sequence number received in the first Request
- * @dreq_gsr: greatest sequence number received (for retransmitted Request(s))
- * @dreq_service: service code present on the Request (there is just one)
- * @dreq_featneg: feature negotiation options for this connection
- * The following two fields are analogous to the ones in dccp_sock:
- * @dreq_timestamp_echo: last received timestamp to echo (13.1)
- * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo
- */
-struct dccp_request_sock {
-	struct inet_request_sock dreq_inet_rsk;
-	__u64			 dreq_iss;
-	__u64			 dreq_gss;
-	__u64			 dreq_isr;
-	__u64			 dreq_gsr;
-	__be32			 dreq_service;
-	spinlock_t		 dreq_lock;
-	struct list_head	 dreq_featneg;
-	__u32			 dreq_timestamp_echo;
-	__u32			 dreq_timestamp_time;
-};
-
-static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
-{
-	return (struct dccp_request_sock *)req;
-}
-
-extern struct inet_timewait_death_row dccp_death_row;
-
-extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
-			      struct sk_buff *skb);
-
-struct dccp_options_received {
-	u64	dccpor_ndp:48;
-	u32	dccpor_timestamp;
-	u32	dccpor_timestamp_echo;
-	u32	dccpor_elapsed_time;
-};
-
-struct ccid;
-
-enum dccp_role {
-	DCCP_ROLE_UNDEFINED,
-	DCCP_ROLE_LISTEN,
-	DCCP_ROLE_CLIENT,
-	DCCP_ROLE_SERVER,
-};
-
-struct dccp_service_list {
-	__u32	dccpsl_nr;
-	__be32	dccpsl_list[];
-};
-
-#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
-#define DCCP_SERVICE_CODE_IS_ABSENT		0
-
-static inline bool dccp_list_has_service(const struct dccp_service_list *sl,
-					const __be32 service)
-{
-	if (likely(sl != NULL)) {
-		u32 i = sl->dccpsl_nr;
-		while (i--)
-			if (sl->dccpsl_list[i] == service)
-				return true;
-	}
-	return false;
-}
-
-struct dccp_ackvec;
-
-/**
- * struct dccp_sock - DCCP socket state
- *
- * @dccps_swl - sequence number window low
- * @dccps_swh - sequence number window high
- * @dccps_awl - acknowledgement number window low
- * @dccps_awh - acknowledgement number window high
- * @dccps_iss - initial sequence number sent
- * @dccps_isr - initial sequence number received
- * @dccps_osr - first OPEN sequence number received
- * @dccps_gss - greatest sequence number sent
- * @dccps_gsr - greatest valid sequence number received
- * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
- * @dccps_service - first (passive sock) or unique (active sock) service code
- * @dccps_service_list - second .. last service code on passive socket
- * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
- * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
- * @dccps_l_ack_ratio - feature-local Ack Ratio
- * @dccps_r_ack_ratio - feature-remote Ack Ratio
- * @dccps_l_seq_win - local Sequence Window (influences ack number validity)
- * @dccps_r_seq_win - remote Sequence Window (influences seq number validity)
- * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
- * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
- * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
- * @dccps_ndp_count - number of Non Data Packets since last data packet
- * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
- * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
- * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
- * @dccps_hc_rx_ackvec - rx half connection ack vector
- * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
- * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
- * @dccps_options_received - parsed set of retrieved options
- * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
- * @dccps_tx_qlen - maximum length of the TX queue
- * @dccps_role - role of this sock, one of %dccp_role
- * @dccps_hc_rx_insert_options - receiver wants to add options when acking
- * @dccps_hc_tx_insert_options - sender wants to add options when sending
- * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
- * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
- * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
- * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
- * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
- */
-struct dccp_sock {
-	/* inet_connection_sock has to be the first member of dccp_sock */
-	struct inet_connection_sock	dccps_inet_connection;
-#define dccps_syn_rtt			dccps_inet_connection.icsk_ack.lrcvtime
-	__u64				dccps_swl;
-	__u64				dccps_swh;
-	__u64				dccps_awl;
-	__u64				dccps_awh;
-	__u64				dccps_iss;
-	__u64				dccps_isr;
-	__u64				dccps_osr;
-	__u64				dccps_gss;
-	__u64				dccps_gsr;
-	__u64				dccps_gar;
-	__be32				dccps_service;
-	__u32				dccps_mss_cache;
-	struct dccp_service_list	*dccps_service_list;
-	__u32				dccps_timestamp_echo;
-	__u32				dccps_timestamp_time;
-	__u16				dccps_l_ack_ratio;
-	__u16				dccps_r_ack_ratio;
-	__u64				dccps_l_seq_win:48;
-	__u64				dccps_r_seq_win:48;
-	__u8				dccps_pcslen:4;
-	__u8				dccps_pcrlen:4;
-	__u8				dccps_send_ndp_count:1;
-	__u64				dccps_ndp_count:48;
-	unsigned long			dccps_rate_last;
-	struct list_head		dccps_featneg;
-	struct dccp_ackvec		*dccps_hc_rx_ackvec;
-	struct ccid			*dccps_hc_rx_ccid;
-	struct ccid			*dccps_hc_tx_ccid;
-	struct dccp_options_received	dccps_options_received;
-	__u8				dccps_qpolicy;
-	__u32				dccps_tx_qlen;
-	enum dccp_role			dccps_role:2;
-	__u8				dccps_hc_rx_insert_options:1;
-	__u8				dccps_hc_tx_insert_options:1;
-	__u8				dccps_server_timewait:1;
-	__u8				dccps_sync_scheduled:1;
-	struct tasklet_struct		dccps_xmitlet;
-	struct timer_list		dccps_xmit_timer;
-};
-
-#define dccp_sk(ptr)	container_of_const(ptr, struct dccp_sock, \
-					   dccps_inet_connection.icsk_inet.sk)
-
-static inline const char *dccp_role(const struct sock *sk)
-{
-	switch (dccp_sk(sk)->dccps_role) {
-	case DCCP_ROLE_UNDEFINED: return "undefined";
-	case DCCP_ROLE_LISTEN:	  return "listen";
-	case DCCP_ROLE_SERVER:	  return "server";
-	case DCCP_ROLE_CLIENT:	  return "client";
-	}
-	return NULL;
-}
-
-extern void dccp_syn_ack_timeout(const struct request_sock *req);
-
 #endif /* _LINUX_DCCP_H */
diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h
deleted file mode 100644
index a5acc768085d..000000000000
--- a/include/linux/tfrc.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _LINUX_TFRC_H_
-#define _LINUX_TFRC_H_
-/*
- *  TFRC - Data Structures for the TCP-Friendly Rate Control congestion
- *         control mechanism as specified in RFC 3448.
- *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
- *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- */
-#include <linux/types.h>
-
-/** 	tfrc_rx_info    -    TFRC Receiver Data Structure
- *
- * 	@tfrcrx_x_recv:	receiver estimate of sending rate (3.2.2)
- * 	@tfrcrx_rtt:	round-trip-time (communicated by sender)
- * 	@tfrcrx_p:	current estimate of loss event rate (3.2.2)
- */
-struct tfrc_rx_info {
-  	__u32 tfrcrx_x_recv;
-	__u32 tfrcrx_rtt;
-  	__u32 tfrcrx_p;
-};
-
-/** 	tfrc_tx_info    -    TFRC Sender Data Structure
- *
- * 	@tfrctx_x:	computed transmit rate (4.3 (4))
- * 	@tfrctx_x_recv: receiver estimate of send rate (4.3)
- * 	@tfrctx_x_calc:	return value of throughput equation (3.1)
- * 	@tfrctx_rtt:	(moving average) estimate of RTT (4.3)
- * 	@tfrctx_p:	current loss event rate (5.4)
- * 	@tfrctx_rto:	estimate of RTO, equals 4*RTT (4.3)
- * 	@tfrctx_ipi:	inter-packet interval (4.6)
- *
- *  Note: X and X_recv are both maintained in units of 64 * bytes/second. This
- *        enables a finer resolution of sending rates and avoids problems with
- *        integer arithmetic; u32 is not sufficient as scaling consumes 6 bits.
- */
-struct tfrc_tx_info {
-	__u64 tfrctx_x;
-	__u64 tfrctx_x_recv;
-	__u32 tfrctx_x_calc;
-	__u32 tfrctx_rtt;
-	__u32 tfrctx_p;
-	__u32 tfrctx_rto;
-	__u32 tfrctx_ipi;
-};
-
-#endif /* _LINUX_TFRC_H_ */
-- 
cgit v1.2.3


From 88113e09ada52be28968aacf4af7b3d667832f00 Mon Sep 17 00:00:00 2001
From: Mukesh Kumar Savaliya <quic_msavaliy@quicinc.com>
Date: Fri, 4 Apr 2025 19:24:27 +0530
Subject: spi: Add support for Double Transfer Rate (DTR) mode

Introduce support for protocol drivers to specify whether a transfer
should use single or dual transfer mode. Currently, the SPI controller
cannot determine this information from the user, leading to potential
limitations in transfer capabilities.

Add a new field `dtr_mode` in the `spi_transfer` structure. The `dtr_mode`
field allows protocol drivers to indicate if Double Transfer Rate (DTR)
mode is supported for a given transfer. When `dtr_mode` is set to true,
the SPI controller will use DTR mode; otherwise, it will default to single
transfer mode.

Introduce another field `dtr_caps` to indicate if the QSPI controller is
capable of supporting DTR mode (SDR and DDR). By default, both `dtr_caps`
and `dtr_mode` will be false. These flags manage the QSPI controller's DTR
mode capabilities within the SPI framework.

The QSPI controller driver uses these flags to configure single or double
transfer rates using the controller register.

The existing spi-mem driver helps configure the DTR mode but is limited to
memory devices. There is no support available to set DTR mode for non-memory
devices, e.g., touch or any generic SPI sensor. This change is backward
compatible and doesn't break existing SPI or QSPI drivers.

Changes include:
- Addition of `dtr_mode` and `dtr_caps` fields in the `spi_transfer`
  structure.
- Documentation updates to reflect the new `dtr_mode` and `dtr_caps` fields.

Signed-off-by: Mukesh Kumar Savaliya <quic_msavaliy@quicinc.com>
Link: https://patch.msgid.link/20250404135427.313825-1-quic_msavaliy@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e10f0ebb8250..834a09bd8ccc 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -508,6 +508,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  *	found.
  * @put_offload: release the offload instance acquired by @get_offload.
  * @mem_caps: controller capabilities for the handling of memory operations.
+ * @dtr_caps: true if controller has dtr(single/dual transfer rate) capability.
+ *	QSPI based controller should fill this based on controller's capability.
  * @unprepare_message: undo any work done by prepare_message().
  * @target_abort: abort the ongoing transfer request on an SPI target controller
  * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS
@@ -751,6 +753,9 @@ struct spi_controller {
 	const struct spi_controller_mem_ops *mem_ops;
 	const struct spi_controller_mem_caps *mem_caps;
 
+	/* SPI or QSPI controller can set to true if supports SDR/DDR transfer rate */
+	bool			dtr_caps;
+
 	struct spi_offload *(*get_offload)(struct spi_device *spi,
 					   const struct spi_offload_config *config);
 	void (*put_offload)(struct spi_offload *offload);
@@ -1003,6 +1008,7 @@ struct spi_res {
  *	processed the word, i.e. the "pre" timestamp should be taken before
  *	transmitting the "pre" word, and the "post" timestamp after receiving
  *	transmit confirmation from the controller for the "post" word.
+ * @dtr_mode: true if supports double transfer rate.
  * @timestamped: true if the transfer has been timestamped
  * @error: Error status logged by SPI controller driver.
  *
@@ -1054,6 +1060,9 @@ struct spi_res {
  * two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x)
  * SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer.
  *
+ * User may also set dtr_mode to true to use dual transfer mode if desired. if
+ * not, default considered as single transfer mode.
+ *
  * The code that submits an spi_message (and its spi_transfers)
  * to the lower layers is responsible for managing its memory.
  * Zero-initialize every field you don't set up explicitly, to
@@ -1088,6 +1097,7 @@ struct spi_transfer {
 	unsigned	tx_nbits:4;
 	unsigned	rx_nbits:4;
 	unsigned	timestamped:1;
+	bool		dtr_mode;
 #define	SPI_NBITS_SINGLE	0x01 /* 1-bit transfer */
 #define	SPI_NBITS_DUAL		0x02 /* 2-bit transfer */
 #define	SPI_NBITS_QUAD		0x04 /* 4-bit transfer */
-- 
cgit v1.2.3


From 7cfe1e208b86c7fd4b35a8a502a8b15604eec1e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Thu, 3 Apr 2025 17:11:32 +0200
Subject: pwm: Make chip parameter to pwmchip_get_drvdata() a const pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dev_get_drvdata()'s parameter is a const pointer, so the chip passed to
pwmchip_get_drvdata() can be const, too.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/20250403151134.266388-2-u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 9ece4e5d3815..bf0469b2201d 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -369,7 +369,7 @@ static inline struct device *pwmchip_parent(const struct pwm_chip *chip)
 	return chip->dev.parent;
 }
 
-static inline void *pwmchip_get_drvdata(struct pwm_chip *chip)
+static inline void *pwmchip_get_drvdata(const struct pwm_chip *chip)
 {
 	return dev_get_drvdata(&chip->dev);
 }
-- 
cgit v1.2.3


From cb7ca40a3882360ce87191793449d48df0b29184 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 9 Apr 2025 23:11:22 +0200
Subject: x86/fpu: Make task_struct::thread constant size

Turn thread.fpu into a pointer. Since most FPU code internals work by passing
around the FPU pointer already, the code generation impact is small.

This allows us to remove the old kludge of task_struct being variable size:

  struct task_struct {

       ...
       /*
        * New fields for task_struct should be added above here, so that
        * they are included in the randomized portion of task_struct.
        */
       randomized_struct_fields_end

       /* CPU-specific state of this task: */
       struct thread_struct            thread;

       /*
        * WARNING: on x86, 'thread_struct' contains a variable-sized
        * structure.  It *MUST* be at the end of 'task_struct'.
        *
        * Do not put anything below here!
        */
  };

... which creates a number of problems, such as requiring thread_struct to be
the last member of the struct - not allowing it to be struct-randomized, etc.

But the primary motivation is to allow the decoupling of task_struct from
hardware details (<asm/processor.h> in particular), and to eventually allow
the per-task infrastructure:

   DECLARE_PER_TASK(type, name);
   ...
   per_task(current, name) = val;

... which requires task_struct to be a constant size struct.

The fpu_thread_struct_whitelist() quirk to hardened usercopy can be removed,
now that the FPU structure is not embedded in the task struct anymore, which
reduces text footprint a bit.

Fixed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Chang S. Bae <chang.seok.bae@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250409211127.3544993-4-mingo@kernel.org
---
 include/linux/sched.h | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..4ecc0c6b1cb0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1646,22 +1646,15 @@ struct task_struct {
 	struct user_event_mm		*user_event_mm;
 #endif
 
-	/*
-	 * New fields for task_struct should be added above here, so that
-	 * they are included in the randomized portion of task_struct.
-	 */
-	randomized_struct_fields_end
-
 	/* CPU-specific state of this task: */
 	struct thread_struct		thread;
 
 	/*
-	 * WARNING: on x86, 'thread_struct' contains a variable-sized
-	 * structure.  It *MUST* be at the end of 'task_struct'.
-	 *
-	 * Do not put anything below here!
+	 * New fields for task_struct should be added above here, so that
+	 * they are included in the randomized portion of task_struct.
 	 */
-};
+	randomized_struct_fields_end
+} __attribute__ ((aligned (64)));
 
 #define TASK_REPORT_IDLE	(TASK_REPORT + 1)
 #define TASK_REPORT_MAX		(TASK_REPORT_IDLE << 1)
-- 
cgit v1.2.3


From 34180863e000f325e5d11a2fd4af300a0ae50f71 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 8 Apr 2025 16:44:27 +0800
Subject: firmware: arm_scmi: imx: Add i.MX95 LMM protocol

Add Logical Machine Management(LMM) protocol which is intended for boot,
shutdown, and reset of other logical machines (LM). It is usually used to
allow one LM to manager another used as an offload or accelerator engine.

Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Message-Id: <20250408-imx-lmm-cpu-v4-3-4c5f4a456e49@nxp.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 include/linux/scmi_imx_protocol.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h
index 53b356a26414..2a96fc29cb6f 100644
--- a/include/linux/scmi_imx_protocol.h
+++ b/include/linux/scmi_imx_protocol.h
@@ -11,8 +11,10 @@
 #include <linux/bitfield.h>
 #include <linux/device.h>
 #include <linux/notifier.h>
+#include <linux/scmi_protocol.h>
 #include <linux/types.h>
 
+#define SCMI_PROTOCOL_IMX_LMM	0x80
 #define	SCMI_PROTOCOL_IMX_BBM	0x81
 #define	SCMI_PROTOCOL_IMX_MISC	0x84
 
@@ -57,4 +59,34 @@ struct scmi_imx_misc_proto_ops {
 	int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph,
 				    u32 ctrl_id, u32 evt_id, u32 flags);
 };
+
+/* See LMM_ATTRIBUTES in imx95.rst */
+#define	LMM_ID_DISCOVER	0xFFFFFFFFU
+#define	LMM_MAX_NAME	16
+
+enum scmi_imx_lmm_state {
+	LMM_STATE_LM_OFF,
+	LMM_STATE_LM_ON,
+	LMM_STATE_LM_SUSPEND,
+	LMM_STATE_LM_POWERED,
+};
+
+struct scmi_imx_lmm_info {
+	u32 lmid;
+	enum scmi_imx_lmm_state state;
+	u32 errstatus;
+	u8 name[LMM_MAX_NAME];
+};
+
+struct scmi_imx_lmm_proto_ops {
+	int (*lmm_power_boot)(const struct scmi_protocol_handle *ph, u32 lmid,
+			      bool boot);
+	int (*lmm_info)(const struct scmi_protocol_handle *ph, u32 lmid,
+			struct scmi_imx_lmm_info *info);
+	int (*lmm_reset_vector_set)(const struct scmi_protocol_handle *ph,
+				    u32 lmid, u32 cpuid, u32 flags, u64 vector);
+	int (*lmm_shutdown)(const struct scmi_protocol_handle *ph, u32 lmid,
+			    u32 flags);
+};
+
 #endif
-- 
cgit v1.2.3


From e68c305bc2f3cdc0b6fa21cd9579650c1457c070 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 8 Apr 2025 16:44:28 +0800
Subject: firmware: arm_scmi: imx: Add i.MX95 CPU Protocol

This protocol allows an agent to start, stop a CPU or set reset vector. It
is used to manage auxiliary CPUs in an LM (e.g. additional cores in an AP
cluster).

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Message-Id: <20250408-imx-lmm-cpu-v4-4-4c5f4a456e49@nxp.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 include/linux/scmi_imx_protocol.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h
index 2a96fc29cb6f..27bd372cbfb1 100644
--- a/include/linux/scmi_imx_protocol.h
+++ b/include/linux/scmi_imx_protocol.h
@@ -16,6 +16,7 @@
 
 #define SCMI_PROTOCOL_IMX_LMM	0x80
 #define	SCMI_PROTOCOL_IMX_BBM	0x81
+#define SCMI_PROTOCOL_IMX_CPU	0x82
 #define	SCMI_PROTOCOL_IMX_MISC	0x84
 
 #define SCMI_IMX_VENDOR		"NXP"
@@ -89,4 +90,13 @@ struct scmi_imx_lmm_proto_ops {
 			    u32 flags);
 };
 
+struct scmi_imx_cpu_proto_ops {
+	int (*cpu_reset_vector_set)(const struct scmi_protocol_handle *ph,
+				    u32 cpuid, u64 vector, bool start,
+				    bool boot, bool resume);
+	int (*cpu_start)(const struct scmi_protocol_handle *ph, u32 cpuid,
+			 bool start);
+	int (*cpu_started)(const struct scmi_protocol_handle *ph, u32 cpuid,
+			   bool *started);
+};
 #endif
-- 
cgit v1.2.3


From 7242bbf418f0521c0e3dd7034b7b3cc4413d7d5a Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 8 Apr 2025 16:44:29 +0800
Subject: firmware: imx: Add i.MX95 SCMI LMM driver

The i.MX95 System manager exports SCMI LMM protocol for linux to manage
Logical Machines. The driver is to use the LMM Protocol interface to
boot, shutdown a LM.

Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Message-Id: <20250408-imx-lmm-cpu-v4-5-4c5f4a456e49@nxp.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 include/linux/firmware/imx/sm.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h
index 9b85a3f028d1..bc27b04afb2f 100644
--- a/include/linux/firmware/imx/sm.h
+++ b/include/linux/firmware/imx/sm.h
@@ -8,6 +8,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/errno.h>
+#include <linux/scmi_imx_protocol.h>
 #include <linux/types.h>
 
 #define SCMI_IMX_CTRL_PDM_CLK_SEL	0	/* AON PDM clock sel */
@@ -20,4 +21,17 @@
 int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val);
 int scmi_imx_misc_ctrl_set(u32 id, u32 val);
 
+enum scmi_imx_lmm_op {
+	SCMI_IMX_LMM_BOOT,
+	SCMI_IMX_LMM_POWER_ON,
+	SCMI_IMX_LMM_SHUTDOWN,
+};
+
+/* For shutdown pperation */
+#define SCMI_IMX_LMM_OP_FORCEFUL	0
+#define SCMI_IMX_LMM_OP_GRACEFUL	BIT(0)
+
+int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags);
+int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info);
+int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector);
 #endif
-- 
cgit v1.2.3


From 1055faa5d6606cccc706e37956f6ff8fb7c22d55 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 8 Apr 2025 16:44:30 +0800
Subject: firmware: imx: Add i.MX95 SCMI CPU driver

The i.MX95 System manager exports SCMI CPU protocol for linux to manage
cpu cores. The driver is to use the cpu Protocol interface to
start, stop a cpu cores (eg, M7).

Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Message-Id: <20250408-imx-lmm-cpu-v4-6-4c5f4a456e49@nxp.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 include/linux/firmware/imx/sm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h
index bc27b04afb2f..a8a17eeb7d90 100644
--- a/include/linux/firmware/imx/sm.h
+++ b/include/linux/firmware/imx/sm.h
@@ -21,6 +21,11 @@
 int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val);
 int scmi_imx_misc_ctrl_set(u32 id, u32 val);
 
+int scmi_imx_cpu_start(u32 cpuid, bool start);
+int scmi_imx_cpu_started(u32 cpuid, bool *started);
+int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, bool boot,
+				  bool resume);
+
 enum scmi_imx_lmm_op {
 	SCMI_IMX_LMM_BOOT,
 	SCMI_IMX_LMM_POWER_ON,
-- 
cgit v1.2.3


From 097f171f98289cf737437599c40b0d1e81266e9e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Apr 2025 18:42:46 -0700
Subject: net: convert dev->rtnl_link_state to a bool

netdevice reg_state was split into two 16 bit enums back in 2010
in commit a2835763e130 ("rtnetlink: handle rtnl_link netlink
notifications manually"). Since the split the fields have been
moved apart, and last year we converted reg_state to a normal
u8 in commit 4d42b37def70 ("net: convert dev->reg_state to u8").

rtnl_link_state being a 16 bitfield makes no sense. Convert it
to a single bool, it seems very unlikely after 15 years that
we'll need more values in it.

We could drop dev->rtnl_link_ops from the conditions but feels
like having it there more clearly points at the reason for this
hack.

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250410014246.780885-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8544f6a680c..e6036b82ef4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1946,9 +1946,6 @@ enum netdev_reg_state {
  *
  *	@reg_state:		Register/unregister state machine
  *	@dismantle:		Device is going to be freed
- *	@rtnl_link_state:	This enum represents the phases of creating
- *				a new link
- *
  *	@needs_free_netdev:	Should unregister perform free_netdev?
  *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
@@ -2363,11 +2360,8 @@ struct net_device {
 
 	/** @moving_ns: device is changing netns, protected by @lock */
 	bool moving_ns;
-
-	enum {
-		RTNL_LINK_INITIALIZED,
-		RTNL_LINK_INITIALIZING,
-	} rtnl_link_state:16;
+	/** @rtnl_link_initializing: Device being created, suppress events */
+	bool rtnl_link_initializing;
 
 	bool needs_free_netdev;
 	void (*priv_destructor)(struct net_device *dev);
-- 
cgit v1.2.3


From cd3c93167da0e760b5819246eae7a4ea30fd014b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Apr 2025 12:41:36 +0200
Subject: page_pool: Move pp_magic check into helper functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we are about to stash some more information into the pp_magic
field, let's move the magic signature checks into a pair of helper
functions so it can be changed in one place.

Reviewed-by: Mina Almasry <almasrymina@google.com>
Tested-by: Yonglong Liu <liuyonglong@huawei.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-1-6a9ef2e0cba8@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mm.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7f13f087954..56c47f4a38ca 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4248,4 +4248,24 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 #define VM_SEALED_SYSMAP	VM_NONE
 #endif
 
+/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
+ * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
+ * the head page of compound page and bit 1 for pfmemalloc page.
+ * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling
+ * the pfmemalloc page.
+ */
+#define PP_MAGIC_MASK ~0x3UL
+
+#ifdef CONFIG_PAGE_POOL
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+#else
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+	return false;
+}
+#endif
+
 #endif /* _LINUX_MM_H */
-- 
cgit v1.2.3


From ee62ce7a1d909ccba0399680a03c2dee83bcae95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Apr 2025 12:41:37 +0200
Subject: page_pool: Track DMA-mapped pages and unmap them when destroying the
 pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When enabling DMA mapping in page_pool, pages are kept DMA mapped until
they are released from the pool, to avoid the overhead of re-mapping the
pages every time they are used. This causes resource leaks and/or
crashes when there are pages still outstanding while the device is torn
down, because page_pool will attempt an unmap through a non-existent DMA
device on the subsequent page return.

To fix this, implement a simple tracking of outstanding DMA-mapped pages
in page pool using an xarray. This was first suggested by Mina[0], and
turns out to be fairly straight forward: We simply store pointers to
pages directly in the xarray with xa_alloc() when they are first DMA
mapped, and remove them from the array on unmap. Then, when a page pool
is torn down, it can simply walk the xarray and unmap all pages still
present there before returning, which also allows us to get rid of the
get/put_device() calls in page_pool. Using xa_cmpxchg(), no additional
synchronisation is needed, as a page will only ever be unmapped once.

To avoid having to walk the entire xarray on unmap to find the page
reference, we stash the ID assigned by xa_alloc() into the page
structure itself, using the upper bits of the pp_magic field. This
requires a couple of defines to avoid conflicting with the
POINTER_POISON_DELTA define, but this is all evaluated at compile-time,
so does not affect run-time performance. The bitmap calculations in this
patch gives the following number of bits for different architectures:

- 23 bits on 32-bit architectures
- 21 bits on PPC64 (because of the definition of ILLEGAL_POINTER_VALUE)
- 32 bits on other 64-bit architectures

Stashing a value into the unused bits of pp_magic does have the effect
that it can make the value stored there lie outside the unmappable
range (as governed by the mmap_min_addr sysctl), for architectures that
don't define ILLEGAL_POINTER_VALUE. This means that if one of the
pointers that is aliased to the pp_magic field (such as page->lru.next)
is dereferenced while the page is owned by page_pool, that could lead to
a dereference into userspace, which is a security concern. The risk of
this is mitigated by the fact that (a) we always clear pp_magic before
releasing a page from page_pool, and (b) this would need a
use-after-free bug for struct page, which can have many other risks
since page->lru.next is used as a generic list pointer in multiple
places in the kernel. As such, with this patch we take the position that
this risk is negligible in practice. For more discussion, see[1].

Since all the tracking added in this patch is performed on DMA
map/unmap, no additional code is needed in the fast path, meaning the
performance overhead of this tracking is negligible there. A
micro-benchmark shows that the total overhead of the tracking itself is
about 400 ns (39 cycles(tsc) 395.218 ns; sum for both map and unmap[2]).
Since this cost is only paid on DMA map and unmap, it seems like an
acceptable cost to fix the late unmap issue. Further optimisation can
narrow the cases where this cost is paid (for instance by eliding the
tracking when DMA map/unmap is a no-op).

The extra memory needed to track the pages is neatly encapsulated inside
xarray, which uses the 'struct xa_node' structure to track items. This
structure is 576 bytes long, with slots for 64 items, meaning that a
full node occurs only 9 bytes of overhead per slot it tracks (in
practice, it probably won't be this efficient, but in any case it should
be an acceptable overhead).

[0] https://lore.kernel.org/all/CAHS8izPg7B5DwKfSuzz-iOop_YRbk3Sd6Y4rX7KBG9DcVJcyWg@mail.gmail.com/
[1] https://lore.kernel.org/r/20250320023202.GA25514@openwall.com
[2] https://lore.kernel.org/r/ae07144c-9295-4c9d-a400-153bb689fe9e@huawei.com

Reported-by: Yonglong Liu <liuyonglong@huawei.com>
Closes: https://lore.kernel.org/r/8743264a-9700-4227-a556-5f931c720211@huawei.com
Fixes: ff7d6b27f894 ("page_pool: refurbish version of page_pool code")
Suggested-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Jesper Dangaard Brouer <hawk@kernel.org>
Tested-by: Jesper Dangaard Brouer <hawk@kernel.org>
Tested-by: Qiuling Ren <qren@redhat.com>
Tested-by: Yuying Ma <yuma@redhat.com>
Tested-by: Yonglong Liu <liuyonglong@huawei.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-2-6a9ef2e0cba8@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mm.h     | 46 ++++++++++++++++++++++++++++++++++++++++++----
 include/linux/poison.h |  4 ++++
 2 files changed, 46 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 56c47f4a38ca..130d3c9d2ee4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4248,13 +4248,51 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 #define VM_SEALED_SYSMAP	VM_NONE
 #endif
 
+/*
+ * DMA mapping IDs for page_pool
+ *
+ * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
+ * stashes it in the upper bits of page->pp_magic. We always want to be able to
+ * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
+ * pages can have arbitrary kernel pointers stored in the same field as pp_magic
+ * (since it overlaps with page->lru.next), so we must ensure that we cannot
+ * mistake a valid kernel pointer with any of the values we write into this
+ * field.
+ *
+ * On architectures that set POISON_POINTER_DELTA, this is already ensured,
+ * since this value becomes part of PP_SIGNATURE; meaning we can just use the
+ * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
+ * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
+ * 0, we make sure that we leave the two topmost bits empty, as that guarantees
+ * we won't mistake a valid kernel pointer for a value we set, regardless of the
+ * VMSPLIT setting.
+ *
+ * Altogether, this means that the number of bits available is constrained by
+ * the size of an unsigned long (at the upper end, subtracting two bits per the
+ * above), and the definition of PP_SIGNATURE (with or without
+ * POISON_POINTER_DELTA).
+ */
+#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
+#if POISON_POINTER_DELTA > 0
+/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
+ * index to not overlap with that if set
+ */
+#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
+#else
+/* Always leave out the topmost two; see above. */
+#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
+#endif
+
+#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
+				  PP_DMA_INDEX_SHIFT)
+
 /* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
  * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
- * the head page of compound page and bit 1 for pfmemalloc page.
- * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling
- * the pfmemalloc page.
+ * the head page of compound page and bit 1 for pfmemalloc page, as well as the
+ * bits used for the DMA index. page_is_pfmemalloc() is checked in
+ * __page_pool_put_page() to avoid recycling the pfmemalloc page.
  */
-#define PP_MAGIC_MASK ~0x3UL
+#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
 
 #ifdef CONFIG_PAGE_POOL
 static inline bool page_pool_page_is_pp(struct page *page)
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 331a9a996fa8..8ca2235f78d5 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -70,6 +70,10 @@
 #define KEY_DESTROY		0xbd
 
 /********** net/core/page_pool.c **********/
+/*
+ * page_pool uses additional free bits within this value to store data, see the
+ * definition of PP_DMA_INDEX_MASK in mm.h
+ */
 #define PP_SIGNATURE		(0x40 + POISON_POINTER_DELTA)
 
 /********** net/core/skbuff.c **********/
-- 
cgit v1.2.3


From ceaceaf79ea0fe337344fc5c1fb10a421a362410 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Fri, 11 Apr 2025 23:04:19 +0100
Subject: net: ethtool: fix get_ts_stats() documentation

Commit 0e9c127729be ("ethtool: add interface to read Tx hardware
timestamping statistics") added documentation for timestamping
statistics, but added the detailed explanation for this method to
the get_ts_info() rather than get_ts_stats(). Move it to the correct
entry.

Cc: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1u3MTz-000Crx-IW@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 8210ece94fa6..013d25858642 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -926,10 +926,11 @@ struct kernel_ethtool_ts_info {
  * @get_ts_info: Get the time stamping and PTP hardware clock capabilities.
  *	It may be called with RCU, or rtnl or reference on the device.
  *	Drivers supporting transmit time stamps in software should set this to
- *	ethtool_op_get_ts_info(). Drivers must not zero statistics which they
- *	don't report. The stats	structure is initialized to ETHTOOL_STAT_NOT_SET
- *	indicating driver does not report statistics.
- * @get_ts_stats: Query the device hardware timestamping statistics.
+ *	ethtool_op_get_ts_info().
+ * @get_ts_stats: Query the device hardware timestamping statistics. Drivers
+ *	must not zero statistics which they don't report. The stats structure
+ *	is initialized to ETHTOOL_STAT_NOT_SET indicating driver does not
+ *	report statistics.
  * @get_module_info: Get the size and type of the eeprom contained within
  *	a plug-in module.
  * @get_module_eeprom: Get the eeprom information from the plug-in module
-- 
cgit v1.2.3


From 651f88cb046c5e002f7c11de2cebf207787d2346 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Sat, 12 Apr 2025 09:08:45 +0100
Subject: net: stmmac: remove eee_usecs_rate

plat_dat->eee_users_rate is now unused, so remove this member.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1u3Vuv-000E7y-9k@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index c4ec8bb8144e..8aed09d65b4a 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -276,7 +276,6 @@ struct plat_stmmacenet_data {
 	int mac_port_sel_speed;
 	int has_xgmac;
 	u8 vlan_fail_q;
-	unsigned long eee_usecs_rate;
 	struct pci_dev *pdev;
 	int int_snapshot_num;
 	int msi_mac_vec;
-- 
cgit v1.2.3


From d30f83d278a921485b3e877d03fe937bccfcbcdd Mon Sep 17 00:00:00 2001
From: Raviteja Laggyshetty <quic_rlaggysh@quicinc.com>
Date: Tue, 15 Apr 2025 09:53:38 +0000
Subject: interconnect: core: Add dynamic id allocation support

The current interconnect framework relies on static IDs for node
creation and registration, which limits topologies with multiple
instances of the same interconnect provider. To address this,
introduce icc_node_create_dyn() and icc_link_nodes() APIs to
dynamically allocate IDs for interconnect nodes during creation
and link. This change removes the dependency on static IDs,
allowing multiple instances of the same hardware, such as EPSS L3.

Signed-off-by: Raviteja Laggyshetty <quic_rlaggysh@quicinc.com>
Link: https://lore.kernel.org/r/20250415095343.32125-3-quic_rlaggysh@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 include/linux/interconnect-provider.h | 12 ++++++++++++
 include/linux/interconnect.h          |  3 +++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index f5aef8784692..55cfebc658e6 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -116,8 +116,10 @@ struct icc_node {
 
 int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
 		      u32 peak_bw, u32 *agg_avg, u32 *agg_peak);
+struct icc_node *icc_node_create_dyn(void);
 struct icc_node *icc_node_create(int id);
 void icc_node_destroy(int id);
+int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node);
 int icc_link_create(struct icc_node *node, const int dst_id);
 void icc_node_add(struct icc_node *node, struct icc_provider *provider);
 void icc_node_del(struct icc_node *node);
@@ -136,6 +138,11 @@ static inline int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
 	return -ENOTSUPP;
 }
 
+static inline struct icc_node *icc_node_create_dyn(void)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct icc_node *icc_node_create(int id)
 {
 	return ERR_PTR(-ENOTSUPP);
@@ -145,6 +152,11 @@ static inline void icc_node_destroy(int id)
 {
 }
 
+static inline int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int icc_link_create(struct icc_node *node, const int dst_id)
 {
 	return -ENOTSUPP;
diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h
index 97ac253df62c..e4b8808823ad 100644
--- a/include/linux/interconnect.h
+++ b/include/linux/interconnect.h
@@ -20,6 +20,9 @@
 #define Mbps_to_icc(x)	((x) * 1000 / 8)
 #define Gbps_to_icc(x)	((x) * 1000 * 1000 / 8)
 
+/* macro to indicate dynamic id allocation */
+#define ICC_ALLOC_DYN_ID	-1
+
 struct icc_path;
 struct device;
 
-- 
cgit v1.2.3


From 0c4f8ed498cea1e2beebf7aa3d198fa381264f88 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 14 Apr 2025 15:22:09 -0700
Subject: mm: skip folio reclaim in legacy memcg contexts for deadlockable
 mappings

Currently in shrink_folio_list(), reclaim for folios under writeback
falls into 3 different cases:
1) Reclaim is encountering an excessive number of folios under
   writeback and this folio has both the writeback and reclaim flags
   set
2) Dirty throttling is enabled (this happens if reclaim through cgroup
   is not enabled, if reclaim through cgroupv2 memcg is enabled, or
   if reclaim is on the root cgroup), or if the folio is not marked for
   immediate reclaim, or if the caller does not have __GFP_FS (or
   __GFP_IO if it's going to swap) set
3) Legacy cgroupv1 encounters a folio that already has the reclaim flag
   set and the caller did not have __GFP_FS (or __GFP_IO if swap) set

In cases 1) and 2), we activate the folio and skip reclaiming it while
in case 3), we wait for writeback to finish on the folio and then try
to reclaim the folio again. In case 3, we wait on writeback because
cgroupv1 does not have dirty folio throttling, as such this is a
mitigation against the case where there are too many folios in writeback
with nothing else to reclaim.

If a filesystem (eg fuse) may deadlock due to reclaim waiting on
writeback, then the filesystem needs to add inefficient messy workarounds
to prevent this. To improve the performance of these filesystems, this
commit adds two things:
a) a AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM mapping flag that filesystems
   may set to indicate that reclaim should not wait on writeback
b) if legacy memcg encounters a folio with this
   AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM flag set (eg case 3), the folio
   will be activated and skip reclaim (eg default to behavior in case 2)
   instead.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/pagemap.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 26baa78f1ca7..e19ff3183bbf 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -210,6 +210,7 @@ enum mapping_flags {
 	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
 				   folio contents */
 	AS_INACCESSIBLE = 8,	/* Do not attempt direct R/W access to the mapping */
+	AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
 	/* Bits 16-25 are used for FOLIO_ORDER */
 	AS_FOLIO_ORDER_BITS = 5,
 	AS_FOLIO_ORDER_MIN = 16,
@@ -335,6 +336,16 @@ static inline bool mapping_inaccessible(struct address_space *mapping)
 	return test_bit(AS_INACCESSIBLE, &mapping->flags);
 }
 
+static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_space *mapping)
+{
+	set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
+}
+
+static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_space *mapping)
+{
+	return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
+}
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
 	return mapping->gfp_mask;
-- 
cgit v1.2.3


From eaa0d30216c1d54b157ea0ad7f35ba76f6e9a825 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 18 Feb 2025 20:29:46 +0100
Subject: driver core: auxiliary bus: add device creation helpers

Add helper functions to create a device on the auxiliary bus.

This is meant for fairly simple usage of the auxiliary bus, to avoid having
the same code repeated in the different drivers.

Suggested-by: Stephen Boyd <sboyd@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20250218-aux-device-create-helper-v4-1-c3d7dfdea2e6@baylibre.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/auxiliary_bus.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index 65dd7f154374..4086afd0cc6b 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -254,6 +254,23 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *
 
 void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv);
 
+struct auxiliary_device *auxiliary_device_create(struct device *dev,
+						 const char *modname,
+						 const char *devname,
+						 void *platform_data,
+						 int id);
+void auxiliary_device_destroy(void *auxdev);
+
+struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev,
+							const char *modname,
+							const char *devname,
+							void *platform_data,
+							int id);
+
+#define devm_auxiliary_device_create(dev, devname, platform_data)     \
+	__devm_auxiliary_device_create(dev, KBUILD_MODNAME, devname,  \
+				       platform_data, 0)
+
 /**
  * module_auxiliary_driver() - Helper macro for registering an auxiliary driver
  * @__auxiliary_driver: auxiliary driver struct
-- 
cgit v1.2.3


From 981527828c301644bc4014faa9c523e8a5e32a32 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Sat, 12 Apr 2025 12:18:37 +0300
Subject: platform/mellanox: Rename field to improve code readability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename field 'counter' in 'mlxreg_core_hotplug_platform_data' to count.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20250412091843.33943-2-vadimp@nvidia.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/mlxreg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index 0b9f81a6f753..f6cca7a035c7 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -209,7 +209,7 @@ struct mlxreg_core_platform_data {
  * @items: same type components with the hotplug capability;
  * @irq: platform interrupt number;
  * @regmap: register map of parent device;
- * @counter: number of the components with the hotplug capability;
+ * @count: number of the components with the hotplug capability;
  * @cell: location of top aggregation interrupt register;
  * @mask: top aggregation interrupt common mask;
  * @cell_low: location of low aggregation interrupt register;
@@ -224,7 +224,7 @@ struct mlxreg_core_hotplug_platform_data {
 	struct mlxreg_core_item *items;
 	int irq;
 	void *regmap;
-	int counter;
+	int count;
 	u32 cell;
 	u32 mask;
 	u32 cell_low;
-- 
cgit v1.2.3


From f99564688f38458d86b64f099ebf03f19517cf77 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 13 Apr 2025 16:09:40 +0200
Subject: net: phy: remove device_phy_find_device

AFAICS this function has never had a user.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/ab7b8094-2eea-4e82-a047-fd60117f220b@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index a2bfae80c449..fb755358d965 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1757,7 +1757,6 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
 int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id);
 struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
 struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode);
-struct phy_device *device_phy_find_device(struct device *dev);
 struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode);
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
@@ -1779,11 +1778,6 @@ struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
 	return NULL;
 }
 
-static inline struct phy_device *device_phy_find_device(struct device *dev)
-{
-	return NULL;
-}
-
 static inline
 struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode)
 {
-- 
cgit v1.2.3


From 97d06802d10a2827ef46fd31789a26117ce7f3d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 13 Mar 2025 16:57:45 +0100
Subject: sysfs: constify bin_attribute argument of bin_attribute::read/write()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All callback implementers have been moved to the const variant of the
callbacks. The signature of the original callbacks can now be changed.
Also remove the now unnecessary transition machinery inside __BIN_ATTR().

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20250313-sysfs-const-bin_attr-final-v2-1-96284e1e88ce@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 18f7e1fd093c..576b8b3c60af 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -306,11 +306,11 @@ struct bin_attribute {
 	size_t			size;
 	void			*private;
 	struct address_space *(*f_mapping)(void);
-	ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *,
+	ssize_t (*read)(struct file *, struct kobject *, const struct bin_attribute *,
 			char *, loff_t, size_t);
 	ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *,
 			    char *, loff_t, size_t);
-	ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *,
+	ssize_t (*write)(struct file *, struct kobject *, const struct bin_attribute *,
 			 char *, loff_t, size_t);
 	ssize_t (*write_new)(struct file *, struct kobject *,
 			     const struct bin_attribute *, char *, loff_t, size_t);
@@ -332,28 +332,11 @@ struct bin_attribute {
  */
 #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr)
 
-typedef ssize_t __sysfs_bin_rw_handler_new(struct file *, struct kobject *,
-					   const struct bin_attribute *, char *, loff_t, size_t);
-
 /* macros to create static binary attributes easier */
 #define __BIN_ATTR(_name, _mode, _read, _write, _size) {		\
 	.attr = { .name = __stringify(_name), .mode = _mode },		\
-	.read = _Generic(_read,						\
-		__sysfs_bin_rw_handler_new * : NULL,			\
-		default : _read						\
-	),								\
-	.read_new = _Generic(_read,					\
-		__sysfs_bin_rw_handler_new * : _read,			\
-		default : NULL						\
-	),								\
-	.write = _Generic(_write,					\
-		__sysfs_bin_rw_handler_new * : NULL,			\
-		default : _write					\
-	),								\
-	.write_new = _Generic(_write,					\
-		__sysfs_bin_rw_handler_new * : _write,			\
-		default : NULL						\
-	),								\
+	.read = _read,							\
+	.write = _write,						\
 	.size	= _size,						\
 }
 
-- 
cgit v1.2.3


From 9bec944506faf10d6274c75e3a55bc107b75cea3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 13 Mar 2025 16:57:46 +0100
Subject: sysfs: constify attribute_group::bin_attrs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All users of this field have been migrated to bin_attrs_new.
It can now be constified.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20250313-sysfs-const-bin_attr-final-v2-2-96284e1e88ce@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 576b8b3c60af..f418aae4f113 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -107,7 +107,7 @@ struct attribute_group {
 					    int);
 	struct attribute	**attrs;
 	union {
-		struct bin_attribute		**bin_attrs;
+		const struct bin_attribute	*const *bin_attrs;
 		const struct bin_attribute	*const *bin_attrs_new;
 	};
 };
-- 
cgit v1.2.3


From 2af781a9edc4ef5f6684c0710cc3542d9be48b31 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 10 Apr 2025 17:27:12 +0200
Subject: PCI: pciehp: Ignore Link Down/Up caused by Secondary Bus Reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a Secondary Bus Reset is issued at a hotplug port, it causes a Data
Link Layer State Changed event as a side effect.  On hotplug ports using
in-band presence detect, it additionally causes a Presence Detect Changed
event.

These spurious events should not result in teardown and re-enumeration of
the device in the slot.  Hence commit 2e35afaefe64 ("PCI: pciehp: Add
reset_slot() method") masked the Presence Detect Changed Enable bit in the
Slot Control register during a Secondary Bus Reset.  Commit 06a8d89af551
("PCI: pciehp: Disable link notification across slot reset") additionally
masked the Data Link Layer State Changed Enable bit.

However masking those bits only disables interrupt generation (PCIe r6.2
sec 6.7.3.1).  The events are still visible in the Slot Status register
and picked up by the IRQ handler if it runs during a Secondary Bus Reset.
This can happen if the interrupt is shared or if an unmasked hotplug event
occurs, e.g. Attention Button Pressed or Power Fault Detected.

The likelihood of this happening used to be small, so it wasn't much of a
problem in practice.  That has changed with the recent introduction of
bandwidth control in v6.13-rc1 with commit 665745f27487 ("PCI/bwctrl:
Re-add BW notification portdrv as PCIe BW controller"):

Bandwidth control shares the interrupt with PCIe hotplug.  A Secondary Bus
Reset causes a Link Bandwidth Notification, so the hotplug IRQ handler
runs, picks up the masked events and tears down the device in the slot.

As a result, Joel reports VFIO passthrough failure of a GPU, which Ilpo
root-caused to the incorrect handling of masked hotplug events.

Clearly, a more reliable way is needed to ignore spurious hotplug events.

For Downstream Port Containment, a new ignore mechanism was introduced by
commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC").
It has been working reliably for the past four years.

Adapt it for Secondary Bus Resets.

Introduce two helpers to annotate code sections which cause spurious link
changes:  pci_hp_ignore_link_change() and pci_hp_unignore_link_change()
Use those helpers in lieu of masking interrupts in the Slot Control
register.

Introduce a helper to check whether such a code section is executing
concurrently and if so, await it:  pci_hp_spurious_link_change()
Invoke the helper in the hotplug IRQ thread pciehp_ist().  Re-use the
IRQ thread's existing code which ignores DPC-induced link changes unless
the link is unexpectedly down after reset recovery or the device was
replaced during the bus reset.

That code block in pciehp_ist() was previously only executed if a Data
Link Layer State Changed event has occurred.  Additionally execute it for
Presence Detect Changed events.  That's necessary for compatibility with
PCIe r1.0 hotplug ports because Data Link Layer State Changed didn't exist
before PCIe r1.1.  DPC was added with PCIe r3.1 and thus DPC-capable
hotplug ports always support Data Link Layer State Changed events.
But the same cannot be assumed for Secondary Bus Reset, which already
existed in PCIe r1.0.

Secondary Bus Reset is only one of many causes of spurious link changes.
Others include runtime suspend to D3cold, firmware updates or FPGA
reconfiguration.  The new pci_hp_{,un}ignore_link_change() helpers may be
used by all kinds of drivers to annotate such code sections, hence their
declarations are publicly visible in <linux/pci.h>.  A case in point is
the Mellanox Ethernet driver which disables a firmware reset feature if
the Ethernet card is attached to a hotplug port, see commit 3d7a3f2612d7
("net/mlx5: Nack sync reset request when HotPlug is enabled").  Going
forward, PCIe hotplug will be able to cope gracefully with all such use
cases once the code sections are properly annotated.

The new helpers internally use two bits in struct pci_dev's priv_flags as
well as a wait_queue.  This mirrors what was done for DPC by commit
a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC").  That may
be insufficient if spurious link changes are caused by multiple sources
simultaneously.  An example might be a Secondary Bus Reset issued by AER
during FPGA reconfiguration.  If this turns out to happen in real life,
support for it can easily be added by replacing the PCI_LINK_CHANGING flag
with an atomic_t counter incremented by pci_hp_ignore_link_change() and
decremented by pci_hp_unignore_link_change().  Instead of awaiting a zero
PCI_LINK_CHANGING flag, the pci_hp_spurious_link_change() helper would
then simply await a zero counter.

Fixes: 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller")
Reported-by: Joel Mathew Thomas <proxy0@tutamail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Joel Mathew Thomas <proxy0@tutamail.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/d04deaf49d634a2edf42bf3c06ed81b4ca54d17b.1744298239.git.lukas@wunner.de
---
 include/linux/pci.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e8e3fd77e96..833b54f3ce6d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1848,6 +1848,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; }
 static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
 #endif
 
+#ifdef CONFIG_HOTPLUG_PCI
+void pci_hp_ignore_link_change(struct pci_dev *pdev);
+void pci_hp_unignore_link_change(struct pci_dev *pdev);
+#else
+static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { }
+static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { }
+#endif
+
 #ifdef CONFIG_PCIEAER
 bool pci_aer_available(void);
 #else
-- 
cgit v1.2.3


From 7c571ac57d9d97190dcba18212fabf99888b0c48 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 14 Apr 2025 14:26:30 -0700
Subject: net: ptp: introduce .supported_extts_flags to ptp_clock_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PTP_EXTTS_REQUEST(2) ioctl has a flags field which specifies how the
external timestamp request should behave. This includes which edge of the
signal to timestamp, as well as a specialized "offset" mode. It is expected
that more flags will be added in the future.

Driver authors routinely do not check the flags, often accepting requests
with flags which they do not support. Even drivers which do check flags may
not be future-proofed to reject flags not yet defined. Thus, any future
flag additions often require manually updating drivers to reject these
flags.

This approach of hoping we catch flag checks during review, or playing
whack-a-mole after the fact is the wrong approach.

Introduce the "supported_extts_flags" field to the ptp_clock_info
structure. This field defines the set of flags the device actually
supports.

Update the core character device logic to check this field and reject
unsupported requests. Getting this right is somewhat tricky. First, to
avoid unnecessary repetition and make basic functionality work when
.supported_extts_flags is 0, the core always accepts the PTP_ENABLE_FEATURE
flag. This flag is used to set the 'on' parameter to the .enable function
and is thus always 'supported' by all drivers.

For backwards compatibility, the PTP_RISING_EDGE and PTP_FALLING_EDGE flags
are merely "hints" when using the old PTP_EXTTS_REQUEST ioctl, and are not
expected to be enforced. If the user issues PTP_EXTTS_REQUEST2, the
PTP_STRICT_FLAGS flag is added which is supposed to inform the driver to
strictly validate the flags and reject unsupported requests. To handle
this, first check if the driver reports PTP_STRICT_FLAGS support. If it
does not, then always allow the PTP_RISING_EDGE and PTP_FALLING_EDGE flags.
This keeps backwards compatibility with the original PTP_EXTTS_REQUEST
ioctl where these flags are not guaranteed to be honored.

This way, drivers which do not set the supported_extts_flags will continue
to accept requests for the original PTP_EXTTS_REQUEST ioctl. The core will
automatically reject requests with new flags, and correctly reject requests
with PTP_STRICT_FLAGS, where the driver is supposed to strictly validate
the flags.

Update the various drivers, refactoring their validation logic into the
.supported_extts_flags field. For consistency and readability,
PTP_ENABLE_FEATURE is not set in the supported flags list, and
PTP_EXTTS_EDGES is expanded to PTP_RISING_EDGE | PTP_FALLING_EDGE in all
cases.

Note the following driver files set n_ext_ts to a non-zero value but did
not check flags at all:

 • drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
 • drivers/net/ethernet/freescale/enetc/enetc_ptp.c
 • drivers/net/ethernet/intel/i40e/i40e_ptp.c
 • drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
 • drivers/net/ethernet/renesas/ravb_ptp.c
 • drivers/net/ethernet/renesas/rtsn.c
 • drivers/net/ethernet/renesas/rtsn.h
 • drivers/net/ethernet/ti/am65-cpts.c
 • drivers/net/ethernet/ti/cpts.h
 • drivers/net/ethernet/ti/icssg/icss_iep.c
 • drivers/net/ethernet/xscale/ptp_ixp46x.c
 • drivers/net/phy/bcm-phy-ptp.c
 • drivers/ptp/ptp_ocp.c
 • drivers/ptp/ptp_pch.c
 • drivers/ptp/ptp_qoriq.c

These drivers behavior does change slightly: they will now reject the
PTP_EXTTS_REQUEST2 ioctl, because they do not strictly validate their
flags. This also makes them no longer incorrectly accept PTP_EXT_OFFSET.

Also note that the renesas ravb driver does not support PTP_STRICT_FLAGS.
We could leave the .supported_extts_flags as 0, but I added the
PTP_RISING_EDGE | PTP_FALLING_EDGE since the driver previously manually
validated these flags. This is equivalent to 0 because the core will allow
these flags regardless unless PTP_STRICT_FLAGS is also set.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20250414-jk-supported-perout-flags-v2-1-f6b17d15475c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ptp_clock_kernel.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 0d68d09bedd1..25cba2e5ee69 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -68,6 +68,17 @@ struct ptp_system_timestamp {
  * @n_per_out: The number of programmable periodic signals.
  * @n_pins:    The number of programmable pins.
  * @pps:       Indicates whether the clock supports a PPS callback.
+ *
+ * @supported_extts_flags:  The set of flags the driver supports for the
+ *                          PTP_EXTTS_REQUEST ioctl. The PTP core will use
+ *                          this list to reject unsupported requests.
+ *                          PTP_ENABLE_FEATURE is assumed and does not need to
+ *                          be included. If PTP_STRICT_FLAGS is *not* set,
+ *                          then both PTP_RISING_EDGE and PTP_FALLING_EDGE
+ *                          will be assumed. Note that PTP_STRICT_FLAGS must
+ *                          be set if the drivers wants to honor
+ *                          PTP_EXTTS_REQUEST2 and any future flags.
+ *
  * @pin_config: Array of length 'n_pins'. If the number of
  *              programmable pins is nonzero, then drivers must
  *              allocate and initialize this array.
@@ -174,6 +185,7 @@ struct ptp_clock_info {
 	int n_per_out;
 	int n_pins;
 	int pps;
+	unsigned int supported_extts_flags;
 	struct ptp_pin_desc *pin_config;
 	int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
 	int (*adjphase)(struct ptp_clock_info *ptp, s32 phase);
-- 
cgit v1.2.3


From d9f3e9ecc4562ae07aaf614cf0a6690ef7ca0e10 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 14 Apr 2025 14:26:31 -0700
Subject: net: ptp: introduce .supported_perout_flags to ptp_clock_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PTP_PEROUT_REQUEST2 ioctl has gained support for flags specifying
specific output behavior including PTP_PEROUT_ONE_SHOT,
PTP_PEROUT_DUTY_CYCLE, PTP_PEROUT_PHASE.

Driver authors are notorious for not checking the flags of the request.
This results in misinterpreting the request, generating an output signal
that does not match the requested value. It is anticipated that even more
flags will be added in the future, resulting in even more broken requests.

Expecting these issues to be caught during review or playing whack-a-mole
after the fact is not a great solution.

Instead, introduce the supported_perout_flags field in the ptp_clock_info
structure. Update the core character device logic to explicitly reject any
request which has a flag not on this list.

This ensures that drivers must 'opt in' to the flags they support. Drivers
which don't set the .supported_perout_flags field will not need to check
that unsupported flags aren't passed, as the core takes care of this.

Update the drivers which do support flags to set this new field.

Note the following driver files set n_per_out to a non-zero value but did
not check the flags at all:

 • drivers/ptp/ptp_clockmatrix.c
 • drivers/ptp/ptp_idt82p33.c
 • drivers/ptp/ptp_fc3.c
 • drivers/net/ethernet/ti/am65-cpts.c
 • drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
 • drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
 • drivers/net/dsa/sja1105/sja1105_ptp.c
 • drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
 • drivers/net/ethernet/mscc/ocelot_vsc7514.c
 • drivers/net/ethernet/intel/i40e/i40e_ptp.c

Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20250414-jk-supported-perout-flags-v2-2-f6b17d15475c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ptp_clock_kernel.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 25cba2e5ee69..eced7e9bf69a 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -69,6 +69,11 @@ struct ptp_system_timestamp {
  * @n_pins:    The number of programmable pins.
  * @pps:       Indicates whether the clock supports a PPS callback.
  *
+ * @supported_perout_flags:  The set of flags the driver supports for the
+ *                           PTP_PEROUT_REQUEST ioctl. The PTP core will
+ *                           reject a request with any flag not specified
+ *                           here.
+ *
  * @supported_extts_flags:  The set of flags the driver supports for the
  *                          PTP_EXTTS_REQUEST ioctl. The PTP core will use
  *                          this list to reject unsupported requests.
@@ -185,6 +190,7 @@ struct ptp_clock_info {
 	int n_per_out;
 	int n_pins;
 	int pps;
+	unsigned int supported_perout_flags;
 	unsigned int supported_extts_flags;
 	struct ptp_pin_desc *pin_config;
 	int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
-- 
cgit v1.2.3


From 5bb61dc76d11a661c323dee1505b408d18c31565 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 12 Apr 2025 13:37:00 +0800
Subject: crypto: ahash - Remove request chaining

Request chaining requires the user to do too much book keeping.
Remove it from ahash.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 1e3809d28abd..dd817f56ff0c 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -14,7 +14,6 @@
 
 #include <linux/completion.h>
 #include <linux/errno.h>
-#include <linux/list.h>
 #include <linux/refcount.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -179,7 +178,6 @@ struct crypto_async_request {
 	struct crypto_tfm *tfm;
 
 	u32 flags;
-	int err;
 };
 
 /**
@@ -473,19 +471,6 @@ static inline unsigned int crypto_tfm_ctx_alignment(void)
 	return __alignof__(tfm->__crt_ctx);
 }
 
-static inline void crypto_reqchain_init(struct crypto_async_request *req)
-{
-	req->err = -EINPROGRESS;
-	INIT_LIST_HEAD(&req->list);
-}
-
-static inline void crypto_request_chain(struct crypto_async_request *req,
-					struct crypto_async_request *head)
-{
-	req->err = -EINPROGRESS;
-	list_add_tail(&req->list, &head->list);
-}
-
 static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm)
 {
 	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
-- 
cgit v1.2.3


From 1451e3e561be9ff4e86b911b9367a2223275d16f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 7 Apr 2025 18:02:51 +0800
Subject: crypto: api - Add helpers to manage request flags

Add helpers so that the ON_STACK request flag management is not
duplicated all over the place.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index dd817f56ff0c..a387f1547ea0 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -476,5 +476,29 @@ static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
 }
 
+static inline bool crypto_req_on_stack(struct crypto_async_request *req)
+{
+	return req->flags & CRYPTO_TFM_REQ_ON_STACK;
+}
+
+static inline void crypto_request_set_callback(
+	struct crypto_async_request *req, u32 flags,
+	crypto_completion_t compl, void *data)
+{
+	u32 keep = CRYPTO_TFM_REQ_ON_STACK;
+
+	req->complete = compl;
+	req->data = data;
+	req->flags &= keep;
+	req->flags |= flags & ~keep;
+}
+
+static inline void crypto_request_set_tfm(struct crypto_async_request *req,
+					  struct crypto_tfm *tfm)
+{
+	req->tfm = tfm;
+	req->flags &= ~CRYPTO_TFM_REQ_ON_STACK;
+}
+
 #endif	/* _LINUX_CRYPTO_H */
 
-- 
cgit v1.2.3


From 6eed1e3552fc076d2617f6793cb148d485696ab6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 7 Apr 2025 18:20:57 +0800
Subject: crypto: api - Mark cra_init/cra_exit as deprecated

These functions have been obsoleted by the type-specific init/exit
functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index a387f1547ea0..56cf229e2530 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -300,17 +300,8 @@ struct cipher_alg {
  *	   by @cra_type and @cra_flags above, the associated structure must be
  *	   filled with callbacks. This field might be empty. This is the case
  *	   for ahash, shash.
- * @cra_init: Initialize the cryptographic transformation object. This function
- *	      is used to initialize the cryptographic transformation object.
- *	      This function is called only once at the instantiation time, right
- *	      after the transformation context was allocated. In case the
- *	      cryptographic hardware has some special requirements which need to
- *	      be handled by software, this function shall check for the precise
- *	      requirement of the transformation and put any software fallbacks
- *	      in place.
- * @cra_exit: Deinitialize the cryptographic transformation object. This is a
- *	      counterpart to @cra_init, used to remove various changes set in
- *	      @cra_init.
+ * @cra_init: Deprecated, do not use.
+ * @cra_exit: Deprecated, do not use.
  * @cra_u.cipher: Union member which contains a single-block symmetric cipher
  *		  definition. See @struct @cipher_alg.
  * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE
-- 
cgit v1.2.3


From afddce13ce81d52a13898fa0700917835c71acd6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 7 Apr 2025 18:20:59 +0800
Subject: crypto: api - Add reqsize to crypto_alg

Add a reqsize field to crypto_alg with the intention of replacing
the type-specific reqsize field currently used by ahash and acomp.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 56cf229e2530..15476b085ce3 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -276,6 +276,7 @@ struct cipher_alg {
  *		   to the alignmask of the algorithm being used, in order to
  *		   avoid the API having to realign them.  Note: the alignmask is
  *		   not supported for hash algorithms and is always 0 for them.
+ * @cra_reqsize: Size of the request context for this algorithm.
  * @cra_priority: Priority of this transformation implementation. In case
  *		  multiple transformations with same @cra_name are available to
  *		  the Crypto API, the kernel will use the one with highest
@@ -322,6 +323,7 @@ struct crypto_alg {
 	unsigned int cra_blocksize;
 	unsigned int cra_ctxsize;
 	unsigned int cra_alignmask;
+	unsigned int cra_reqsize;
 
 	int cra_priority;
 	refcount_t cra_refcnt;
@@ -441,6 +443,11 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_alignmask;
 }
 
+static inline unsigned int crypto_tfm_alg_reqsize(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_reqsize;
+}
+
 static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm)
 {
 	return tfm->crt_flags;
-- 
cgit v1.2.3


From f1440a90465bea1993f937ac7add592ce1e4ff44 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 12 Apr 2025 13:16:43 +0800
Subject: crypto: api - Add support for duplicating algorithms before
 registration

If the bit CRYPTO_ALG_DUP_FIRST is set, an algorithm will be
duplicated by kmemdup before registration.  This is inteded for
hardware-based algorithms that may be unplugged at will.

Do not use this if the algorithm data structure is embedded in a
bigger data structure.  Perform the duplication in the driver
instead.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 15476b085ce3..b89b1b348095 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -49,6 +49,15 @@
  */
 #define CRYPTO_ALG_NEED_FALLBACK	0x00000100
 
+/*
+ * Set if the algorithm data structure should be duplicated into
+ * kmalloc memory before registration.  This is useful for hardware
+ * that can be disconnected at will.  Do not use this if the data
+ * structure is embedded into a bigger one.  Duplicate the overall
+ * data structure in the driver in that case.
+ */
+#define CRYPTO_ALG_DUP_FIRST		0x00000200
+
 /*
  * Set if the algorithm has passed automated run-time testing.  Note that
  * if there is no run-time testing for a given algorithm it is considered
-- 
cgit v1.2.3


From 43eca05b6a3b917c600e10cc6b06bfa57fa57401 Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Fri, 11 Apr 2025 10:49:56 +0300
Subject: xfrm: Add explicit dev to .xdo_dev_state_{add,delete,free}

Previously, device driver IPSec offload implementations would fall into
two categories:
1. Those that used xso.dev to determine the offload device.
2. Those that used xso.real_dev to determine the offload device.

The first category didn't work with bonding while the second did.
In a non-bonding setup the two pointers are the same.

This commit adds explicit pointers for the offload netdevice to
.xdo_dev_state_add() / .xdo_dev_state_delete() / .xdo_dev_state_free()
which eliminates the confusion and allows drivers from the first
category to work with bonding.

xso.real_dev now becomes a private pointer managed by the bonding
driver.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/netdevice.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8544f6a680c..88dfb8aeed3c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1013,9 +1013,13 @@ struct netdev_bpf {
 
 #ifdef CONFIG_XFRM_OFFLOAD
 struct xfrmdev_ops {
-	int	(*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack);
-	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
-	void	(*xdo_dev_state_free) (struct xfrm_state *x);
+	int	(*xdo_dev_state_add)(struct net_device *dev,
+				     struct xfrm_state *x,
+				     struct netlink_ext_ack *extack);
+	void	(*xdo_dev_state_delete)(struct net_device *dev,
+					struct xfrm_state *x);
+	void	(*xdo_dev_state_free)(struct net_device *dev,
+				      struct xfrm_state *x);
 	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
 				       struct xfrm_state *x);
 	void	(*xdo_dev_state_advance_esn) (struct xfrm_state *x);
-- 
cgit v1.2.3


From adc8d1200dbbe6726abf89070edb58bca95f1485 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Apr 2025 10:01:36 +0300
Subject: i2c: core: Deprecate of_node in struct i2c_board_info

Two members of the same or quite similar semantics is quite confusing
to begin with. Moreover, fwnode covers all possible firmware descriptions
that Linux kernel supports. Deprecate of_node in struct i2c_board_info,
so users will be warned and in the future there is a plan to convert
the users and remove it completely.

Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 2e4903b7f7bc..cc1437f29823 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -405,7 +405,7 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; }
  * @addr: stored in i2c_client.addr
  * @dev_name: Overrides the default <busnr>-<addr> dev_name if set
  * @platform_data: stored in i2c_client.dev.platform_data
- * @of_node: pointer to OpenFirmware device node
+ * @of_node: **DEPRECATED** - use @fwnode for this
  * @fwnode: device node supplied by the platform firmware
  * @swnode: software node for the device
  * @resources: resources associated with the device
-- 
cgit v1.2.3


From 0e3f6c3696424fa90d6f512779d617a05a1cf031 Mon Sep 17 00:00:00 2001
From: K Prateek Nayak <kprateek.nayak@amd.com>
Date: Wed, 9 Apr 2025 05:34:44 +0000
Subject: sched/topology: Introduce sched_update_asym_prefer_cpu()

A subset of AMD Processors supporting Preferred Core Rankings also
feature the ability to dynamically switch these rankings at runtime to
bias load balancing towards or away from the LLC domain with larger
cache.

To support dynamically updating "sg->asym_prefer_cpu" without needing to
rebuild the sched domain, introduce sched_update_asym_prefer_cpu() which
recomutes the "asym_prefer_cpu" when the core-ranking of a CPU changes.

sched_update_asym_prefer_cpu() swaps the "sg->asym_prefer_cpu" with the
CPU whose ranking has changed if the new ranking is greater than that of
the "asym_prefer_cpu". If CPU whose ranking has changed is the current
"asym_prefer_cpu", it scans the CPUs of the sched groups to find the new
"asym_prefer_cpu" and sets it accordingly.

get_group() for non-overlapping sched domains returns the sched group
for the first CPU in the sched_group_span() which ensures all CPUs in
the group see the updated value of "asym_prefer_cpu".

Overlapping groups are allocated differently and will require moving the
"asym_prefer_cpu" to "sg->sgc" but since the current implementations do
not set "SD_ASYM_PACKING" at NUMA domains, skip additional
indirection and place a SCHED_WARN_ON() to alert any future users.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250409053446.23367-3-kprateek.nayak@amd.com
---
 include/linux/sched/topology.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 7b4301b7235f..198bb5cc1774 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -195,6 +195,8 @@ struct sched_domain_topology_level {
 };
 
 extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
+extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio);
+
 
 # define SD_INIT_NAME(type)		.name = #type
 
@@ -223,6 +225,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu)
 	return true;
 }
 
+static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
+{
+}
+
 #endif	/* !CONFIG_SMP */
 
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
-- 
cgit v1.2.3


From dd09eb0e2cc4ba91cd64dba2b733d3f8e1248fd8 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 7 Apr 2025 10:29:35 -0700
Subject: EISA: Increase length of device names

GCC 15's -Wunterminated-string-initialization warned about truncated
name strings. Instead of marking them with the "nonstring" attribute[1],
increase their length to correctly include enough space for the
terminating NUL character, as they are used with %s format specifiers
when showing resource allocations in /proc/ioports:

        seq_printf(m, "%*s%0*llx-%0*llx : %s\n", ..., r->name);

The strings in eisa.ids have a max length of 73, and the 50 limit was an
arbitrary limit that was removed back in 2008 with commit ca52a49846f1
("driver core: remove DEVICE_NAME_SIZE define"). Change the limit to 74
so nothing is truncated any more.

Additionally fix the Makefile to use "if_changed" instead of "cmd"
to detect changes to the command line used to generate the target,
otherwise devlist.h won't be rebuilt.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117178 [1]
Signed-off-by: Kees Cook <kees@kernel.org>
Acked-by: Alejandro Colomar <alx@kernel.org>
Link: https://lore.kernel.org/r/20250407172926.it.281-kees@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/eisa.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/eisa.h b/include/linux/eisa.h
index f98200cae637..21a2ecc1e538 100644
--- a/include/linux/eisa.h
+++ b/include/linux/eisa.h
@@ -28,6 +28,9 @@
 #define EISA_CONFIG_ENABLED         1
 #define EISA_CONFIG_FORCED          2
 
+/* Chosen to hold the longest string in eisa.ids. */
+#define EISA_DEVICE_INFO_NAME_SIZE 74
+
 /* There is not much we can say about an EISA device, apart from
  * signature, slot number, and base address. dma_mask is set by
  * default to parent device mask..*/
@@ -41,7 +44,7 @@ struct eisa_device {
 	u64                   dma_mask;
 	struct device         dev; /* generic device */
 #ifdef CONFIG_EISA_NAMES
-	char		      pretty_name[50];
+	char		      pretty_name[EISA_DEVICE_INFO_NAME_SIZE];
 #endif
 };
 
-- 
cgit v1.2.3


From e80c235994fd1d7004a4e5d64123f8f07ec80ade Mon Sep 17 00:00:00 2001
From: Alan Borzeszkowski <alan.borzeszkowski@linux.intel.com>
Date: Mon, 14 Apr 2025 19:55:53 +0200
Subject: thunderbolt: Add Thunderbolt/USB4 <-> USB3 match function

This function checks whether given USB4 port device matches with USB3.x
port device, using ACPI _DSD property.

It is designed to be used by component framework to match
USB4 ports with Type-C ports they are connected to.

Also, added USB4 config stub in case mapping function is not reachable.

Signed-off-by: Alan Borzeszkowski <alan.borzeszkowski@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 include/linux/thunderbolt.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
index 7d902d8c054b..75247486616b 100644
--- a/include/linux/thunderbolt.h
+++ b/include/linux/thunderbolt.h
@@ -11,6 +11,13 @@
 #ifndef THUNDERBOLT_H_
 #define THUNDERBOLT_H_
 
+#include <linux/types.h>
+
+struct fwnode_handle;
+struct device;
+
+#if IS_REACHABLE(CONFIG_USB4)
+
 #include <linux/device.h>
 #include <linux/idr.h>
 #include <linux/list.h>
@@ -674,4 +681,15 @@ static inline struct device *tb_ring_dma_device(struct tb_ring *ring)
 	return &ring->nhi->pdev->dev;
 }
 
+bool usb4_usb3_port_match(struct device *usb4_port_dev,
+			  const struct fwnode_handle *usb3_port_fwnode);
+
+#else /* CONFIG_USB4 */
+static inline bool usb4_usb3_port_match(struct device *usb4_port_dev,
+					const struct fwnode_handle *usb3_port_fwnode)
+{
+	return false;
+}
+#endif /* CONFIG_USB4 */
+
 #endif /* THUNDERBOLT_H_ */
-- 
cgit v1.2.3


From 17240749f26e07cafa676688d8a3326086498447 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@openvpn.net>
Date: Tue, 15 Apr 2025 13:17:29 +0200
Subject: skb: implement skb_send_sock_locked_with_flags()

When sending an skb over a socket using skb_send_sock_locked(),
it is currently not possible to specify any flag to be set in
msghdr->msg_flags.

However, we may want to pass flags the user may have specified,
like MSG_NOSIGNAL.

Extend __skb_send_sock() with a new argument 'flags' and add a
new interface named skb_send_sock_locked_with_flags().

Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
Link: https://patch.msgid.link/20250415-b4-ovpn-v26-12-577f6097b964@openvpn.net
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f1381aff0f89..beb084ee4f4d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4145,6 +4145,8 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 		    unsigned int flags);
 int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
 			 int len);
+int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb,
+				    int offset, int len, int flags);
 int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
-- 
cgit v1.2.3


From 13f43d7cf3e0570004a0d960bc1be23db827c2ff Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 8 Apr 2025 13:53:56 -0300
Subject: iommu/pages: Formalize the freelist API

We want to get rid of struct page references outside the internal
allocator implementation. The free list has the driver open code something
like:

   list_add_tail(&virt_to_page(ptr)->lru, freelist);

Move the above into a small inline and make the freelist into a wrapper
type 'struct iommu_pages_list' so that the compiler can help check all the
conversion.

This struct has also proven helpful in some future ideas to convert to a
singly linked list to get an extra pointer in the struct page, and to
signal that the pages should be freed with RCU.

Use a temporary _Generic so we don't need to rename the free function as
the patches progress.

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/8-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ccce8a751e2a..6dd3a221c5bc 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -341,6 +341,18 @@ typedef unsigned int ioasid_t;
 /* Read but do not clear any dirty bits */
 #define IOMMU_DIRTY_NO_CLEAR (1 << 0)
 
+/*
+ * Pages allocated through iommu_alloc_pages_node() can be placed on this list
+ * using iommu_pages_list_add(). Note: ONLY pages from iommu_alloc_pages_node()
+ * can be used this way!
+ */
+struct iommu_pages_list {
+	struct list_head pages;
+};
+
+#define IOMMU_PAGES_LIST_INIT(name) \
+	((struct iommu_pages_list){ .pages = LIST_HEAD_INIT(name.pages) })
+
 #ifdef CONFIG_IOMMU_API
 
 /**
-- 
cgit v1.2.3


From 868240c34eb113e4121d4ad8ad7458d8caad87d3 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 8 Apr 2025 13:53:59 -0300
Subject: iommu: Change iommu_iotlb_gather to use iommu_page_list

This converts the remaining places using list of pages to the new API.

The Intel free path was shared with its gather path, so it is converted at
the same time.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/11-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 6dd3a221c5bc..3fb62165db19 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -375,7 +375,7 @@ struct iommu_iotlb_gather {
 	unsigned long		start;
 	unsigned long		end;
 	size_t			pgsize;
-	struct list_head	freelist;
+	struct iommu_pages_list	freelist;
 	bool			queued;
 };
 
@@ -864,7 +864,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 {
 	*gather = (struct iommu_iotlb_gather) {
 		.start	= ULONG_MAX,
-		.freelist = LIST_HEAD_INIT(gather->freelist),
+		.freelist = IOMMU_PAGES_LIST_INIT(gather->freelist),
 	};
 }
 
-- 
cgit v1.2.3


From b3efacc451e19a85fb5acb56d95f40532c4e31d2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 8 Apr 2025 13:54:03 -0300
Subject: iommu/pages: Allow sub page sizes to be passed into the allocator

Generally drivers have a specific idea what their HW structure size should
be. In a lot of cases this is related to PAGE_SIZE, but not always. ARM64,
for example, allows a 4K IO page table size on a 64K CPU page table
system.

Currently we don't have any good support for sub page allocations, but
make the API accommodate this by accepting a sub page size from the caller
and rounding up internally.

This is done by moving away from order as the size input and using size:
  size == 1 << (order + PAGE_SHIFT)

Following patches convert drivers away from using order and try to specify
allocation sizes independent of PAGE_SIZE.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/15-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3fb62165db19..062818b51822 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -342,9 +342,9 @@ typedef unsigned int ioasid_t;
 #define IOMMU_DIRTY_NO_CLEAR (1 << 0)
 
 /*
- * Pages allocated through iommu_alloc_pages_node() can be placed on this list
- * using iommu_pages_list_add(). Note: ONLY pages from iommu_alloc_pages_node()
- * can be used this way!
+ * Pages allocated through iommu_alloc_pages_node_sz() can be placed on this
+ * list using iommu_pages_list_add(). Note: ONLY pages from
+ * iommu_alloc_pages_node_sz() can be used this way!
  */
 struct iommu_pages_list {
 	struct list_head pages;
-- 
cgit v1.2.3


From 163ddf1fea590229c30a8dc4c29ff4febfb895c3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 17 Apr 2025 18:24:46 +0300
Subject: spi: Add spi_bpw_to_bytes() helper and use it

This helper converts the given bits per word to bytes. The result
will always be power-of-two, e.g.,

    ===============    =================
    Input (in bits)    Output (in bytes)
    ===============    =================
            5                   1
            9                   2
            21                  4
            37                  8
    ===============    =================

It will return 0 for the 0 input.

There are a couple of cases in SPI that are using the same approach
and at least one more (in IIO) would benefit of it. Add a helper
for everyone.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20250417152529.490582-2-andriy.shevchenko@linux.intel.com
Acked-by: Mukesh Kumar Savaliya <quic_msavaliy@quicinc.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0ba5e49bace4..5b872fe3b1a2 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1325,6 +1325,32 @@ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw)
 	return false;
 }
 
+/**
+ * spi_bpw_to_bytes - Covert bits per word to bytes
+ * @bpw: Bits per word
+ *
+ * This function converts the given @bpw to bytes. The result is always
+ * power-of-two, e.g.,
+ *
+ *  ===============    =================
+ *  Input (in bits)    Output (in bytes)
+ *  ===============    =================
+ *          5                   1
+ *          9                   2
+ *          21                  4
+ *          37                  8
+ *  ===============    =================
+ *
+ * It will return 0 for the 0 input.
+ *
+ * Returns:
+ * Bytes for the given @bpw.
+ */
+static inline u32 spi_bpw_to_bytes(u32 bpw)
+{
+	return roundup_pow_of_two(BITS_TO_BYTES(bpw));
+}
+
 /**
  * spi_controller_xfer_timeout - Compute a suitable timeout value
  * @ctlr: SPI device
-- 
cgit v1.2.3


From a1b669ea16c4d7c1a1a8fc7e25aaf651ea0078c3 Mon Sep 17 00:00:00 2001
From: Amery Hung <ameryhung@gmail.com>
Date: Wed, 9 Apr 2025 14:45:57 -0700
Subject: bpf: Prepare to reuse get_ctx_arg_idx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename get_ctx_arg_idx to bpf_ctx_arg_idx, and allow others to call it.
No functional change.

Signed-off-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20250409214606.2000194-2-ameryhung@gmail.com
---
 include/linux/btf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index ebc0c0c9b944..b2983706292f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -522,6 +522,7 @@ bool btf_param_match_suffix(const struct btf *btf,
 			    const char *suffix);
 int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
 		       u32 arg_no);
+u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, int off);
 
 struct bpf_verifier_log;
 
-- 
cgit v1.2.3


From 151e13ece86d234213b7f224f0e26a957c0eeb3e Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 15 Apr 2025 18:02:15 -0700
Subject: net: ethtool: Adjust exactly ETH_GSTRING_LEN-long stats to use memcpy

Many drivers populate the stats buffer using C-String based APIs (e.g.
ethtool_sprintf() and ethtool_puts()), usually when building up the
list of stats individually (i.e. with a for() loop). This, however,
requires that the source strings be populated in such a way as to have
a terminating NUL byte in the source.

Other drivers populate the stats buffer directly using one big memcpy()
of an entire array of strings. No NUL termination is needed here, as the
bytes are being directly passed through. Yet others will build up the
stats buffer individually, but also use memcpy(). This, too, does not
need NUL termination of the source strings.

However, there are cases where the strings that populate the
source stats strings are exactly ETH_GSTRING_LEN long, and GCC
15's -Wunterminated-string-initialization option complains that the
trailing NUL byte has been truncated. This situation is fine only if the
driver is using the memcpy() approach. If the C-String APIs are used,
the destination string name will have its final byte truncated by the
required trailing NUL byte applied by the C-string API.

For drivers that are already using memcpy() but have initializers that
truncate the NUL terminator, mark their source strings as __nonstring to
silence the GCC warnings.

For drivers that have initializers that truncate the NUL terminator and
are using the C-String APIs, switch to memcpy() to avoid destination
string truncation and mark their source strings as __nonstring to silence
the GCC warnings. (Also introduce ethtool_cpy() as a helper to make this
an easy replacement).

Specifically the following warnings were investigated and addressed:

../drivers/net/ethernet/chelsio/cxgb/cxgb2.c:364:9: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  364 |         "TxFramesAbortedDueToXSCollisions",
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/freescale/enetc/enetc_ethtool.c:165:33: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  165 |         { ENETC_PM_R1523X(0),   "MAC rx 1523 to max-octet packets" },
      |                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/freescale/enetc/enetc_ethtool.c:190:33: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  190 |         { ENETC_PM_T1523X(0),   "MAC tx 1523 to max-octet packets" },
      |                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/google/gve/gve_ethtool.c:76:9: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
   76 |         "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c:117:53: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  117 |         STMMAC_STAT(ptp_rx_msg_type_pdelay_follow_up),
      |                                                     ^
../drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c:46:12: note: in definition of macro 'STMMAC_STAT'
   46 |         { #m, sizeof_field(struct stmmac_extra_stats, m),       \
      |            ^
../drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c:328:24: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  328 |                 .str = "a_mac_control_frames_transmitted",
      |                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c:340:24: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
  340 |                 .str = "a_pause_mac_ctrl_frames_received",
      |                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Kees Cook <kees@kernel.org>
Reviewed-by: Petr Machata <petrm@nvidia.com> # for mlxsw
Reviewed-by: Harshitha Ramamurthy <hramamurthy@google.com>
Link: https://patch.msgid.link/20250416010210.work.904-kees@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 013d25858642..7edb5f5e7134 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1330,6 +1330,17 @@ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...);
  */
 extern void ethtool_puts(u8 **data, const char *str);
 
+/**
+ * ethtool_cpy - Write possibly-not-NUL-terminated string to ethtool string data
+ * @data: Pointer to a pointer to the start of string to write into
+ * @str: NUL-byte padded char array of size ETH_GSTRING_LEN to copy from
+ */
+#define ethtool_cpy(data, str)	do {				\
+	BUILD_BUG_ON(sizeof(str) != ETH_GSTRING_LEN);		\
+	memcpy(*(data), str, ETH_GSTRING_LEN);			\
+	*(data) += ETH_GSTRING_LEN;				\
+} while (0)
+
 /* Link mode to forced speed capabilities maps */
 struct ethtool_forced_speed_map {
 	u32		speed;
-- 
cgit v1.2.3


From 22cbc1ee268b7ec0000848708944daa61c6e4909 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 15 Apr 2025 20:04:47 -0700
Subject: netdev: fix the locking for netdev notifications

Kuniyuki reports that the assert for netdev lock fires when
there are netdev event listeners (otherwise we skip the netlink
event generation).

Correct the locking when coming from the notifier.

The NETDEV_XDP_FEAT_CHANGE notifier is already fully locked,
it's the documentation that's incorrect.

Fixes: 99e44f39a8f7 ("netdev: depend on netdev->lock for xdp features")
Reported-by: syzkaller <syzkaller@googlegroups.com>
Reported-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/20250410171019.62128-1-kuniyu@amazon.com
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250416030447.1077551-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e6036b82ef4c..0321fd952f70 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2520,7 +2520,7 @@ struct net_device {
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
 	 * Double protects:
-	 *	@up, @moving_ns, @nd_net, @xdp_flags
+	 *	@up, @moving_ns, @nd_net, @xdp_features
 	 *
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
-- 
cgit v1.2.3


From 9ff2aa4206eff40a202e425f232036bc84ad4c0e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 17 Mar 2025 23:07:30 -0400
Subject: net: ethtool: mm: extract stmmac verification logic into common
 library

It appears that stmmac is not the only hardware which requires a
software-driven verification state machine for the MAC Merge layer.

While on the one hand it's good to encourage hardware implementations,
on the other hand it's quite difficult to tolerate multiple drivers
implementing independently fairly non-trivial logic.

Extract the hardware-independent logic from stmmac into library code and
put it in ethtool. Name the state structure "mmsv" for MAC Merge
Software Verification. Let this expose an operations structure for
executing the hardware stuff: sync hardware with the tx_active boolean
(result of verification process), enable/disable the pMAC, send mPackets,
notify library of external events (reception of mPackets), as well as
link state changes.

Note that it is assumed that the external events are received in hardirq
context. If they are not, it is probably a good idea to disable hardirqs
when calling ethtool_mmsv_event_handle(), because the library does not
do so.

Also, the MM software verification process has no business with the
tx_min_frag_size, that is all the driver's to handle.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Co-developed-by: Choong Yong Liang <yong.liang.choong@linux.intel.com>
Signed-off-by: Choong Yong Liang <yong.liang.choong@linux.intel.com>
Tested-by: Choong Yong Liang <yong.liang.choong@linux.intel.com>
Tested-by: Furong Xu <0x1207@gmail.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/ethtool.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 7edb5f5e7134..117718c24814 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -17,9 +17,13 @@
 #include <linux/compat.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
+#include <linux/timer_types.h>
 #include <uapi/linux/ethtool.h>
 #include <uapi/linux/net_tstamp.h>
 
+#define ETHTOOL_MM_MAX_VERIFY_TIME_MS		128
+#define ETHTOOL_MM_MAX_VERIFY_RETRIES		3
+
 struct compat_ethtool_rx_flow_spec {
 	u32		flow_type;
 	union ethtool_flow_union h_u;
@@ -718,6 +722,75 @@ struct ethtool_mm_stats {
 	u64 MACMergeHoldCount;
 };
 
+enum ethtool_mmsv_event {
+	ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET,
+	ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET,
+	ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET,
+};
+
+/* MAC Merge verification mPacket type */
+enum ethtool_mpacket {
+	ETHTOOL_MPACKET_VERIFY,
+	ETHTOOL_MPACKET_RESPONSE,
+};
+
+struct ethtool_mmsv;
+
+/**
+ * struct ethtool_mmsv_ops - Operations for MAC Merge Software Verification
+ * @configure_tx: Driver callback for the event where the preemptible TX
+ *		  becomes active or inactive. Preemptible traffic
+ *		  classes must be committed to hardware only while
+ *		  preemptible TX is active.
+ * @configure_pmac: Driver callback for the event where the pMAC state
+ *		    changes as result of an administrative setting
+ *		    (ethtool) or a call to ethtool_mmsv_link_state_handle().
+ * @send_mpacket: Driver-provided method for sending a Verify or a Response
+ *		  mPacket.
+ */
+struct ethtool_mmsv_ops {
+	void (*configure_tx)(struct ethtool_mmsv *mmsv, bool tx_active);
+	void (*configure_pmac)(struct ethtool_mmsv *mmsv, bool pmac_enabled);
+	void (*send_mpacket)(struct ethtool_mmsv *mmsv, enum ethtool_mpacket mpacket);
+};
+
+/**
+ * struct ethtool_mmsv - MAC Merge Software Verification
+ * @ops: operations for MAC Merge Software Verification
+ * @dev: pointer to net_device structure
+ * @lock: serialize access to MAC Merge state between
+ *	  ethtool requests and link state updates.
+ * @status: current verification FSM state
+ * @verify_timer: timer for verification in local TX direction
+ * @verify_enabled: indicates if verification is enabled
+ * @verify_retries: number of retries for verification
+ * @pmac_enabled: indicates if the preemptible MAC is enabled
+ * @verify_time: time for verification in milliseconds
+ * @tx_enabled: indicates if transmission is enabled
+ */
+struct ethtool_mmsv {
+	const struct ethtool_mmsv_ops *ops;
+	struct net_device *dev;
+	spinlock_t lock;
+	enum ethtool_mm_verify_status status;
+	struct timer_list verify_timer;
+	bool verify_enabled;
+	int verify_retries;
+	bool pmac_enabled;
+	u32 verify_time;
+	bool tx_enabled;
+};
+
+void ethtool_mmsv_stop(struct ethtool_mmsv *mmsv);
+void ethtool_mmsv_link_state_handle(struct ethtool_mmsv *mmsv, bool up);
+void ethtool_mmsv_event_handle(struct ethtool_mmsv *mmsv,
+			       enum ethtool_mmsv_event event);
+void ethtool_mmsv_get_mm(struct ethtool_mmsv *mmsv,
+			 struct ethtool_mm_state *state);
+void ethtool_mmsv_set_mm(struct ethtool_mmsv *mmsv, struct ethtool_mm_cfg *cfg);
+void ethtool_mmsv_init(struct ethtool_mmsv *mmsv, struct net_device *dev,
+		       const struct ethtool_mmsv_ops *ops);
+
 /**
  * struct ethtool_rxfh_param - RXFH (RSS) parameters
  * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
-- 
cgit v1.2.3


From 818bd489f137e9257d701c0d4bf11efdfd02ca5f Mon Sep 17 00:00:00 2001
From: Romain Gantois <romain.gantois@bootlin.com>
Date: Thu, 6 Mar 2025 17:23:25 +0100
Subject: i2c: use client addresses directly in ATR interface

The I2C Address Translator (ATR) module defines mappings from i2c_client
structs to aliases. However, only the physical address of each i2c_client
struct is actually relevant to the workings of the ATR module. Moreover,
some drivers require address translation functionality but do not allocate
i2c_client structs, accessing the adapter directly instead. The SFP
subsystem is an example of this.

Replace the "i2c_client" field of the i2c_atr_alias_pair struct with a u16
"addr" field. Rewrite helper functions and callbacks as needed.

Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Romain Gantois <romain.gantois@bootlin.com>
Acked-by: Andi Shyti <andi.shyti@kernel.org>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c-atr.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h
index 4d5da161c225..14c1f9175c0d 100644
--- a/include/linux/i2c-atr.h
+++ b/include/linux/i2c-atr.h
@@ -20,20 +20,20 @@ struct i2c_atr;
 
 /**
  * struct i2c_atr_ops - Callbacks from ATR to the device driver.
- * @attach_client: Notify the driver of a new device connected on a child
- *                 bus, with the alias assigned to it. The driver must
- *                 configure the hardware to use the alias.
- * @detach_client: Notify the driver of a device getting disconnected. The
- *                 driver must configure the hardware to stop using the
- *                 alias.
+ * @attach_addr: Notify the driver of a new device connected on a child
+ *               bus, with the alias assigned to it. The driver must
+ *               configure the hardware to use the alias.
+ * @detach_addr: Notify the driver of a device getting disconnected. The
+ *               driver must configure the hardware to stop using the
+ *               alias.
  *
  * All these functions return 0 on success, a negative error code otherwise.
  */
 struct i2c_atr_ops {
-	int (*attach_client)(struct i2c_atr *atr, u32 chan_id,
-			     const struct i2c_client *client, u16 alias);
-	void (*detach_client)(struct i2c_atr *atr, u32 chan_id,
-			      const struct i2c_client *client);
+	int (*attach_addr)(struct i2c_atr *atr, u32 chan_id,
+			   u16 addr, u16 alias);
+	void (*detach_addr)(struct i2c_atr *atr, u32 chan_id,
+			    u16 addr);
 };
 
 /**
-- 
cgit v1.2.3


From 328a106ce0e8d0596d2b020b6f83efd0c859b084 Mon Sep 17 00:00:00 2001
From: Romain Gantois <romain.gantois@bootlin.com>
Date: Thu, 6 Mar 2025 17:23:28 +0100
Subject: i2c: support per-channel ATR alias pools

Some I2C address translators (ATRs) assign each of their remote peripheral
aliases to a specific channel. To properly handle these devices, add
support for having separate alias pools for each ATR channel.

This is achieved by allowing callers of i2c_atr_add_adapter to pass an
optional alias list. If present, this list will be used to populate the
channel's alias pool. Otherwise, the common alias pool will be used.

Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Romain Gantois <romain.gantois@bootlin.com>
Acked-by: Andi Shyti <andi.shyti@kernel.org>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c-atr.h | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h
index 14c1f9175c0d..1c3a5bcd939f 100644
--- a/include/linux/i2c-atr.h
+++ b/include/linux/i2c-atr.h
@@ -36,6 +36,29 @@ struct i2c_atr_ops {
 			    u16 addr);
 };
 
+/**
+ * struct i2c_atr_adap_desc - An ATR downstream bus descriptor
+ * @chan_id:        Index of the new adapter (0 .. max_adapters-1).  This value is
+ *                  passed to the callbacks in `struct i2c_atr_ops`.
+ * @parent:         The device used as the parent of the new i2c adapter, or NULL
+ *                  to use the i2c-atr device as the parent.
+ * @bus_handle:     The fwnode handle that points to the adapter's i2c
+ *                  peripherals, or NULL.
+ * @num_aliases:    The number of aliases in this adapter's private alias pool. Set
+ *                  to zero if this adapter uses the ATR's global alias pool.
+ * @aliases:        An optional array of private aliases used by the adapter
+ *                  instead of the ATR's global pool of aliases. Must contain
+ *                  exactly num_aliases entries if num_aliases > 0, is ignored
+ *                  otherwise.
+ */
+struct i2c_atr_adap_desc {
+	u32 chan_id;
+	struct device *parent;
+	struct fwnode_handle *bus_handle;
+	size_t num_aliases;
+	u16 *aliases;
+};
+
 /**
  * i2c_atr_new() - Allocate and initialize an I2C ATR helper.
  * @parent:       The parent (upstream) adapter
@@ -65,12 +88,7 @@ void i2c_atr_delete(struct i2c_atr *atr);
 /**
  * i2c_atr_add_adapter - Create a child ("downstream") I2C bus.
  * @atr:        The I2C ATR
- * @chan_id:    Index of the new adapter (0 .. max_adapters-1).  This value is
- *              passed to the callbacks in `struct i2c_atr_ops`.
- * @adapter_parent: The device used as the parent of the new i2c adapter, or NULL
- *                  to use the i2c-atr device as the parent.
- * @bus_handle: The fwnode handle that points to the adapter's i2c
- *              peripherals, or NULL.
+ * @desc:       An ATR adapter descriptor
  *
  * After calling this function a new i2c bus will appear. Adding and removing
  * devices on the downstream bus will result in calls to the
@@ -85,9 +103,7 @@ void i2c_atr_delete(struct i2c_atr *atr);
  *
  * Return: 0 on success, a negative error code otherwise.
  */
-int i2c_atr_add_adapter(struct i2c_atr *atr, u32 chan_id,
-			struct device *adapter_parent,
-			struct fwnode_handle *bus_handle);
+int i2c_atr_add_adapter(struct i2c_atr *atr, struct i2c_atr_adap_desc *desc);
 
 /**
  * i2c_atr_del_adapter - Remove a child ("downstream") I2C bus added by
-- 
cgit v1.2.3


From e8866e26f5e8ec551eec73dca1c30d458f9dca86 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 11 Apr 2025 07:59:09 +0200
Subject: ata: libata-core: Simplify ata_print_version_once

Use dev_dbg_once() instead of open-coding the once functionality.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 include/linux/libata.h | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index e5695998acb0..98affa1d9136 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -41,17 +41,6 @@
  */
 #undef ATA_IRQ_TRAP		/* define to ack screaming irqs */
 
-
-#define ata_print_version_once(dev, version)			\
-({								\
-	static bool __print_once;				\
-								\
-	if (!__print_once) {					\
-		__print_once = true;				\
-		ata_print_version(dev, version);		\
-	}							\
-})
-
 /* defines only for the constants which don't work well as enums */
 #define ATA_TAG_POISON		0xfafbfcfdU
 
@@ -1593,7 +1582,11 @@ do {								\
 #define ata_dev_dbg(dev, fmt, ...)				\
 	ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__)
 
-void ata_print_version(const struct device *dev, const char *version);
+static inline void ata_print_version_once(const struct device *dev,
+					  const char *version)
+{
+	dev_dbg_once(dev, "version %s\n", version);
+}
 
 /*
  * ata_eh_info helpers
-- 
cgit v1.2.3


From 296b67059e3026125a1ca942f5506e6ca051749e Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Fri, 11 Apr 2025 23:31:40 +0800
Subject: fs/fs_parse: Delete macro fsparam_u32hex()

Delete macro fsparam_u32hex() since:

- it has no caller.

- it uses as type @fs_param_is_u32_hex which is never defined, so will
  cause compile error when caller uses it.

Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Link: https://lore.kernel.org/20250411-fix_fs-v2-1-5d3395c102e4@quicinc.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs_parser.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 53e566efd5fd..5057faf4f091 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -125,8 +125,6 @@ static inline bool fs_validate_description(const char *name,
 #define fsparam_u32(NAME, OPT)	__fsparam(fs_param_is_u32, NAME, OPT, 0, NULL)
 #define fsparam_u32oct(NAME, OPT) \
 			__fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8)
-#define fsparam_u32hex(NAME, OPT) \
-			__fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16)
 #define fsparam_s32(NAME, OPT)	__fsparam(fs_param_is_s32, NAME, OPT, 0, NULL)
 #define fsparam_u64(NAME, OPT)	__fsparam(fs_param_is_u64, NAME, OPT, 0, NULL)
 #define fsparam_enum(NAME, OPT, array)	__fsparam(fs_param_is_enum, NAME, OPT, 0, array)
-- 
cgit v1.2.3


From d1f482108a2cff2b9c6ebebc40b157aaeb8213b3 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Tue, 15 Apr 2025 20:25:00 +0800
Subject: fs/fs_parse: Remove unused and problematic validate_constant_table()

Remove validate_constant_table() since:

- It has no caller.

- It has below 3 bugs for good constant table array array[] which must
  end with a empty entry, and take below invocation for explaination:
  validate_constant_table(array, ARRAY_SIZE(array), ...)

  - Always return wrong value due to the last empty entry.
  - Imprecise error message for missorted case.
  - Potential NULL pointer dereference since the last pr_err() may use
    @tbl[i].name NULL pointer to print the last empty entry's name.

Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Link: https://lore.kernel.org/20250415-fix_fs-v4-1-5d575124a3ff@quicinc.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs_parser.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 5057faf4f091..5a0e897cae80 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -87,14 +87,9 @@ extern int lookup_constant(const struct constant_table tbl[], const char *name,
 extern const struct constant_table bool_names[];
 
 #ifdef CONFIG_VALIDATE_FS_PARSER
-extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
-				    int low, int high, int special);
 extern bool fs_validate_description(const char *name,
 				    const struct fs_parameter_spec *desc);
 #else
-static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
-					   int low, int high, int special)
-{ return true; }
 static inline bool fs_validate_description(const char *name,
 					   const struct fs_parameter_spec *desc)
 { return true; }
-- 
cgit v1.2.3


From 79beea2db0431536d79fc5d321225fb42f955466 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 15 Apr 2025 10:27:50 +0200
Subject: fs: remove uselib() system call

This system call has been deprecated for quite a while now.
Let's try and remove it from the kernel completely.

Link: https://lore.kernel.org/20250415-kanufahren-besten-02ac00e6becd@brauner
Acked-by: Kees Cook <kees@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/binfmts.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 1625c8529e70..65abd5ab8836 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -90,7 +90,6 @@ struct linux_binfmt {
 	struct list_head lh;
 	struct module *module;
 	int (*load_binary)(struct linux_binprm *);
-	int (*load_shlib)(struct file *);
 #ifdef CONFIG_COREDUMP
 	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
-- 
cgit v1.2.3


From 4ef4ac360101f8bb11b6486ce60cd60ca015be8c Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Thu, 17 Apr 2025 00:16:26 +0200
Subject: device_cgroup: avoid access to ->i_rdev in the common case in
 devcgroup_inode_permission()

The routine gets called for every path component during lookup.
->i_mode is going to be cached on account of permission checks, while
->i_rdev is an area which is most likely cache-cold.

gcc 14.2 is kind enough to emit one branch:
	movzwl (%rbx),%eax
	mov    %eax,%edx
	and    $0xb000,%dx
	cmp    $0x2000,%dx
	je     11bc <inode_permission+0xec>

This patch is lazy in that I don't know if the ->i_rdev branch makes
any sense with the newly added mode check upfront. I am not changing any
semantics here though.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/20250416221626.2710239-3-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/device_cgroup.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index d02f32b7514e..0864773a57e8 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -18,15 +18,16 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask)
 {
 	short type, access = 0;
 
+	if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)))
+		return 0;
+
 	if (likely(!inode->i_rdev))
 		return 0;
 
 	if (S_ISBLK(inode->i_mode))
 		type = DEVCG_DEV_BLOCK;
-	else if (S_ISCHR(inode->i_mode))
+	else /* S_ISCHR by the test above */
 		type = DEVCG_DEV_CHAR;
-	else
-		return 0;
 
 	if (mask & MAY_WRITE)
 		access |= DEVCG_ACC_WRITE;
-- 
cgit v1.2.3


From bd32923e5f02fa7b04d487ec265dc8080d27a257 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 31 Mar 2025 17:18:02 +0100
Subject: io_uring: don't store bgid in req->buf_index

Pass buffer group id into the rest of helpers via struct buf_sel_arg
and remove all reassignments of req->buf_index back to bgid. Now, it
only stores buffer indexes, and the group is provided by callers.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/3ea9fa08113ecb4d9224b943e7806e80a324bdf9.1743437358.git.asml.silence@gmail.com
Link: https://lore.kernel.org/io-uring/0c01d76ff12986c2f48614db8610caff8f78c869.1743500909.git.asml.silence@gmail.com/
[axboe: fold in patch from second link]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b44d201520d8..3b467879bca8 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -653,8 +653,7 @@ struct io_kiocb {
 	u8				iopoll_completed;
 	/*
 	 * Can be either a fixed buffer index, or used with provided buffers.
-	 * For the latter, before issue it points to the buffer group ID,
-	 * and after selection it points to the buffer ID itself.
+	 * For the latter, it points to the selected buffer ID.
 	 */
 	u16				buf_index;
 
-- 
cgit v1.2.3


From 632b3186726984319e2337987de86a442407f30e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 20 Apr 2025 10:31:19 +0100
Subject: io_uring/zcrx: move zcrx region to struct io_zcrx_ifq

Refill queue region is a part of zcrx and should stay in struct
io_zcrx_ifq. We can't have multiple queues without it, so move it there.

As a result there is no context global zcrx region anymore, and the
region is looked up together with its ifq. To protect a concurrent mmap
from seeing an inconsistent region we were protecting changes to
->zcrx_region with mmap_lock, but now it protect the publishing of the
ifq.

Reviewed-by: David Wei <dw@davidwei.uk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/24f1a728fc03d0166f16d099575457e10d9d90f2.1745141261.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3b467879bca8..06d722289fc5 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -448,8 +448,6 @@ struct io_ring_ctx {
 	struct io_mapped_region		ring_region;
 	/* used for optimised request parameter and wait argument passing  */
 	struct io_mapped_region		param_region;
-	/* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */
-	struct io_mapped_region		zcrx_region;
 };
 
 /*
-- 
cgit v1.2.3


From 19bbfe7b5fcc04d8711e8e1352acc77c1a5c3955 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 21 Apr 2025 10:27:40 +0200
Subject: fs: add S_ANON_INODE

This makes it easy to detect proper anonymous inodes and to ensure that
we can detect them in codepaths such as readahead().

Readahead on anonymous inodes didn't work because they didn't have a
proper mode. Now that they have we need to retain EINVAL being returned
otherwise LTP will fail.

We also need to ensure that ioctls aren't simply fired like they are for
regular files so things like inotify inodes continue to correctly call
their own ioctl handlers as in [1].

Reported-by: Xilin Wu <sophon@radxa.com>
Link: https://lore.kernel.org/3A9139D5CD543962+89831381-31b9-4392-87ec-a84a5b3507d8@radxa.com [1]
Link: https://lore.kernel.org/7a1a7076-ff6b-4cb0-94e7-7218a0a44028@sirena.org.uk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..cf7208500cee 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2344,6 +2344,7 @@ struct super_operations {
 #define S_CASEFOLD	(1 << 15) /* Casefolded file */
 #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
 #define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
+#define S_ANON_INODE	(1 << 19) /* Inode is an anonymous inode */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2400,6 +2401,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
 
 #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
 				 (inode)->i_rdev == WHITEOUT_DEV)
+#define IS_ANON_FILE(inode)	((inode)->i_flags & S_ANON_INODE)
 
 static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
 				   struct inode *inode)
-- 
cgit v1.2.3


From 4e2c719782a84702db7fc2dc07ced796f308fec7 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Tue, 22 Apr 2025 08:32:47 +0200
Subject: x86/cpu: Help users notice when running old Intel microcode

Old microcode is bad for users and for kernel developers.

For users, it exposes them to known fixed security and/or functional
issues. These obviously rarely result in instant dumpster fires in
every environment. But it is as important to keep your microcode up
to date as it is to keep your kernel up to date.

Old microcode also makes kernels harder to debug. A developer looking
at an oops need to consider kernel bugs, known CPU issues and unknown
CPU issues as possible causes. If they know the microcode is up to
date, they can mostly eliminate known CPU issues as the cause.

Make it easier to tell if CPU microcode is out of date. Add a list
of released microcode. If the loaded microcode is older than the
release, tell users in a place that folks can find it:

	/sys/devices/system/cpu/vulnerabilities/old_microcode

Tell kernel kernel developers about it with the existing taint
flag:

	TAINT_CPU_OUT_OF_SPEC

== Discussion ==

When a user reports a potential kernel issue, it is very common
to ask them to reproduce the issue on mainline. Running mainline,
they will (independently from the distro) acquire a more up-to-date
microcode version list. If their microcode is old, they will
get a warning about the taint and kernel developers can take that
into consideration when debugging.

Just like any other entry in "vulnerabilities/", users are free to
make their own assessment of their exposure.

== Microcode Revision Discussion ==

The microcode versions in the table were generated from the Intel
microcode git repo:

	8ac9378a8487 ("microcode-20241112 Release")

which as of this writing lags behind the latest microcode-20250211.

It can be argued that the versions that the kernel picks to call "old"
should be a revision or two old. Which specific version is picked is
less important to me than picking *a* version and enforcing it.

This repository contains only microcode versions that Intel has deemed
to be OS-loadable. It is quite possible that the BIOS has loaded a
newer microcode than the latest in this repo. If this happens, the
system is considered to have new microcode, not old.

Specifically, the sysfs file and taint flag answer the question:

	Is the CPU running on the latest OS-loadable microcode,
	or something even later that the BIOS loaded?

In other words, Intel never publishes an authoritative list of CPUs
and latest microcode revisions. Until it does, this is the best that
Linux can do.

Also note that the "intel-ucode-defs.h" file is simple, ugly and
has lots of magic numbers. That's on purpose and should allow a
single file to be shared across lots of stable kernel regardless of if
they have the new "VFM" infrastructure or not. It was generated with
a dumb script.

== FAQ ==

Q: Does this tell me if my system is secure or insecure?
A: No. It only tells you if your microcode was old when the
   system booted.

Q: Should the kernel warn if the microcode list itself is too old?
A: No. New kernels will get new microcode lists, both mainline
   and stable. The only way to have an old list is to be running
   an old kernel in which case you have bigger problems.

Q: Is this for security or functional issues?
A: Both.

Q: If a given microcode update only has functional problems but
   no security issues, will it be considered old?
A: Yes. All microcode image versions within a microcode release
   are treated identically. Intel appears to make security
   updates without disclosing them in the release notes.  Thus,
   all updates are considered to be security-relevant.

Q: Who runs old microcode?
A: Anybody with an old distro. This happens all the time inside
   of Intel where there are lots of weird systems in labs that
   might not be getting regular distro updates and might also
   be running rather exotic microcode images.

Q: If I update my microcode after booting will it stop saying
   "Vulnerable"?
A: No. Just like all the other vulnerabilies, you need to
   reboot before the kernel will reassess your vulnerability.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: "Ahmed S. Darwish" <darwi@linutronix.de>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/all/20250421195659.CF426C07%40davehans-spike.ostc.intel.com
(cherry picked from commit 9127865b15eb0a1bd05ad7efe29489c44394bdc1)
---
 include/linux/cpu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e3049543008b..1f5cfc4cc04f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev,
 extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
 					       struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_old_microcode(struct device *dev,
+				      struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
-- 
cgit v1.2.3


From a8dc26a0ec43fd416ca781a8030807e65f71cfc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
Date: Thu, 27 Mar 2025 12:54:28 +0000
Subject: firmware: exynos-acpm: introduce devm_acpm_get_by_node()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To allow ACPM clients to simply be children of the ACPM node in DT,
they need to be able to get the ACPM handle based on that ACPM node
directly.

Add an API to allow them to do so, devm_acpm_get_by_node().

At the same time, the previous approach of acquiring the ACPM handle
via a DT phandle is now obsolete and we can remove
devm_acpm_get_by_phandle(), which was there to facilitate that. There
are no existing or anticipated upcoming users of that API, because all
clients should be children of the ACPM node going forward.

Note that no DTs have been merged that use the old approach, so doing
this API change in this driver now will not affect any existing DTs or
client drivers.

Signed-off-by: André Draszik <andre.draszik@linaro.org>
Link: https://lore.kernel.org/r/20250327-acpm-children-v1-2-0afe15ee2ff7@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/linux/firmware/samsung/exynos-acpm-protocol.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h
index 76255b5d06b1..f628bf1862c2 100644
--- a/include/linux/firmware/samsung/exynos-acpm-protocol.h
+++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 
 struct acpm_handle;
+struct device_node;
 
 struct acpm_pmic_ops {
 	int (*read_reg)(const struct acpm_handle *handle,
@@ -44,6 +45,7 @@ struct acpm_handle {
 
 struct device;
 
-const struct acpm_handle *devm_acpm_get_by_phandle(struct device *dev,
-						   const char *property);
+const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+						struct device_node *np);
+
 #endif /* __EXYNOS_ACPM_PROTOCOL_H */
-- 
cgit v1.2.3


From f24303631489d250f330373a59b3412103a93b67 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 24 Mar 2025 09:12:50 +0200
Subject: property: Add functions to iterate named child

There are a few use-cases where child nodes with a specific name need to
be parsed. Code like:

fwnode_for_each_child_node()
	if (fwnode_name_eq())
		...

can be found from a various drivers/subsystems. Adding a macro for this
can simplify things a bit.

In a few cases the data from the found nodes is later added to an array,
which is allocated based on the number of found nodes. One example of
such use is the IIO subsystem's ADC channel nodes, where the relevant
nodes are named as channel[@N].

Add helpers for iterating and counting device's sub-nodes with certain
name instead of open-coding this in every user.

Suggested-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Marcelo Schmitt <marcelo.schmitt1@gmail.com>
Link: https://patch.msgid.link/2767173b7b18e974c0bac244688214bd3863ff06.1742560649.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/property.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/property.h b/include/linux/property.h
index e214ecd241eb..3e83babac0b0 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -167,6 +167,10 @@ struct fwnode_handle *fwnode_get_next_available_child_node(
 	for (child = fwnode_get_next_child_node(fwnode, NULL); child;	\
 	     child = fwnode_get_next_child_node(fwnode, child))
 
+#define fwnode_for_each_named_child_node(fwnode, child, name)		\
+	fwnode_for_each_child_node(fwnode, child)			\
+		if (!fwnode_name_eq(child, name)) { } else
+
 #define fwnode_for_each_available_child_node(fwnode, child)		       \
 	for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\
 	     child = fwnode_get_next_available_child_node(fwnode, child))
@@ -178,11 +182,19 @@ struct fwnode_handle *device_get_next_child_node(const struct device *dev,
 	for (child = device_get_next_child_node(dev, NULL); child;	\
 	     child = device_get_next_child_node(dev, child))
 
+#define device_for_each_named_child_node(dev, child, name)		\
+	device_for_each_child_node(dev, child)				\
+		if (!fwnode_name_eq(child, name)) { } else
+
 #define device_for_each_child_node_scoped(dev, child)			\
 	for (struct fwnode_handle *child __free(fwnode_handle) =	\
 		device_get_next_child_node(dev, NULL);			\
 	     child; child = device_get_next_child_node(dev, child))
 
+#define device_for_each_named_child_node_scoped(dev, child, name)	\
+	device_for_each_child_node_scoped(dev, child)			\
+		if (!fwnode_name_eq(child, name)) { } else
+
 struct fwnode_handle *fwnode_get_named_child_node(const struct fwnode_handle *fwnode,
 						  const char *childname);
 struct fwnode_handle *device_get_named_child_node(const struct device *dev,
@@ -210,6 +222,14 @@ int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name);
 
 unsigned int device_get_child_node_count(const struct device *dev);
 
+unsigned int fwnode_get_named_child_node_count(const struct fwnode_handle *fwnode,
+					       const char *name);
+static inline unsigned int device_get_named_child_node_count(const struct device *dev,
+							     const char *name)
+{
+	return fwnode_get_named_child_node_count(dev_fwnode(dev), name);
+}
+
 static inline int device_property_read_u8(const struct device *dev,
 					  const char *propname, u8 *val)
 {
-- 
cgit v1.2.3


From f3a8f870fa9c84b34aad4c72c8d0a1213ba13a36 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 24 Mar 2025 09:13:03 +0200
Subject: iio: adc: add helpers for parsing ADC nodes

There are ADC ICs which may have some of the AIN pins usable for other
functions. These ICs may have some of the AIN pins wired so that they
should not be used for ADC.

A common way of marking pins that can be used as ADC inputs is to add
corresponding channel@N nodes in the device tree as described in the ADC
	      binding yaml.

Add couple of helper functions which can be used to retrieve the channel
information from the device node.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Marcelo Schmitt <marcelo.schmitt1@gmail.com>
Link: https://patch.msgid.link/f1d8b3e15237947738912c0d297b3e1e21d8b03e.1742560649.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/adc-helpers.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 include/linux/iio/adc-helpers.h

(limited to 'include/linux')

diff --git a/include/linux/iio/adc-helpers.h b/include/linux/iio/adc-helpers.h
new file mode 100644
index 000000000000..56b092a2a4c4
--- /dev/null
+++ b/include/linux/iio/adc-helpers.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * The industrial I/O ADC firmware property parsing helpers
+ *
+ * Copyright (c) 2025 Matti Vaittinen <mazziesaccount@gmail.com>
+ */
+
+#ifndef _INDUSTRIAL_IO_ADC_HELPERS_H_
+#define _INDUSTRIAL_IO_ADC_HELPERS_H_
+
+#include <linux/property.h>
+
+struct device;
+struct iio_chan_spec;
+
+static inline int iio_adc_device_num_channels(struct device *dev)
+{
+	return device_get_named_child_node_count(dev, "channel");
+}
+
+int devm_iio_adc_device_alloc_chaninfo_se(struct device *dev,
+					  const struct iio_chan_spec *template,
+					  int max_chan_id,
+					  struct iio_chan_spec **cs);
+
+#endif /* _INDUSTRIAL_IO_ADC_HELPERS_H_ */
-- 
cgit v1.2.3


From cfed1969fcfe69d0b0db780efe2b6fedeaf7f2b3 Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan@foss.st.com>
Date: Fri, 14 Mar 2025 18:14:46 +0100
Subject: iio: trigger: stm32-lptimer: add support for stm32mp25

Add support for STM32MP25 SoC. Use newly introduced compatible to handle
this new HW variant. Add new trigger definitions that can be used by the
stm32 analog-to-digital converter. Use compatible data to identify them.

Signed-off-by: Olivier Moysan <olivier.moysan@foss.st.com>
Signed-off-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://patch.msgid.link/20250314171451.3497789-4-fabrice.gasnier@foss.st.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/timer/stm32-lptim-trigger.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h
index a34dcf6a6001..ce3cf0addb2e 100644
--- a/include/linux/iio/timer/stm32-lptim-trigger.h
+++ b/include/linux/iio/timer/stm32-lptim-trigger.h
@@ -14,6 +14,15 @@
 #define LPTIM1_OUT	"lptim1_out"
 #define LPTIM2_OUT	"lptim2_out"
 #define LPTIM3_OUT	"lptim3_out"
+#define LPTIM4_OUT	"lptim4_out"
+#define LPTIM5_OUT	"lptim5_out"
+
+#define LPTIM1_CH1	"lptim1_ch1"
+#define LPTIM1_CH2	"lptim1_ch2"
+#define LPTIM2_CH1	"lptim2_ch1"
+#define LPTIM2_CH2	"lptim2_ch2"
+#define LPTIM3_CH1	"lptim3_ch1"
+#define LPTIM4_CH1	"lptim4_ch1"
 
 #if IS_REACHABLE(CONFIG_IIO_STM32_LPTIMER_TRIGGER)
 bool is_stm32_lptim_trigger(struct iio_trigger *trig);
-- 
cgit v1.2.3


From 5d1dff5b45b7e81206ada46ae996f58129a68a7c Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Mon, 31 Mar 2025 13:13:17 +0100
Subject: iio: Adjust internals of handling of direct mode claiming to suit new
 API.

Now there are no remaining callers of iio_device_claim_direct_mode()
and iio_device_release_direct_mode() rename those functions to ensure
they are not used in new drivers. Also make them now return booleans
in line with the sparse friendly static inline wrappers.

Reviewed-by: David Lechner <dlechner@baylibre.com>
Reviewed-by: Marcelo Schmitt <marcelo.schmitt1@gmail.com>
Link: https://patch.msgid.link/20250331121317.1694135-38-jic23@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 07a0e8132e88..638cf2420fbd 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -659,8 +659,8 @@ void iio_device_unregister(struct iio_dev *indio_dev);
 int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev,
 			       struct module *this_mod);
 int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp);
-int iio_device_claim_direct_mode(struct iio_dev *indio_dev);
-void iio_device_release_direct_mode(struct iio_dev *indio_dev);
+bool __iio_device_claim_direct(struct iio_dev *indio_dev);
+void __iio_device_release_direct(struct iio_dev *indio_dev);
 
 /*
  * Helper functions that allow claim and release of direct mode
@@ -671,9 +671,7 @@ void iio_device_release_direct_mode(struct iio_dev *indio_dev);
  */
 static inline bool iio_device_claim_direct(struct iio_dev *indio_dev)
 {
-	int ret = iio_device_claim_direct_mode(indio_dev);
-
-	if (ret)
+	if (!__iio_device_claim_direct(indio_dev))
 		return false;
 
 	__acquire(iio_dev);
@@ -683,7 +681,7 @@ static inline bool iio_device_claim_direct(struct iio_dev *indio_dev)
 
 static inline void iio_device_release_direct(struct iio_dev *indio_dev)
 {
-	iio_device_release_direct_mode(indio_dev);
+	__iio_device_release_direct(indio_dev);
 	__release(indio_dev);
 }
 
-- 
cgit v1.2.3


From 2086321576425361e8ea0052d9ef6eec55f99a6f Mon Sep 17 00:00:00 2001
From: Angelo Dureghello <adureghello@baylibre.com>
Date: Wed, 9 Apr 2025 20:36:30 +0200
Subject: iio: backend: add support for data source get
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add backend support for getting the data source used.

The ad3552r HDL implements an internal ramp generator, so adding the
getter to allow data source get/set by debugfs.

Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Signed-off-by: Angelo Dureghello <adureghello@baylibre.com>
Link: https://patch.msgid.link/20250409-wip-bl-ad3552r-fixes-v5-3-fb429c3a6515@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/backend.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h
index e45b7dfbec35..e59d909cb659 100644
--- a/include/linux/iio/backend.h
+++ b/include/linux/iio/backend.h
@@ -84,6 +84,7 @@ enum iio_backend_interface_type {
  * @chan_disable: Disable one channel.
  * @data_format_set: Configure the data format for a specific channel.
  * @data_source_set: Configure the data source for a specific channel.
+ * @data_source_get: Data source getter for a specific channel.
  * @set_sample_rate: Configure the sampling rate for a specific channel.
  * @test_pattern_set: Configure a test pattern.
  * @chan_status: Get the channel status.
@@ -115,6 +116,8 @@ struct iio_backend_ops {
 			       const struct iio_backend_data_fmt *data);
 	int (*data_source_set)(struct iio_backend *back, unsigned int chan,
 			       enum iio_backend_data_source data);
+	int (*data_source_get)(struct iio_backend *back, unsigned int chan,
+			       enum iio_backend_data_source *data);
 	int (*set_sample_rate)(struct iio_backend *back, unsigned int chan,
 			       u64 sample_rate_hz);
 	int (*test_pattern_set)(struct iio_backend *back,
@@ -176,6 +179,8 @@ int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan,
 				const struct iio_backend_data_fmt *data);
 int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan,
 				enum iio_backend_data_source data);
+int iio_backend_data_source_get(struct iio_backend *back, unsigned int chan,
+				enum iio_backend_data_source *data);
 int iio_backend_set_sampling_freq(struct iio_backend *back, unsigned int chan,
 				  u64 sample_rate_hz);
 int iio_backend_test_pattern_set(struct iio_backend *back,
-- 
cgit v1.2.3


From 74e5b13a1b0f10c5a5c6168f6915620a1d369fae Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Mon, 21 Apr 2025 15:52:50 -0700
Subject: lsm: Move security_netlink_send to under CONFIG_SECURITY_NETWORK

security_netlink_send() is a networking hook, so it fits better under
CONFIG_SECURITY_NETWORK.

Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/security.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index cc9b54d95d22..dba349629229 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -563,7 +563,6 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx,
 int security_getprocattr(struct task_struct *p, int lsmid, const char *name,
 			 char **value);
 int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
-int security_netlink_send(struct sock *sk, struct sk_buff *skb);
 int security_ismaclabel(const char *name);
 int security_secid_to_secctx(u32 secid, struct lsm_context *cp);
 int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp);
@@ -1527,11 +1526,6 @@ static inline int security_setprocattr(int lsmid, char *name, void *value,
 	return -EINVAL;
 }
 
-static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb)
-{
-	return 0;
-}
-
 static inline int security_ismaclabel(const char *name)
 {
 	return 0;
@@ -1629,6 +1623,7 @@ static inline int security_watch_key(struct key *key)
 
 #ifdef CONFIG_SECURITY_NETWORK
 
+int security_netlink_send(struct sock *sk, struct sk_buff *skb);
 int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk);
 int security_unix_may_send(struct socket *sock,  struct socket *other);
 int security_socket_create(int family, int type, int protocol, int kern);
@@ -1684,6 +1679,11 @@ int security_sctp_assoc_established(struct sctp_association *asoc,
 int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk);
 
 #else	/* CONFIG_SECURITY_NETWORK */
+static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb)
+{
+	return 0;
+}
+
 static inline int security_unix_stream_connect(struct sock *sock,
 					       struct sock *other,
 					       struct sock *newsk)
-- 
cgit v1.2.3


From 145436ae01193c0a379fd3ea9c4fbdf32863db1f Mon Sep 17 00:00:00 2001
From: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
Date: Wed, 16 Apr 2025 19:14:49 +0200
Subject: net: phy: Add helper for getting MAC termination resistance

Add helper which returns the MAC termination resistance value. Modifying
the resistance to an appropriate value can reduce signal reflections and
therefore improve signal quality.

Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
Link: https://patch.msgid.link/20250416-dp83822-mac-impedance-v3-3-028ac426cddb@liebherr.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index fb755358d965..066a28a4b64b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2040,6 +2040,9 @@ int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev,
 			      enum ethtool_link_mode_bit_indices linkmode,
 			      u32 *val);
 
+int phy_get_mac_termination(struct phy_device *phydev, struct device *dev,
+			    u32 *val);
+
 void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv,
 		       bool *tx_pause, bool *rx_pause);
 
-- 
cgit v1.2.3


From 7650f826f7b2d84782f9147c51687ff0364125e9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 18 Apr 2025 10:58:41 +0800
Subject: crypto: shash - Handle partial blocks in API

Provide an option to handle the partial blocks in the shash API.
Almost every hash algorithm has a block size and are only able
to hash partial blocks on finalisation.

Rather than duplicating the partial block handling many times,
add this functionality to the shash API.

It is optional (e.g., hmac would never need this by relying on
the partial block handling of the underlying hash), and to enable
it set the bit CRYPTO_AHASH_ALG_BLOCK_ONLY.

The export format is always that of the underlying hash export,
plus the partial block buffer, followed by a single-byte for the
partial block length.

Set the bit CRYPTO_AHASH_ALG_FINAL_NONZERO to withhold an extra
byte in the partial block.  This will come in handy when this
is extended to ahash where hardware often can't deal with a
zero-length final.

It will also be used for algorithms requiring an extra block for
finalisation (e.g., cmac).

As an optimisation, set the bit CRYPTO_AHASH_ALG_FINUP_MAX if
the algorithm wishes to get as much data as possible instead of
just the last partial block.

The descriptor will be zeroed after finalisation.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index b89b1b348095..f691ce01745e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -136,6 +136,8 @@
 /* Set if the algorithm supports request chains and virtual addresses. */
 #define CRYPTO_ALG_REQ_CHAIN		0x00040000
 
+/* The high bits 0xff000000 are reserved for type-specific flags. */
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
cgit v1.2.3


From afc0a570bb6138713d74784a032e4eb5b7f919ca Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Apr 2025 10:17:03 +0100
Subject: PCI: host-generic: Extract an ECAM bridge creation helper from
 pci_host_common_probe()

pci_host_common_probe() is an extremely useful helper, as it
abstracts away most of the gunk that a "mostly-ECAM-compliant"
device driver needs.

However, it is structured as a probe function, meaning that a lot
of the driver-specific setup has to happen in a .init() callback,
after the bridge and config space have been instantiated.

This is a bit awkward, and results in a number of convolutions
that could be avoided if the host-common code was more like
a library.

Introduce a pci_host_common_init() helper that does exactly that,
taking the platform device and a struct pci_ecam_op as parameters.

This can then be called from the probe routine, and a lot of the
code that isn't relevant to PCI setup moved away from the .init()
callback. This also removes the dependency on the device match
data, which is an oddity.

Signed-off-by: Marc Zyngier <maz@kernel.org>
[mani: fixed spelling mistakes]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Tested-by: Janne Grunau <j@jannau.net>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Link: https://patch.msgid.link/20250401091713.2765724-4-maz@kernel.org
---
 include/linux/pci-ecam.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 3a10f8cfc3ad..bc2ca2c72ee2 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -97,6 +97,8 @@ extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */
 #if IS_ENABLED(CONFIG_PCI_HOST_COMMON)
 /* for DT-based PCI controllers that support ECAM */
 int pci_host_common_probe(struct platform_device *pdev);
+int pci_host_common_init(struct platform_device *pdev,
+			 const struct pci_ecam_ops *ops);
 void pci_host_common_remove(struct platform_device *pdev);
 #endif
 #endif
-- 
cgit v1.2.3


From 9861f21ff16b6cd919144dae7ff355d5145a3474 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 14 Mar 2025 11:00:55 +0100
Subject: pmdomain: core: Add genpd helper to correct the usage/rejected
 counters

In the cpuidle-psci-domain case the ->power_off() callback is usually
returning zero to indicate success. This is because the actual call to the
PSCI FW to enter the selected domain-idlestate, needs to be done after the
->power_off() callback has returned.

When the call to the PSCI FW fails, this leads to receiving an incorrect
tracking of the usage/rejected counts for the selected domain-idlestate.
In other words, the presented debug-statistics for genpd may look better
than what the actually are.

To allow a better correctness of the data, let's add a new genpd helper
function, which enables the caller adjust the usage/rejected counters for a
domain-idlestate, in cases of errors during power-off.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20250314100103.1294715-2-ulf.hansson@linaro.org
---
 include/linux/pm_domain.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index d56a78af4af1..6e808aeecbcb 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -285,6 +285,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 int pm_genpd_init(struct generic_pm_domain *genpd,
 		  struct dev_power_governor *gov, bool is_off);
 int pm_genpd_remove(struct generic_pm_domain *genpd);
+void pm_genpd_inc_rejected(struct generic_pm_domain *genpd,
+			   unsigned int state_idx);
 struct device *dev_to_genpd_dev(struct device *dev);
 int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state);
 int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb);
@@ -336,6 +338,10 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd)
 	return -EOPNOTSUPP;
 }
 
+static inline void pm_genpd_inc_rejected(struct generic_pm_domain *genpd,
+					 unsigned int state_idx)
+{ }
+
 static inline struct device *dev_to_genpd_dev(struct device *dev)
 {
 	return ERR_PTR(-EOPNOTSUPP);
-- 
cgit v1.2.3


From 0a8a888167ddaaec7a292e5045782b8a240e6f3e Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 14 Mar 2025 11:00:58 +0100
Subject: pmdomain: core: Add residency reflection for domain-idlestates to
 debugfs

For regular cpuidle states we are reflecting over the selected/entered
state to see if the sleep-duration meets the residency for the state. The
output from the reflection is an "above" value to indicate the number of
times the state was too deep and a "below" value for the number of times it
was too shallow.

Let's implement the similar thing for genpd's domain-idlestates along with
genpd's governor and put the information in the genpd's debugfs.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20250314100103.1294715-5-ulf.hansson@linaro.org
---
 include/linux/pm_domain.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 6e808aeecbcb..0b18160901a2 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -142,6 +142,8 @@ struct genpd_governor_data {
 	bool max_off_time_changed;
 	ktime_t next_wakeup;
 	ktime_t next_hrtimer;
+	ktime_t last_enter;
+	bool reflect_residency;
 	bool cached_power_down_ok;
 	bool cached_power_down_state_idx;
 };
@@ -153,6 +155,8 @@ struct genpd_power_state {
 	s64 residency_ns;
 	u64 usage;
 	u64 rejected;
+	u64 above;
+	u64 below;
 	struct fwnode_handle *fwnode;
 	u64 idle_time;
 	void *data;
-- 
cgit v1.2.3


From 76a853f86c97b348dc96e75a6e6f94d8750715ee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 13 Mar 2025 13:49:39 +0100
Subject: wifi: free SKBTX_WIFI_STATUS skb tx_flags flag

Jason mentioned at netdevconf that we've run out of tx_flags in
the skb_shinfo(). Gain one bit back by removing the wifi bit.

We can do that because the only userspace application for it
(hostapd) doesn't change the setting on the socket, it just
uses different sockets, and normally doesn't even use this any
more, sending the frames over nl80211 instead.

Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://patch.msgid.link/20250313134942.52ff54a140ec.If390bbdc46904cf451256ba989d7a056c457af6e@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/skbuff.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b974a277975a..9ee39670e8f4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -481,9 +481,6 @@ enum {
 	/* generate software time stamp on packet tx completion */
 	SKBTX_COMPLETION_TSTAMP = 1 << 3,
 
-	/* generate wifi status information (where possible) */
-	SKBTX_WIFI_STATUS = 1 << 4,
-
 	/* determine hardware time stamp based on time or cycles */
 	SKBTX_HW_TSTAMP_NETDEV = 1 << 5,
 
-- 
cgit v1.2.3


From 76f1cc98b23cefd1f0ae90c51f1fb837e5f46528 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 20 Apr 2025 10:31:20 +0100
Subject: io_uring/zcrx: add support for multiple ifqs

Allow the user to register multiple ifqs / zcrx contexts. With that we
can use multiple interfaces / interface queues in a single io_uring
instance.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/668b03bee03b5216564482edcfefbc2ee337dd30.1745141261.git.asml.silence@gmail.com
[axboe: fold in fix]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 06d722289fc5..7e23e993280e 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -40,8 +40,6 @@ enum io_uring_cmd_flags {
 	IO_URING_F_TASK_DEAD		= (1 << 13),
 };
 
-struct io_zcrx_ifq;
-
 struct io_wq_work_node {
 	struct io_wq_work_node *next;
 };
@@ -394,7 +392,8 @@ struct io_ring_ctx {
 	struct wait_queue_head		poll_wq;
 	struct io_restriction		restrictions;
 
-	struct io_zcrx_ifq		*ifq;
+	/* Stores zcrx object pointers of type struct io_zcrx_ifq */
+	struct xarray			zcrx_ctxs;
 
 	u32			pers_next;
 	struct xarray		personalities;
-- 
cgit v1.2.3


From fcc2d3e11bcc8f01d52a8c419f49f86ff8343b7c Mon Sep 17 00:00:00 2001
From: Karthikeyan Kathirvel <karthikeyan.kathirvel@oss.qualcomm.com>
Date: Mon, 21 Apr 2025 16:45:05 +0530
Subject: wifi: ieee80211: define beacon protection bit field

An AP supporting Beacon Protection should set bit 84 in
the extended capabilities IE (9.4.2.25 in the 802.11be D7 spec).
So the *4th* bit of the 10th byte should be checked to figure out
whether beacon protection is enabled or disabled.

Signed-off-by: Karthikeyan Kathirvel <karthikeyan.kathirvel@oss.qualcomm.com>
Reviewed-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
Link: https://patch.msgid.link/20250421111505.3633992-1-karthikeyan.kathirvel@oss.qualcomm.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 508d466de1cc..cbc3928aa504 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -4087,6 +4087,9 @@ enum ieee80211_tdls_actioncode {
 /* Defines support for enhanced multi-bssid advertisement*/
 #define WLAN_EXT_CAPA11_EMA_SUPPORT	BIT(3)
 
+/* Enable Beacon Protection */
+#define WLAN_EXT_CAPA11_BCN_PROTECT	BIT(4)
+
 /* TDLS specific payload type in the LLC/SNAP header */
 #define WLAN_TDLS_SNAP_RFTYPE	0x2
 
-- 
cgit v1.2.3


From 91ea0489dc97bfda72ed74f98ab66dc0ab4235c7 Mon Sep 17 00:00:00 2001
From: Rameshkumar Sundaram <quic_ramess@quicinc.com>
Date: Thu, 27 Mar 2025 10:43:19 +0530
Subject: wifi: ieee80211: Add helpers to fetch EMLSR delay and timeout values

Add helpers to get EMLSR transition delay, padding delay and transition
timeout values from EML capabilities field of Multi-link Element.

Signed-off-by: Rameshkumar Sundaram <quic_ramess@quicinc.com>
Link: https://patch.msgid.link/20250327051320.3253783-4-quic_ramess@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 74 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index cbc3928aa504..15a87f522017 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -5618,6 +5618,80 @@ static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len)
 	return len >= fixed + elem_len;
 }
 
+/**
+ * ieee80211_emlsr_pad_delay_in_us - Fetch the EMLSR Padding delay
+ *	in microseconds
+ * @eml_cap: EML capabilities field value from common info field of
+ *	the Multi-link element
+ * Return: the EMLSR Padding delay (in microseconds) encoded in the
+ *	EML Capabilities field
+ */
+
+static inline u32 ieee80211_emlsr_pad_delay_in_us(u16 eml_cap)
+{
+	/* IEEE Std 802.11be-2024 Table 9-417i—Encoding of the EMLSR
+	 * Padding Delay subfield.
+	 */
+	u32 pad_delay = u16_get_bits(eml_cap,
+				     IEEE80211_EML_CAP_EMLSR_PADDING_DELAY);
+
+	if (!pad_delay ||
+	    pad_delay > IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US)
+		return 0;
+
+	return 32 * (1 << (pad_delay - 1));
+}
+
+/**
+ * ieee80211_emlsr_trans_delay_in_us - Fetch the EMLSR Transition
+ *	delay in microseconds
+ * @eml_cap: EML capabilities field value from common info field of
+ *	the Multi-link element
+ * Return: the EMLSR Transition delay (in microseconds) encoded in the
+ *	EML Capabilities field
+ */
+
+static inline u32 ieee80211_emlsr_trans_delay_in_us(u16 eml_cap)
+{
+	/* IEEE Std 802.11be-2024 Table 9-417j—Encoding of the EMLSR
+	 * Transition Delay subfield.
+	 */
+	u32 trans_delay =
+		u16_get_bits(eml_cap,
+			     IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY);
+
+	/* invalid values also just use 0 */
+	if (!trans_delay ||
+	    trans_delay > IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US)
+		return 0;
+
+	return 16 * (1 << (trans_delay - 1));
+}
+
+/**
+ * ieee80211_eml_trans_timeout_in_us - Fetch the EMLSR Transition
+ *	timeout value in microseconds
+ * @eml_cap: EML capabilities field value from common info field of
+ *	the Multi-link element
+ * Return: the EMLSR Transition timeout (in microseconds) encoded in
+ *	the EML Capabilities field
+ */
+
+static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap)
+{
+	/* IEEE Std 802.11be-2024 Table 9-417m—Encoding of the
+	 * Transition Timeout subfield.
+	 */
+	u8 timeout = u16_get_bits(eml_cap,
+				  IEEE80211_EML_CAP_TRANSITION_TIMEOUT);
+
+	/* invalid values also just use 0 */
+	if (!timeout || timeout > IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU)
+		return 0;
+
+	return 128 * (1 << (timeout - 1));
+}
+
 #define for_each_mle_subelement(_elem, _data, _len)			\
 	if (ieee80211_mle_size_ok(_data, _len))				\
 		for_each_element(_elem,					\
-- 
cgit v1.2.3


From bfa4477751e9909bb121eca860f44d1b4259d871 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 22 Apr 2025 17:05:31 -0600
Subject: PM: runtime: Define pm_runtime_put cleanup helper

Define a cleanup helper for use with __free to automatically drop the
device usage count when the pointer goes out of scope.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://patch.msgid.link/20250422230534.2295291-2-alex.williamson@redhat.com
---
 include/linux/pm_runtime.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7fb5a459847e..69d4b2929ee6 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -466,6 +466,8 @@ static inline int pm_runtime_put(struct device *dev)
 	return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
 }
 
+DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T))
+
 /**
  * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
  * @dev: Target device.
-- 
cgit v1.2.3


From 52358dd63e348c3b6c488acc105be1aeda8fb923 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 18 Apr 2025 11:04:01 +0200
Subject: net: phy: remove function stubs

All callers of these functions depend on PHYLIB or select it directly
or indirectly by selecting PHYLINK. Stubs make sense for optional
functionality, but that's not the case here.

MDIO_XGENE usually is selected by NET_XGENE which also selects PHYLIB.
Add a dependency to PHYLIB nevertheless, in order not to break
randconfig builds.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/f7a69a1f-60e9-4ac0-8b7c-481e0cc850e7@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 37 -------------------------------------
 1 file changed, 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 066a28a4b64b..3beaf225ee88 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1753,7 +1753,6 @@ int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
 struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
 				     bool is_c45,
 				     struct phy_c45_device_ids *c45_ids);
-#if IS_ENABLED(CONFIG_PHYLIB)
 int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id);
 struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
 struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode);
@@ -1761,42 +1760,6 @@ struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode);
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
 void phy_device_free(struct phy_device *phydev);
-#else
-static inline int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id)
-{
-	return 0;
-}
-static inline
-struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode)
-{
-	return 0;
-}
-
-static inline
-struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
-{
-	return NULL;
-}
-
-static inline
-struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode)
-{
-	return NULL;
-}
-
-static inline
-struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45)
-{
-	return NULL;
-}
-
-static inline int phy_device_register(struct phy_device *phy)
-{
-	return 0;
-}
-
-static inline void phy_device_free(struct phy_device *phydev) { }
-#endif /* CONFIG_PHYLIB */
 void phy_device_remove(struct phy_device *phydev);
 int phy_get_c45_ids(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
-- 
cgit v1.2.3


From fe7f7ac8e0c708446ff017453add769ffc15deed Mon Sep 17 00:00:00 2001
From: Terry Junge <linuxhid@cosmicgizmosystems.com>
Date: Wed, 12 Mar 2025 15:23:31 -0700
Subject: HID: usbhid: Eliminate recurrent out-of-bounds bug in usbhid_parse()

Update struct hid_descriptor to better reflect the mandatory and
optional parts of the HID Descriptor as per USB HID 1.11 specification.
Note: the kernel currently does not parse any optional HID class
descriptors, only the mandatory report descriptor.

Update all references to member element desc[0] to rpt_desc.

Add test to verify bLength and bNumDescriptors values are valid.

Replace the for loop with direct access to the mandatory HID class
descriptor member for the report descriptor. This eliminates the
possibility of getting an out-of-bounds fault.

Add a warning message if the HID descriptor contains any unsupported
optional HID class descriptors.

Reported-by: syzbot+c52569baf0c843f35495@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c52569baf0c843f35495
Fixes: f043bfc98c19 ("HID: usbhid: fix out-of-bounds bug")
Cc: stable@vger.kernel.org
Signed-off-by: Terry Junge <linuxhid@cosmicgizmosystems.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 include/linux/hid.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index ef9a90ca0fbd..daae1d6d11a7 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -740,8 +740,9 @@ struct hid_descriptor {
 	__le16 bcdHID;
 	__u8  bCountryCode;
 	__u8  bNumDescriptors;
+	struct hid_class_descriptor rpt_desc;
 
-	struct hid_class_descriptor desc[1];
+	struct hid_class_descriptor opt_descs[];
 } __attribute__ ((packed));
 
 #define HID_DEVICE(b, g, ven, prod)					\
-- 
cgit v1.2.3


From a058002358b7aaf14674b2a0daa5194bfa5720a1 Mon Sep 17 00:00:00 2001
From: Aditya Garg <gargaditya08@live.com>
Date: Thu, 10 Apr 2025 12:59:27 +0530
Subject: HID: quirks: Add HID_QUIRK_IGNORE_MOUSE quirk

Some USB HID mice have drivers both in HID as well as a separate USB
driver. The already existing hid_mouse_ignore_list in hid-quirks manages
this, but is not yet configurable by usbhid.quirks, unlike all others like
hid_ignore_list. Thus in some HID devices, where the vendor provides USB
drivers only for the mouse and lets keyboard handled by the generic hid
drivers, presence of such a quirk prevents the user from compiling hid core
again to add the device to the table.

Signed-off-by: Aditya Garg <gargaditya08@live.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 include/linux/hid.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index daae1d6d11a7..a1305210b2fd 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -357,6 +357,7 @@ struct hid_item {
  * | @HID_QUIRK_INPUT_PER_APP:
  * | @HID_QUIRK_X_INVERT:
  * | @HID_QUIRK_Y_INVERT:
+ * | @HID_QUIRK_IGNORE_MOUSE:
  * | @HID_QUIRK_SKIP_OUTPUT_REPORTS:
  * | @HID_QUIRK_SKIP_OUTPUT_REPORT_ID:
  * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP:
@@ -382,6 +383,7 @@ struct hid_item {
 #define HID_QUIRK_INPUT_PER_APP			BIT(11)
 #define HID_QUIRK_X_INVERT			BIT(12)
 #define HID_QUIRK_Y_INVERT			BIT(13)
+#define HID_QUIRK_IGNORE_MOUSE			BIT(14)
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		BIT(16)
 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID		BIT(17)
 #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP	BIT(18)
-- 
cgit v1.2.3


From 7a3be00771aa9786c7bb4cdb0ee36fee45f67d69 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 23 Apr 2025 15:48:40 +0530
Subject: OPP: Return opp from dev_pm_opp_get()

For convenience of users, return back the pointer to the opp from
dev_pm_opp_get(), so they can do:

	opp = dev_pm_opp_get(tmp_opp);

No intentional functional impact.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/pm_opp.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index c247317aae38..5e4c3428b139 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -161,7 +161,7 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
 struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
 					   unsigned int *bw, int index);
 
-void dev_pm_opp_get(struct dev_pm_opp *opp);
+struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp);
 void dev_pm_opp_put(struct dev_pm_opp *opp);
 
 int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp);
@@ -345,7 +345,10 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline void dev_pm_opp_get(struct dev_pm_opp *opp) {}
+static inline struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp)
+{
+	return opp;
+}
 
 static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {}
 
-- 
cgit v1.2.3


From ead694941686345bfd3f95100d889191cb9e3cda Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 23 Apr 2025 15:48:40 +0530
Subject: OPP: Return opp_table from dev_pm_opp_get_opp_table_ref()

For convenience of users, return back the pointer to the opp_table from
dev_pm_opp_get_opp_table_ref(), so they can do:

	opp_table = dev_pm_opp_get_opp_table_ref(tmp_table);

No intentional functional impact.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/pm_opp.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 5e4c3428b139..0deddfa91aca 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -100,7 +100,7 @@ struct dev_pm_opp_data {
 #if defined(CONFIG_PM_OPP)
 
 struct opp_table *dev_pm_opp_get_opp_table(struct device *dev);
-void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table);
+struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table);
 void dev_pm_opp_put_opp_table(struct opp_table *opp_table);
 
 unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index);
@@ -207,7 +207,10 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) {}
+static inline struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table)
+{
+	return opp_table;
+}
 
 static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {}
 
-- 
cgit v1.2.3


From 0128816c42b52c6ee339718621aeda85855cd3be Mon Sep 17 00:00:00 2001
From: Cheng-Yang Chou <yphbchou0911@gmail.com>
Date: Thu, 10 Apr 2025 18:51:43 +0800
Subject: genirq: Fix typo in IRQ_NOTCONNECTED comment

Fix a minor typo in the comment for IRQ_NOTCONNECTED:
"distingiush" is corrected to "distinguish".

Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250410105144.214849-1-yphbchou0911@gmail.com
---
 include/linux/interrupt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c782a74d2a30..51b6484c0493 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -140,7 +140,7 @@ extern irqreturn_t no_action(int cpl, void *dev_id);
 /*
  * If a (PCI) device interrupt is not connected we set dev->irq to
  * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
- * can distingiush that case from other error returns.
+ * can distinguish that case from other error returns.
  *
  * 0x80000000 is guaranteed to be outside the available range of interrupts
  * and easy to distinguish from other possible incorrect values.
-- 
cgit v1.2.3


From 49916e22d9530d6cf027e635a5d824c7d698d67f Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Fri, 18 Apr 2025 21:08:03 +0100
Subject: timers: Remove unused __round_jiffies(_up)

Remove two trivial but long unused functions.

__round_jiffies() has been unused since 2008's
commit 9c133c469d38 ("Add round_jiffies_up and related routines")

__round_jiffies_up() has been unused since 2019's
commit 7ae3f6e130e8 ("powerpc/watchdog: Use hrtimers for per-CPU
heartbeat")

Remove them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250418200803.427911-1-linux@treblig.org
---
 include/linux/timer.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 10596d7c3a34..e17aac74b5b3 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -172,12 +172,10 @@ extern void init_timers(void);
 struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
-unsigned long __round_jiffies(unsigned long j, int cpu);
 unsigned long __round_jiffies_relative(unsigned long j, int cpu);
 unsigned long round_jiffies(unsigned long j);
 unsigned long round_jiffies_relative(unsigned long j);
 
-unsigned long __round_jiffies_up(unsigned long j, int cpu);
 unsigned long __round_jiffies_up_relative(unsigned long j, int cpu);
 unsigned long round_jiffies_up(unsigned long j);
 unsigned long round_jiffies_up_relative(unsigned long j);
-- 
cgit v1.2.3


From 4094040b1a133206ed893da2cf5e17cc22f7ca7c Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Tue, 8 Apr 2025 11:45:59 +0300
Subject: mfd: rohm-bd96801: Support ROHM BD96802

The ROHM BD96802 PMIC looks from software point of view a lot like ROHM
BD96801 PMIC. Just with reduced number of voltage rails. Both PMICs
provide two physical IRQ lines referred as INTB and ERRB and contain
blocks implementing regulator controls and a weatchdog. Hence it makes
sense to use same MFD core for both PMICs.

Add support for ROHM BD96802 scalable companion PMIC to the BD96801
core driver.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/05957d194425a79a4f35f287695c3d9ca2ed1ae2.1744090658.git.mazziesaccount@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/rohm-bd96801.h |  2 ++
 include/linux/mfd/rohm-bd96802.h | 74 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/rohm-generic.h |  1 +
 3 files changed, 77 insertions(+)
 create mode 100644 include/linux/mfd/rohm-bd96802.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/rohm-bd96801.h b/include/linux/mfd/rohm-bd96801.h
index e2d9e10b6364..68c8ac8ad409 100644
--- a/include/linux/mfd/rohm-bd96801.h
+++ b/include/linux/mfd/rohm-bd96801.h
@@ -40,7 +40,9 @@
  * INTB status registers are at range 0x5c ... 0x63
  */
 #define BD96801_REG_INT_SYS_ERRB1	0x52
+#define BD96801_REG_INT_BUCK2_ERRB	0x56
 #define BD96801_REG_INT_SYS_INTB	0x5c
+#define BD96801_REG_INT_BUCK2_INTB	0x5e
 #define BD96801_REG_INT_LDO7_INTB	0x63
 
 /* MASK registers */
diff --git a/include/linux/mfd/rohm-bd96802.h b/include/linux/mfd/rohm-bd96802.h
new file mode 100644
index 000000000000..bf4b77944edf
--- /dev/null
+++ b/include/linux/mfd/rohm-bd96802.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2025 ROHM Semiconductors
+ *
+ * The digital interface of trhe BD96802 PMIC is a reduced version of the
+ * BD96801. Hence the BD96801 definitions are used for registers and masks
+ * while this header only holds the IRQ definitions - mainly to avoid gaps in
+ * IRQ numbers caused by the lack of some BUCKs / LDOs and their respective
+ * IRQs.
+ */
+
+#ifndef __LINUX_MFD_BD96802_H__
+#define __LINUX_MFD_BD96802_H__
+
+/* ERRB IRQs */
+enum {
+	/* Reg 0x52, 0x53, 0x54 - ERRB system IRQs */
+	BD96802_OTP_ERR_STAT,
+	BD96802_DBIST_ERR_STAT,
+	BD96802_EEP_ERR_STAT,
+	BD96802_ABIST_ERR_STAT,
+	BD96802_PRSTB_ERR_STAT,
+	BD96802_DRMOS1_ERR_STAT,
+	BD96802_DRMOS2_ERR_STAT,
+	BD96802_SLAVE_ERR_STAT,
+	BD96802_VREF_ERR_STAT,
+	BD96802_TSD_ERR_STAT,
+	BD96802_UVLO_ERR_STAT,
+	BD96802_OVLO_ERR_STAT,
+	BD96802_OSC_ERR_STAT,
+	BD96802_PON_ERR_STAT,
+	BD96802_POFF_ERR_STAT,
+	BD96802_CMD_SHDN_ERR_STAT,
+	BD96802_INT_SHDN_ERR_STAT,
+
+	/* Reg 0x55 BUCK1 ERR IRQs */
+	BD96802_BUCK1_PVIN_ERR_STAT,
+	BD96802_BUCK1_OVP_ERR_STAT,
+	BD96802_BUCK1_UVP_ERR_STAT,
+	BD96802_BUCK1_SHDN_ERR_STAT,
+
+	/* Reg 0x56 BUCK2 ERR IRQs */
+	BD96802_BUCK2_PVIN_ERR_STAT,
+	BD96802_BUCK2_OVP_ERR_STAT,
+	BD96802_BUCK2_UVP_ERR_STAT,
+	BD96802_BUCK2_SHDN_ERR_STAT,
+};
+
+/* INTB IRQs */
+enum {
+	/* Reg 0x5c (System INTB) */
+	BD96802_TW_STAT,
+	BD96802_WDT_ERR_STAT,
+	BD96802_I2C_ERR_STAT,
+	BD96802_CHIP_IF_ERR_STAT,
+
+	/* Reg 0x5d (BUCK1 INTB) */
+	BD96802_BUCK1_OCPH_STAT,
+	BD96802_BUCK1_OCPL_STAT,
+	BD96802_BUCK1_OCPN_STAT,
+	BD96802_BUCK1_OVD_STAT,
+	BD96802_BUCK1_UVD_STAT,
+	BD96802_BUCK1_TW_CH_STAT,
+
+	/* Reg 0x5e (BUCK2 INTB) */
+	BD96802_BUCK2_OCPH_STAT,
+	BD96802_BUCK2_OCPL_STAT,
+	BD96802_BUCK2_OCPN_STAT,
+	BD96802_BUCK2_OVD_STAT,
+	BD96802_BUCK2_UVD_STAT,
+	BD96802_BUCK2_TW_CH_STAT,
+};
+
+#endif
diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index e7d4e6afe388..11b86f9129e3 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -17,6 +17,7 @@ enum rohm_chip_type {
 	ROHM_CHIP_TYPE_BD71837,
 	ROHM_CHIP_TYPE_BD71847,
 	ROHM_CHIP_TYPE_BD96801,
+	ROHM_CHIP_TYPE_BD96802,
 	ROHM_CHIP_TYPE_AMOUNT
 };
 
-- 
cgit v1.2.3


From 6a309b489215f705c20cb4ed7f85d9c16f768e55 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Tue, 8 Apr 2025 11:46:36 +0300
Subject: mfd: bd96801: Support ROHM BD96805

The ROHM BD96805 is from the software perspective almost identical to
the ROHM BD96801. The main difference is different voltage tuning
ranges. Add support differentiating these PMICs based on the compatible,
and invoking the regulator driver with correct IC type.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/8680097dc083f191bea56d3ac7c6fe5c005644ec.1744090658.git.mazziesaccount@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/rohm-generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index 11b86f9129e3..867acf5baefc 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -18,6 +18,7 @@ enum rohm_chip_type {
 	ROHM_CHIP_TYPE_BD71847,
 	ROHM_CHIP_TYPE_BD96801,
 	ROHM_CHIP_TYPE_BD96802,
+	ROHM_CHIP_TYPE_BD96805,
 	ROHM_CHIP_TYPE_AMOUNT
 };
 
-- 
cgit v1.2.3


From fecc18a9f59ce9c36eb3622ae77bff5fa5c6d976 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Tue, 8 Apr 2025 11:47:10 +0300
Subject: mfd: bd96801: Support ROHM BD96806

The ROHM BD96806 is from the software perspective almost identical to
the ROHM BD96802. The main difference is different voltage tuning
ranges. Add support differentiating these PMICs based on the compatible,
and invoking the regulator driver with correct IC type.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/ccc95ae33613648fdcba08915777d945412ac5c4.1744090658.git.mazziesaccount@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/rohm-generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index 867acf5baefc..579e8dcfcca4 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -19,6 +19,7 @@ enum rohm_chip_type {
 	ROHM_CHIP_TYPE_BD96801,
 	ROHM_CHIP_TYPE_BD96802,
 	ROHM_CHIP_TYPE_BD96805,
+	ROHM_CHIP_TYPE_BD96806,
 	ROHM_CHIP_TYPE_AMOUNT
 };
 
-- 
cgit v1.2.3


From 7f8ce4d88b42fcbd3350370ec4d02e00979fc5a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Thu, 17 Apr 2025 20:16:11 +0200
Subject: pwm: Fix various formatting issues in kernel-doc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Return and (where interesting) Context sections, fix some formatting
and drop documenting the internal function __pwm_apply().

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/20250417181611.2693599-2-u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index bf0469b2201d..63a17d2b4ec8 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -218,6 +218,8 @@ static inline void pwm_init_state(const struct pwm_device *pwm,
  *
  * pwm_get_state(pwm, &state);
  * duty = pwm_get_relative_duty_cycle(&state, 100);
+ *
+ * Returns: rounded relative duty cycle multiplied by @scale
  */
 static inline unsigned int
 pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale)
@@ -244,8 +246,8 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale)
  * pwm_set_relative_duty_cycle(&state, 50, 100);
  * pwm_apply_might_sleep(pwm, &state);
  *
- * This functions returns -EINVAL if @duty_cycle and/or @scale are
- * inconsistent (@scale == 0 or @duty_cycle > @scale).
+ * Returns: 0 on success or ``-EINVAL`` if @duty_cycle and/or @scale are
+ * inconsistent (@scale == 0 or @duty_cycle > @scale)
  */
 static inline int
 pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle,
@@ -351,7 +353,7 @@ struct pwm_chip {
  * pwmchip_supports_waveform() - checks if the given chip supports waveform callbacks
  * @chip: The pwm_chip to test
  *
- * Returns true iff the pwm chip support the waveform functions like
+ * Returns: true iff the pwm chip support the waveform functions like
  * pwm_set_waveform_might_sleep() and pwm_round_waveform_might_sleep()
  */
 static inline bool pwmchip_supports_waveform(struct pwm_chip *chip)
-- 
cgit v1.2.3


From 309d28576f0a23931a36bf67324868448f79a77e Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Thu, 27 Mar 2025 12:39:56 -0500
Subject: KVM: SVM: Fix SNP AP destroy race with VMRUN

An AP destroy request for a target vCPU is typically followed by an
RMPADJUST to remove the VMSA attribute from the page currently being
used as the VMSA for the target vCPU. This can result in a vCPU that
is about to VMRUN to exit with #VMEXIT_INVALID.

This usually does not happen as APs are typically sitting in HLT when
being destroyed and therefore the vCPU thread is not running at the time.
However, if HLT is allowed inside the VM, then the vCPU could be about to
VMRUN when the VMSA attribute is removed from the VMSA page, resulting in
a #VMEXIT_INVALID when the vCPU actually issues the VMRUN and causing the
guest to crash. An RMPADJUST against an in-use (already running) VMSA
results in a #NPF for the vCPU issuing the RMPADJUST, so the VMSA
attribute cannot be changed until the VMRUN for target vCPU exits. The
Qemu command line option '-overcommit cpu-pm=on' is an example of allowing
HLT inside the guest.

Update the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event to include the
KVM_REQUEST_WAIT flag. The kvm_vcpu_kick() function will not wait for
requests to be honored, so create kvm_make_request_and_kick() that will
add a new event request and honor the KVM_REQUEST_WAIT flag. This will
ensure that the target vCPU sees the AP destroy request before returning
to the initiating vCPU should the target vCPU be in guest mode.

Fixes: e366f92ea99e ("KVM: SEV: Support SEV-SNP AP Creation NAE event")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/fe2c885bf35643dd224e91294edb6777d5df23a4.1743097196.git.thomas.lendacky@amd.com
[sean: add a comment explaining the use of smp_send_reschedule()]
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1dedc421b3e3..c685fb417e92 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1505,7 +1505,16 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+
+#ifndef CONFIG_S390
+void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait);
+
+static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	__kvm_vcpu_kick(vcpu, false);
+}
+#endif
+
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
 
@@ -2253,6 +2262,14 @@ static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 	__kvm_make_request(req, vcpu);
 }
 
+#ifndef CONFIG_S390
+static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu)
+{
+	kvm_make_request(req, vcpu);
+	__kvm_vcpu_kick(vcpu, req & KVM_REQUEST_WAIT);
+}
+#endif
+
 static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
 {
 	return READ_ONCE(vcpu->requests);
-- 
cgit v1.2.3


From bc2550b4e195754fbb24aac1f012d3dd9e3b4edc Mon Sep 17 00:00:00 2001
From: Jeremy Harris <jgh@exim.org>
Date: Wed, 23 Apr 2025 13:43:33 +0100
Subject: tcp: fastopen: note that a child socket was created

tcp: fastopen: note that a child socket was created

This uses up the last bit in a field of tcp_sock.

Signed-off-by: Jeremy Harris <jgh@exim.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20250423124334.4916-2-jgh@exim.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/tcp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1669d95bb0f9..a8af71623ba7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -385,7 +385,8 @@ struct tcp_sock {
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
-		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		syn_fastopen_child:1; /* created TFO passive child socket */
 
 	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */
-- 
cgit v1.2.3


From d57ee99831e336576359beb26e2b140511c99106 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 23 Apr 2025 17:08:08 +0200
Subject: net: ethernet: mtk_wed: annotate RCU release in attach()

There are some sparse warnings in wifi, and it seems that
it's actually possible to annotate a function pointer with
__releases(), making the sparse warnings go away. In a way
that also serves as documentation that rcu_read_unlock()
must be called in the attach method, so add that annotation.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://patch.msgid.link/20250423150811.456205-2-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/soc/mediatek/mtk_wed.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index a476648858a6..d8949a4ed0dc 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -192,7 +192,7 @@ struct mtk_wed_device {
 };
 
 struct mtk_wed_ops {
-	int (*attach)(struct mtk_wed_device *dev);
+	int (*attach)(struct mtk_wed_device *dev) __releases(RCU);
 	int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring,
 			     void __iomem *regs, bool reset);
 	int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring,
-- 
cgit v1.2.3


From eae324ca644554d5ce363186bee820a088bb74ab Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sun, 20 Apr 2025 12:47:25 +0200
Subject: configfs: Add CONFIGFS_ATTR_PERM helper

This new helper allows creating rw files with custom
permissions.

Signed-off-by: Richard Weinberger <richard@nod.at>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Link: https://lore.kernel.org/r/20250420104726.2963750-1-richard@nod.at
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 include/linux/configfs.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index c771e9d0d0b9..698520b1bfdb 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -120,15 +120,19 @@ struct configfs_attribute {
 	ssize_t (*store)(struct config_item *, const char *, size_t);
 };
 
-#define CONFIGFS_ATTR(_pfx, _name)			\
+#define CONFIGFS_ATTR_PERM(_pfx, _name, _perm)		\
 static struct configfs_attribute _pfx##attr_##_name = {	\
 	.ca_name	= __stringify(_name),		\
-	.ca_mode	= S_IRUGO | S_IWUSR,		\
+	.ca_mode	= _perm,			\
 	.ca_owner	= THIS_MODULE,			\
 	.show		= _pfx##_name##_show,		\
 	.store		= _pfx##_name##_store,		\
 }
 
+#define CONFIGFS_ATTR(_pfx, _name) CONFIGFS_ATTR_PERM(	\
+		_pfx, _name, S_IRUGO | S_IWUSR		\
+)
+
 #define CONFIGFS_ATTR_RO(_pfx, _name)			\
 static struct configfs_attribute _pfx##attr_##_name = {	\
 	.ca_name	= __stringify(_name),		\
-- 
cgit v1.2.3


From be4e3097c1f800b0f39e7e60b2b28eb6603f5d06 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Wed, 23 Apr 2025 22:40:56 +0800
Subject: tty: Remove unused API tty_port_register_device_serdev()

Remove API tty_port_register_device_serdev() which has no caller.

Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250423-remove_api-v1-1-fac673d09feb@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_port.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h
index 1b861f2100b6..08f89a598366 100644
--- a/include/linux/tty_port.h
+++ b/include/linux/tty_port.h
@@ -147,9 +147,6 @@ struct device *tty_port_register_device_attr(struct tty_port *port,
 		struct tty_driver *driver, unsigned index,
 		struct device *device, void *drvdata,
 		const struct attribute_group **attr_grp);
-struct device *tty_port_register_device_serdev(struct tty_port *port,
-		struct tty_driver *driver, unsigned index,
-		struct device *host, struct device *parent);
 struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
 		struct tty_driver *driver, unsigned index,
 		struct device *host, struct device *parent, void *drvdata,
-- 
cgit v1.2.3


From 1404d3509c768732be51d0acf8330689936a692a Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Fri, 25 Apr 2025 13:13:12 +0200
Subject: serial: switch uart_port::iotype to enum uart_iotype

The inline-defined constants look weird. Instead, define a proper enum
for them and type uart_port::iotype as that enum. This allows for proper
checking in switch-case labels (somewhere, a default or UPIO_UNKNOWN
label needs to be added/handled).

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250425111315.1036184-4-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 743b4afaad4c..914b5e97e056 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -427,6 +427,18 @@ struct uart_icount {
 typedef u64 __bitwise upf_t;
 typedef unsigned int __bitwise upstat_t;
 
+enum uart_iotype {
+	UPIO_UNKNOWN	= -1,
+	UPIO_PORT	= SERIAL_IO_PORT,	/* 8b I/O port access */
+	UPIO_HUB6	= SERIAL_IO_HUB6,	/* Hub6 ISA card */
+	UPIO_MEM	= SERIAL_IO_MEM,	/* driver-specific */
+	UPIO_MEM32	= SERIAL_IO_MEM32,	/* 32b little endian */
+	UPIO_AU		= SERIAL_IO_AU,		/* Au1x00 and RT288x type IO */
+	UPIO_TSI	= SERIAL_IO_TSI,	/* Tsi108/109 type IO */
+	UPIO_MEM32BE	= SERIAL_IO_MEM32BE,	/* 32b big endian */
+	UPIO_MEM16	= SERIAL_IO_MEM16,	/* 16b little endian */
+};
+
 struct uart_port {
 	spinlock_t		lock;			/* port lock */
 	unsigned long		iobase;			/* in/out[bwl] */
@@ -469,23 +481,13 @@ struct uart_port {
 	unsigned char		x_char;			/* xon/xoff char */
 	unsigned char		regshift;		/* reg offset shift */
 
-	unsigned char		iotype;			/* io access style */
-
-#define UPIO_UNKNOWN		((unsigned char)~0U)	/* UCHAR_MAX */
-#define UPIO_PORT		(SERIAL_IO_PORT)	/* 8b I/O port access */
-#define UPIO_HUB6		(SERIAL_IO_HUB6)	/* Hub6 ISA card */
-#define UPIO_MEM		(SERIAL_IO_MEM)		/* driver-specific */
-#define UPIO_MEM32		(SERIAL_IO_MEM32)	/* 32b little endian */
-#define UPIO_AU			(SERIAL_IO_AU)		/* Au1x00 and RT288x type IO */
-#define UPIO_TSI		(SERIAL_IO_TSI)		/* Tsi108/109 type IO */
-#define UPIO_MEM32BE		(SERIAL_IO_MEM32BE)	/* 32b big endian */
-#define UPIO_MEM16		(SERIAL_IO_MEM16)	/* 16b little endian */
-
 	unsigned char		quirks;			/* internal quirks */
 
 	/* internal quirks must be updated while holding port mutex */
 #define UPQ_NO_TXEN_TEST	BIT(0)
 
+	enum uart_iotype	iotype;			/* io access style */
+
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
 	struct uart_state	*state;			/* pointer to parent state */
@@ -1101,8 +1103,8 @@ static inline bool uart_console_registered(struct uart_port *port)
 
 struct uart_port *uart_get_console(struct uart_port *ports, int nr,
 				   struct console *c);
-int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
-			char **options);
+int uart_parse_earlycon(char *p, enum uart_iotype *iotype,
+			resource_size_t *addr, char **options);
 void uart_parse_options(const char *options, int *baud, int *parity, int *bits,
 			int *flow);
 int uart_set_options(struct uart_port *port, struct console *co, int baud,
-- 
cgit v1.2.3


From 5117f28a7d78d00a44d03463115a0f295dbbb1ff Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Tue, 15 Apr 2025 12:35:58 +0100
Subject: comedi: remove the mapping of the Comedi buffer in vmalloc address
 space

Now that all the code that accesses the Comedi buffer data does so
page-by-page, using the `virt_addr` member of `struct comedi_buf_page`
to point to the data of each page, do not linearly map the buffer into
vmalloc address space (pointed to by the `prealloc_buf` member of
`struct comedi_async`).  That was only done for convenience, but was not
done for those drivers that need a DMA coherent buffer, which is
allocated in a single chunk.  Remove the `prealloc_buf` member as it is
no longer used.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://lore.kernel.org/r/20250415114008.5977-4-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/comedi/comedidev.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h
index c08416a7364b..4cb0400ad616 100644
--- a/include/linux/comedi/comedidev.h
+++ b/include/linux/comedi/comedidev.h
@@ -234,16 +234,12 @@ struct comedi_buf_page {
  *
  * A COMEDI data buffer is allocated as individual pages, either in
  * conventional memory or DMA coherent memory, depending on the attached,
- * low-level hardware device.  (The buffer pages also get mapped into the
- * kernel's contiguous virtual address space pointed to by the 'prealloc_buf'
- * member of &struct comedi_async.)
+ * low-level hardware device.
  *
  * The buffer is normally freed when the COMEDI device is detached from the
  * low-level driver (which may happen due to device removal), but if it happens
  * to be mmapped at the time, the pages cannot be freed until the buffer has
- * been munmapped.  That is what the reference counter is for.  (The virtual
- * address space pointed by 'prealloc_buf' is freed when the COMEDI device is
- * detached.)
+ * been munmapped.  That is what the reference counter is for.
  */
 struct comedi_buf_map {
 	struct device *dma_hw_dev;
@@ -255,7 +251,6 @@ struct comedi_buf_map {
 
 /**
  * struct comedi_async - Control data for asynchronous COMEDI commands
- * @prealloc_buf: Kernel virtual address of allocated acquisition buffer.
  * @prealloc_bufsz: Buffer size (in bytes).
  * @buf_map: Map of buffer pages.
  * @max_bufsize: Maximum allowed buffer size (in bytes).
@@ -344,7 +339,6 @@ struct comedi_buf_map {
  * less than or equal to UINT_MAX).
  */
 struct comedi_async {
-	void *prealloc_buf;
 	unsigned int prealloc_bufsz;
 	struct comedi_buf_map *buf_map;
 	unsigned int max_bufsize;
-- 
cgit v1.2.3


From dbc8c84d5c2e200c79393c17896bdc65e6daef30 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 20 Apr 2025 15:57:39 +0100
Subject: misc: rtsx: Remove deadcode

The last uses of rtsx_ms_power_off_card3v3() and
rtsx_sd_power_off_card3v3() were removed by 2019's
commit bede03a579b3 ("misc: rtsx: Enable OCP for rts522a rts524a rts525a
rts5260")

The last use of rtsx_pci_transfer_data() was removed by 2024's
commit d0f459259c13 ("memstick: rtsx_pci_ms: Remove Realtek PCI memstick
driver")

Remove them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Link: https://lore.kernel.org/r/20250420145739.58337-1-linux@treblig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/rtsx_pci.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h
index 4612ef09a0c7..3b4c36705a9b 100644
--- a/include/linux/rtsx_pci.h
+++ b/include/linux/rtsx_pci.h
@@ -1312,8 +1312,6 @@ void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
 		u8 cmd_type, u16 reg_addr, u8 mask, u8 data);
 void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
 int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
-int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-		int num_sg, bool read, int timeout);
 int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read);
 void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-- 
cgit v1.2.3


From 142ba31d8b4aff086c4f080bd97d437232922177 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Sun, 20 Apr 2025 12:19:09 +0800
Subject: PM: wakeup: Do not expose 4 device wakeup source APIs

The following 4 APIs are only used by drivers/base/power/wakeup.c
internally.

- wakeup_source_create()
- wakeup_source_destroy()
- wakeup_source_add()
- wakeup_source_remove()

Do not expose them by making them as static functions.

Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Link: https://lore.kernel.org/r/20250420-fix_power-v2-1-9b938d2283aa@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pm_wakeup.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 51e0e8dd5f9e..c838b4a30f87 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -95,10 +95,6 @@ static inline void device_set_wakeup_path(struct device *dev)
 }
 
 /* drivers/base/power/wakeup.c */
-extern struct wakeup_source *wakeup_source_create(const char *name);
-extern void wakeup_source_destroy(struct wakeup_source *ws);
-extern void wakeup_source_add(struct wakeup_source *ws);
-extern void wakeup_source_remove(struct wakeup_source *ws);
 extern struct wakeup_source *wakeup_source_register(struct device *dev,
 						    const char *name);
 extern void wakeup_source_unregister(struct wakeup_source *ws);
@@ -129,17 +125,6 @@ static inline bool device_can_wakeup(struct device *dev)
 	return dev->power.can_wakeup;
 }
 
-static inline struct wakeup_source *wakeup_source_create(const char *name)
-{
-	return NULL;
-}
-
-static inline void wakeup_source_destroy(struct wakeup_source *ws) {}
-
-static inline void wakeup_source_add(struct wakeup_source *ws) {}
-
-static inline void wakeup_source_remove(struct wakeup_source *ws) {}
-
 static inline struct wakeup_source *wakeup_source_register(struct device *dev,
 							   const char *name)
 {
-- 
cgit v1.2.3


From 477058411c45f225ddfbb4769e35a9a5a95cb826 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 25 Apr 2025 10:11:30 +0200
Subject: pidfs: register pid in pidfs

Add simple helpers that allow a struct pid to be pinned via a pidfs
dentry/inode. If no pidfs dentry exists a new one will be allocated for
it. A reference is taken by pidfs on @pid. The reference must be
released via pidfs_put_pid().

This will allow AF_UNIX sockets to allocate a dentry for the peer
credentials pid at the time they are recorded where we know the task is
still alive. When the task gets reaped its exit status is guaranteed to
be recorded and a pidfd can be handed out for the reaped task.

Link: https://lore.kernel.org/20250425-work-pidfs-net-v2-1-450a19461e75@kernel.org
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: David Rheinsberg <david@readahead.eu>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/pidfs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 05e6f8f4a026..2676890c4d0d 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -8,5 +8,8 @@ void pidfs_add_pid(struct pid *pid);
 void pidfs_remove_pid(struct pid *pid);
 void pidfs_exit(struct task_struct *tsk);
 extern const struct dentry_operations pidfs_dentry_operations;
+int pidfs_register_pid(struct pid *pid);
+void pidfs_get_pid(struct pid *pid);
+void pidfs_put_pid(struct pid *pid);
 
 #endif /* _LINUX_PID_FS_H */
-- 
cgit v1.2.3


From a71f402acd71a942e59c16270ad61dee06de6e24 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 25 Apr 2025 10:11:32 +0200
Subject: pidfs: get rid of __pidfd_prepare()

Fold it into pidfd_prepare() and rename PIDFD_CLONE to PIDFD_STALE to
indicate that the passed pid might not have task linkage and no explicit
check for that should be performed.

Link: https://lore.kernel.org/20250425-work-pidfs-net-v2-3-450a19461e75@kernel.org
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: David Rheinsberg <david@readahead.eu>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/pid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 311ecebd7d56..453ae6d8a68d 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -77,7 +77,7 @@ struct file;
 struct pid *pidfd_pid(const struct file *file);
 struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
 struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
-int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file);
 void do_notify_pidfd(struct task_struct *task);
 
 static inline struct pid *get_pid(struct pid *pid)
-- 
cgit v1.2.3


From 6cccf837ac8d72e13c651f60d93545f9fb4e84ed Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 26 Apr 2025 11:21:19 +0200
Subject: Revert "vt: create ucs_recompose.c using gen_ucs_recompose.py"

This reverts commit 54af55b990eda5a6a0140a3cded8094b42c0c3b7.

A new version of the series was submitted, so it's easier to revert the
old one and add the new one due to the changes invovled.

Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 4d3a34c288e5..b3a911866662 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -30,7 +30,6 @@ int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
 bool ucs_is_zero_width(uint32_t cp);
-uint32_t ucs_recompose(uint32_t base, uint32_t combining);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -70,11 +69,6 @@ static inline bool ucs_is_zero_width(uint32_t cp)
 {
 	return false;
 }
-
-static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining)
-{
-	return 0;
-}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From 67a4bb27461b0e3b39b27db688b5981b6eb62175 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 26 Apr 2025 11:21:21 +0200
Subject: Revert "vt: update ucs_width.c using gen_ucs_width.py"

This reverts commit 3a1ab63aa05b4736a7d30ae0a769385662f13def.

A new version of the series was submitted, so it's easier to revert the
old one and add the new one due to the changes invovled.

Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index b3a911866662..7d778752dcef 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -29,7 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
-bool ucs_is_zero_width(uint32_t cp);
+static inline bool ucs_is_zero_width(uint32_t cp)
+{
+	/* coming soon */
+	return false;
+}
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
-- 
cgit v1.2.3


From d3e92076c1af713e65edac109499c25c37f38c16 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 26 Apr 2025 11:21:24 +0200
Subject: Revert "vt: properly support zero-width Unicode code points"

This reverts commit e88391f730e46d208b7fb37b02611d24137af1ef.

A new version of the series was submitted, so it's easier to revert the
old one and add the new one due to the changes invovled.

Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 7d778752dcef..caf079bcb8c9 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -29,11 +29,6 @@ u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
-static inline bool ucs_is_zero_width(uint32_t cp)
-{
-	/* coming soon */
-	return false;
-}
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -68,11 +63,6 @@ static inline bool ucs_is_double_width(uint32_t cp)
 {
 	return false;
 }
-
-static inline bool ucs_is_zero_width(uint32_t cp)
-{
-	return false;
-}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From e42e607aefc4132d508a0e5724b5d0975d0a53e8 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 26 Apr 2025 11:21:25 +0200
Subject: Revert "vt: move unicode processing to a separate file"

This reverts commit 2acaf27cd7f4f32bfe8bf7335690618e2417e744.

A new version of the series was submitted, so it's easier to revert the
old one and add the new one due to the changes invovled.

Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index caf079bcb8c9..c35db4896c37 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -28,7 +28,6 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs);
 u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
-bool ucs_is_double_width(uint32_t cp);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -58,11 +57,6 @@ static inline int conv_uni_to_8bit(u32 uni)
 }
 
 static inline void console_map_init(void) { }
-
-static inline bool ucs_is_double_width(uint32_t cp)
-{
-	return false;
-}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From 07bc3f442f47b4d158468c2e0146475bdf009091 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Thu, 17 Apr 2025 14:45:04 -0400
Subject: vt: move unicode processing to a separate file

This will make it easier to maintain. Also make it depend on
CONFIG_CONSOLE_TRANSLATIONS.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250417184849.475581-3-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index c35db4896c37..caf079bcb8c9 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs);
 u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
+bool ucs_is_double_width(uint32_t cp);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni)
 }
 
 static inline void console_map_init(void) { }
+
+static inline bool ucs_is_double_width(uint32_t cp)
+{
+	return false;
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From 95b05de0a56699392e67590d000df76fedec609a Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Thu, 17 Apr 2025 14:45:05 -0400
Subject: vt: properly support zero-width Unicode code points

Zero-width Unicode code points are causing misalignment in vertically
aligned content, disrupting the visual layout. Let's handle zero-width
code points more intelligently.

Double-width code points are stored in the screen grid followed by a white
space code point to create the expected screen layout. When a double-width
code point is followed by a zero-width code point in the console incoming
bytestream (e.g., an emoji with a presentation selector) then we may
replace the white space padding by that zero-width code point instead of
dropping it. This maximize screen content information while preserving
proper layout.

If a zero-width code point is preceded by a single-width code point then
the above trick is not possible and such zero-width code point must
be dropped.

VS16 (Variation Selector 16, U+FE0F) is special as it typically doubles
the width of the preceding single-width code point. We handle that case
by giving VS16 a width of 1 instead of 0 when that happens.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250417184849.475581-4-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index caf079bcb8c9..7d778752dcef 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
+static inline bool ucs_is_zero_width(uint32_t cp)
+{
+	/* coming soon */
+	return false;
+}
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp)
 {
 	return false;
 }
+
+static inline bool ucs_is_zero_width(uint32_t cp)
+{
+	return false;
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From 54cda9201c673fd1c5de189d961670999232e49d Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Thu, 17 Apr 2025 14:45:08 -0400
Subject: vt: use new tables in ucs.c

This removes the table from ucs.c and substitutes the generated tables
from ucs_width_table.h providing comprehensive ranges for double-width
and zero-width Unicode code points.

Also implements ucs_is_zero_width() to query the new zero-width table.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250417184849.475581-7-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 7d778752dcef..b3a911866662 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c);
 int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
-static inline bool ucs_is_zero_width(uint32_t cp)
-{
-	/* coming soon */
-	return false;
-}
+bool ucs_is_zero_width(uint32_t cp);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
-- 
cgit v1.2.3


From b5c574995d842d241d810f3a6a3ebb03c52d57fa Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Thu, 17 Apr 2025 14:45:11 -0400
Subject: vt: support Unicode recomposition

Try replacing any decomposed Unicode sequence by the corresponding
recomposed code point. Code point to glyph correspondance works best
after recomposition, and this apply mostly to single-width code points
therefore we can't preserve them in their decomposed form anyway.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250417184849.475581-10-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index b3a911866662..8167494229db 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
 bool ucs_is_zero_width(uint32_t cp);
+u32 ucs_recompose(u32 base, u32 mark);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp)
 {
 	return false;
 }
+
+static inline u32 ucs_recompose(u32 base, u32 mark)
+{
+	return 0;
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From f5e5631df596ade5875ba1dc5d640611745d0c0d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 20 Feb 2025 18:20:26 +0200
Subject: devres: Move devm_*_action*() APIs to devres.h

We have a newly created header linux/device/devres.h that gathers
device managed APIs, so users won't need to include entire device.h
for only these ones. Move devm_*_action*() APIs to devres.h as well.

Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Zijun Hu <quic_zijuhu@quicinc.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20250220162238.2738038-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/device.h        | 38 --------------------------------------
 include/linux/device/devres.h | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 79e49fe494b7..4940db137fff 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -281,44 +281,6 @@ int __must_check device_create_bin_file(struct device *dev,
 void device_remove_bin_file(struct device *dev,
 			    const struct bin_attribute *attr);
 
-/* allows to add/remove a custom action to devres stack */
-int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data);
-
-/**
- * devm_remove_action() - removes previously added custom action
- * @dev: Device that owns the action
- * @action: Function implementing the action
- * @data: Pointer to data passed to @action implementation
- *
- * Removes instance of @action previously added by devm_add_action().
- * Both action and data should match one of the existing entries.
- */
-static inline
-void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
-{
-	WARN_ON(devm_remove_action_nowarn(dev, action, data));
-}
-
-void devm_release_action(struct device *dev, void (*action)(void *), void *data);
-
-int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name);
-#define devm_add_action(dev, action, data) \
-	__devm_add_action(dev, action, data, #action)
-
-static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *),
-					     void *data, const char *name)
-{
-	int ret;
-
-	ret = __devm_add_action(dev, action, data, name);
-	if (ret)
-		action(data);
-
-	return ret;
-}
-#define devm_add_action_or_reset(dev, action, data) \
-	__devm_add_action_or_reset(dev, action, data, #action)
-
 /**
  * devm_alloc_percpu - Resource-managed alloc_percpu
  * @dev: Device to allocate per-cpu memory for
diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h
index 9b49f9915850..9cd1787ef28e 100644
--- a/include/linux/device/devres.h
+++ b/include/linux/device/devres.h
@@ -8,6 +8,7 @@
 #include <linux/overflow.h>
 #include <linux/stdarg.h>
 #include <linux/types.h>
+#include <asm/bug.h>
 
 struct device;
 struct device_node;
@@ -126,4 +127,42 @@ void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int in
 
 #endif
 
+/* allows to add/remove a custom action to devres stack */
+int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data);
+
+/**
+ * devm_remove_action() - removes previously added custom action
+ * @dev: Device that owns the action
+ * @action: Function implementing the action
+ * @data: Pointer to data passed to @action implementation
+ *
+ * Removes instance of @action previously added by devm_add_action().
+ * Both action and data should match one of the existing entries.
+ */
+static inline
+void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
+{
+	WARN_ON(devm_remove_action_nowarn(dev, action, data));
+}
+
+void devm_release_action(struct device *dev, void (*action)(void *), void *data);
+
+int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name);
+#define devm_add_action(dev, action, data) \
+	__devm_add_action(dev, action, data, #action)
+
+static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *),
+					     void *data, const char *name)
+{
+	int ret;
+
+	ret = __devm_add_action(dev, action, data, name);
+	if (ret)
+		action(data);
+
+	return ret;
+}
+#define devm_add_action_or_reset(dev, action, data) \
+	__devm_add_action_or_reset(dev, action, data, #action)
+
 #endif /* _DEVICE_DEVRES_H_ */
-- 
cgit v1.2.3


From e383bb8f958444620d96386811aacf6a49757996 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 20 Feb 2025 18:20:27 +0200
Subject: devres: Add devm_is_action_added() helper

In some code we would like to know if the action in device managed resources
was added by devm_add_action() family of calls. Introduce a helper for that.

Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Zijun Hu <quic_zijuhu@quicinc.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20250220162238.2738038-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/device/devres.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h
index 9cd1787ef28e..ae696d10faff 100644
--- a/include/linux/device/devres.h
+++ b/include/linux/device/devres.h
@@ -165,4 +165,6 @@ static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(
 #define devm_add_action_or_reset(dev, action, data) \
 	__devm_add_action_or_reset(dev, action, data, #action)
 
+bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data);
+
 #endif /* _DEVICE_DEVRES_H_ */
-- 
cgit v1.2.3


From b15d97139ff14beb7c300f261e11d22d5a996941 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:11 +0200
Subject: mtd: spinand: Use more specific naming for the reset op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
vendor naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the reset macro name.

Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 311f145eb4e8..d1b9b630bd83 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -20,7 +20,7 @@
  * Standard SPI NAND flash operations
  */
 
-#define SPINAND_RESET_OP						\
+#define SPINAND_RESET_1S_0_0_OP						\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0xff, 1),				\
 		   SPI_MEM_OP_NO_ADDR,					\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From 7c8896dd4a2a27c84b04dcf0990e6f6b118cb6b2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 18 Apr 2025 16:01:24 +0800
Subject: iommu: Remove IOMMU_DEV_FEAT_SVA

None of the drivers implement anything here anymore, remove the dead code.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Link: https://lore.kernel.org/r/20250418080130.1844424-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 062818b51822..f39af28acaeb 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -318,18 +318,11 @@ struct iommu_iort_rmr_data {
 
 /**
  * enum iommu_dev_features - Per device IOMMU features
- * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses
- * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally
- *			 enabling %IOMMU_DEV_FEAT_SVA requires
- *			 %IOMMU_DEV_FEAT_IOPF, but some devices manage I/O Page
- *			 Faults themselves instead of relying on the IOMMU. When
- *			 supported, this feature must be enabled before and
- *			 disabled after %IOMMU_DEV_FEAT_SVA.
+ * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall.
  *
  * Device drivers enable a feature using iommu_dev_enable_feature().
  */
 enum iommu_dev_features {
-	IOMMU_DEV_FEAT_SVA,
 	IOMMU_DEV_FEAT_IOPF,
 };
 
-- 
cgit v1.2.3


From f984fb09e60e0e2db5ad5be9d4cfcefb2008fda1 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Fri, 18 Apr 2025 16:01:30 +0800
Subject: iommu: Remove iommu_dev_enable/disable_feature()

No external drivers use these interfaces anymore. Furthermore, no existing
iommu drivers implement anything in the callbacks. Remove them to avoid
dead code.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/20250418080130.1844424-9-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f39af28acaeb..8d9119b000fe 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -316,16 +316,6 @@ struct iommu_iort_rmr_data {
 	u32 num_sids;
 };
 
-/**
- * enum iommu_dev_features - Per device IOMMU features
- * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall.
- *
- * Device drivers enable a feature using iommu_dev_enable_feature().
- */
-enum iommu_dev_features {
-	IOMMU_DEV_FEAT_IOPF,
-};
-
 #define IOMMU_NO_PASID	(0U) /* Reserved for DMA w/o PASID */
 #define IOMMU_FIRST_GLOBAL_PASID	(1U) /*starting range for allocation */
 #define IOMMU_PASID_INVALID	(-1U)
@@ -657,9 +647,6 @@ struct iommu_ops {
 	bool (*is_attach_deferred)(struct device *dev);
 
 	/* Per device IOMMU features */
-	int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
-	int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
-
 	void (*page_response)(struct device *dev, struct iopf_fault *evt,
 			      struct iommu_page_response *msg);
 
@@ -1128,9 +1115,6 @@ void dev_iommu_priv_set(struct device *dev, void *priv);
 extern struct mutex iommu_probe_device_lock;
 int iommu_probe_device(struct device *dev);
 
-int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
-int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
-
 int iommu_device_use_default_domain(struct device *dev);
 void iommu_device_unuse_default_domain(struct device *dev);
 
@@ -1415,18 +1399,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
 	return -ENODEV;
 }
 
-static inline int
-iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
-{
-	return -ENODEV;
-}
-
-static inline int
-iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
-{
-	return -ENODEV;
-}
-
 static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
 {
 	return NULL;
-- 
cgit v1.2.3


From 0d609a1450fab636c825c66344ab0ecfc1d3a98c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 8 Apr 2025 13:35:48 -0300
Subject: iommu: Add domain_alloc_identity()

virtio-iommu has a mode where the IDENTITY domain is actually a paging
domain with an identity mapping covering some of the system address
space manually created.

To support this add a new domain_alloc_identity() op that accepts
the struct device so that virtio can allocate and fully finalize a
paging domain to return.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/2-v4-ff5fb6b03bd1+288-iommu_virtio_domains_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 8d9119b000fe..7b636b92ac7c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -567,6 +567,9 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
  * @domain_alloc: allocate and return an iommu domain if success. Otherwise
  *                NULL is returned. The domain is not fully initialized until
  *                the caller iommu_domain_alloc() returns.
+ * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to
+ *                         use identity_domain instead. This should only be used
+ *                         if dynamic logic is necessary.
  * @domain_alloc_paging_flags: Allocate an iommu domain corresponding to the
  *                     input parameters as defined in
  *                     include/uapi/linux/iommufd.h. The @user_data can be
@@ -625,6 +628,7 @@ struct iommu_ops {
 
 	/* Domain allocation and freeing by the iommu driver */
 	struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type);
+	struct iommu_domain *(*domain_alloc_identity)(struct device *dev);
 	struct iommu_domain *(*domain_alloc_paging_flags)(
 		struct device *dev, u32 flags,
 		const struct iommu_user_data *user_data);
-- 
cgit v1.2.3


From 21c03574df19f0d77cb2e4d28bc02c79b21e656a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 8 Apr 2025 13:35:51 -0300
Subject: iommu: Hide ops.domain_alloc behind CONFIG_FSL_PAMU

fsl_pamu is the last user of domain_alloc(), and it is using it to create
something weird that doesn't really fit into the iommu subsystem
architecture. It is a not a paging domain since it doesn't have any
map/unmap ops. It may be some special kind of identity domain.

For now just leave it as is. Wrap it's definition in CONFIG_FSL_PAMU to
discourage any new drivers from attempting to use it.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/5-v4-ff5fb6b03bd1+288-iommu_virtio_domains_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7b636b92ac7c..5b64d0242e66 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -564,9 +564,7 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
  *           op is allocated in the iommu driver and freed by the caller after
  *           use. The information type is one of enum iommu_hw_info_type defined
  *           in include/uapi/linux/iommufd.h.
- * @domain_alloc: allocate and return an iommu domain if success. Otherwise
- *                NULL is returned. The domain is not fully initialized until
- *                the caller iommu_domain_alloc() returns.
+ * @domain_alloc: Do not use in new drivers
  * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to
  *                         use identity_domain instead. This should only be used
  *                         if dynamic logic is necessary.
@@ -627,7 +625,9 @@ struct iommu_ops {
 	void *(*hw_info)(struct device *dev, u32 *length, u32 *type);
 
 	/* Domain allocation and freeing by the iommu driver */
+#if IS_ENABLED(CONFIG_FSL_PAMU)
 	struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type);
+#endif
 	struct iommu_domain *(*domain_alloc_identity)(struct device *dev);
 	struct iommu_domain *(*domain_alloc_paging_flags)(
 		struct device *dev, u32 flags,
-- 
cgit v1.2.3


From da33e87bd2bfc63531cf7448a3cd7a3d42182f08 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 24 Apr 2025 18:41:28 +0100
Subject: iommu: Handle yet another race around registration

Next up on our list of race windows to close is another one during
iommu_device_register() - it's now OK again for multiple instances to
run their bus_iommu_probe() in parallel, but an iommu_probe_device() can
still also race against a running bus_iommu_probe(). As Johan has
managed to prove, this has now become a lot more visible on DT platforms
wth driver_async_probe where a client driver is attempting to probe in
parallel with its IOMMU driver - although commit b46064a18810 ("iommu:
Handle race with default domain setup") resolves this from the client
driver's point of view, this isn't before of_iommu_configure() has had
the chance to attempt to "replay" a probe that the bus walk hasn't even
tried yet, and so still cause the out-of-order group allocation
behaviour that we're trying to clean up (and now warning about).

The most reliable thing to do here is to explicitly keep track of the
"iommu_device_register() is still running" state, so we can then
special-case the ops lookup for the replay path (based on dev->iommu
again) to let that think it's still waiting for the IOMMU driver to
appear at all. This still leaves the longstanding theoretical case of
iommu_bus_notifier() being triggered during bus_iommu_probe(), but it's
not so simple to defer a notifier, and nobody's ever reported that being
a visible issue, so let's quietly kick that can down the road for now...

Reported-by: Johan Hovold <johan@kernel.org>
Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/88d54c1b48fed8279aa47d30f3d75173685bb26a.1745516488.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5b64d0242e66..bc5363e6b43e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -746,6 +746,7 @@ struct iommu_domain_ops {
  * @dev: struct device for sysfs handling
  * @singleton_group: Used internally for drivers that have only one group
  * @max_pasids: number of supported PASIDs
+ * @ready: set once iommu_device_register() has completed successfully
  */
 struct iommu_device {
 	struct list_head list;
@@ -754,6 +755,7 @@ struct iommu_device {
 	struct device *dev;
 	struct iommu_group *singleton_group;
 	u32 max_pasids;
+	bool ready;
 };
 
 /**
-- 
cgit v1.2.3


From 19da081a28c95fe9b03ce952a2bf4a6f6bf5112c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 23 Apr 2025 17:22:28 +0800
Subject: crypto: api - Add crypto_request_clone and fb

Add a helper to clone crypto requests and eliminate code duplication.
Use kmemdup in the helper.

Also add an fb field to crypto_tfm.

This also happens to fix the existing implementations which were
buggy.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202504230118.1CxUaUoX-lkp@intel.com/
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202504230004.c7mrY0C6-lkp@intel.com/
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index f691ce01745e..fe75320ff9a3 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -14,7 +14,7 @@
 
 #include <linux/completion.h>
 #include <linux/errno.h>
-#include <linux/refcount.h>
+#include <linux/refcount_types.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 
@@ -411,9 +411,11 @@ struct crypto_tfm {
 	u32 crt_flags;
 
 	int node;
-	
+
+	struct crypto_tfm *fb;
+
 	void (*exit)(struct crypto_tfm *tfm);
-	
+
 	struct crypto_alg *__crt_alg;
 
 	void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
@@ -509,5 +511,8 @@ static inline void crypto_request_set_tfm(struct crypto_async_request *req,
 	req->flags &= ~CRYPTO_TFM_REQ_ON_STACK;
 }
 
+struct crypto_async_request *crypto_request_clone(
+	struct crypto_async_request *req, size_t total, gfp_t gfp);
+
 #endif	/* _LINUX_CRYPTO_H */
 
-- 
cgit v1.2.3


From b75fa20c127eb736b0ac9b30be051f526a2316a9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Apr 2025 11:05:29 +0800
Subject: crypto: api - Add crypto_stack_request_init and initialise flags
 fully

Add a helper to initialise crypto stack requests and use it for
ahash and acomp.  Make sure that the flags field is initialised
fully in the helper to silence false-positive warnings from the
compiler.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202504250751.mdy28Ibr-lkp@intel.com/
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index fe75320ff9a3..b8d875b11193 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -514,5 +514,13 @@ static inline void crypto_request_set_tfm(struct crypto_async_request *req,
 struct crypto_async_request *crypto_request_clone(
 	struct crypto_async_request *req, size_t total, gfp_t gfp);
 
+static inline void crypto_stack_request_init(struct crypto_async_request *req,
+					     struct crypto_tfm *tfm)
+{
+	req->flags = 0;
+	crypto_request_set_tfm(req, tfm);
+	req->flags |= CRYPTO_TFM_REQ_ON_STACK;
+}
+
 #endif	/* _LINUX_CRYPTO_H */
 
-- 
cgit v1.2.3


From 1313057c369bb4da11bb1f8dffb570ac7b44a4d4 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 31 Mar 2025 21:11:11 +0100
Subject: highmem: Add memcpy_folio()

The folio equivalent of memcpy_page().  It should correctly and
efficiently manage large folios:

 - If one, neither or both is highmem
 - If (either or both) offset+len crosses a page boundary
 - If the two offsets are congruent or not

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/highmem.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 5c6bea81a90e..fd72f66b872a 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -404,6 +404,33 @@ static inline void memcpy_page(struct page *dst_page, size_t dst_off,
 	kunmap_local(dst);
 }
 
+static inline void memcpy_folio(struct folio *dst_folio, size_t dst_off,
+		struct folio *src_folio, size_t src_off, size_t len)
+{
+	VM_BUG_ON(dst_off + len > folio_size(dst_folio));
+	VM_BUG_ON(src_off + len > folio_size(src_folio));
+
+	do {
+		char *dst = kmap_local_folio(dst_folio, dst_off);
+		const char *src = kmap_local_folio(src_folio, src_off);
+		size_t chunk = len;
+
+		if (folio_test_highmem(dst_folio) &&
+		    chunk > PAGE_SIZE - offset_in_page(dst_off))
+			chunk = PAGE_SIZE - offset_in_page(dst_off);
+		if (folio_test_highmem(src_folio) &&
+		    chunk > PAGE_SIZE - offset_in_page(src_off))
+			chunk = PAGE_SIZE - offset_in_page(src_off);
+		memcpy(dst, src, chunk);
+		kunmap_local(src);
+		kunmap_local(dst);
+
+		dst_off += chunk;
+		src_off += chunk;
+		len -= chunk;
+	} while (len > 0);
+}
+
 static inline void memset_page(struct page *page, size_t offset, int val,
 			       size_t len)
 {
-- 
cgit v1.2.3


From a510c186abfc870e5c74cf953b646c8a2c07bee1 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 16 Apr 2025 10:20:20 -0700
Subject: compiler_types: Identify compiler versions for
 __builtin_dynamic_object_size

Clarify when __builtin_dynamic_object_size() is available. All our
supported Clang versions support it. GCC 12 and later support it. Link
to documentation for both.

Acked-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://lore.kernel.org/r/20250416172016.work.154-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/compiler_types.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 501cffddc2f4..20881cc761fa 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -449,6 +449,11 @@ struct ftrace_likely_data {
 /*
  * When the size of an allocated object is needed, use the best available
  * mechanism to find it. (For cases where sizeof() cannot be used.)
+ *
+ * Optional: only supported since gcc >= 12
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Object-Size-Checking.html
+ * clang: https://clang.llvm.org/docs/LanguageExtensions.html#evaluating-object-size
  */
 #if __has_builtin(__builtin_dynamic_object_size)
 #define __struct_size(p)	__builtin_dynamic_object_size(p, 0)
-- 
cgit v1.2.3


From 9a93048476e7cbdde00cdeebe66b6504995eac92 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 16 Apr 2025 10:29:15 -0700
Subject: overflow: Clarify expectations for getting DEFINE_FLEX variable sizes

Mention the use of __member_size() for DEFINE_FLEX variables as a hint
for getting at the compile-time size of the resulting flexible array
member.

Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20250416172911.work.854-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/overflow.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 0c7e3dcfe867..6ee67c20b575 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -419,6 +419,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * Define a zeroed, on-stack, instance of @type structure with a trailing
  * flexible array member.
  * Use __struct_size(@name) to get compile-time size of it afterwards.
+ * Use __member_size(@name->member) to get compile-time size of @name members.
  */
 #define DEFINE_RAW_FLEX(type, name, member, count)	\
 	_DEFINE_FLEX(type, name, member, count, = {})
@@ -436,6 +437,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * Define a zeroed, on-stack, instance of @TYPE structure with a trailing
  * flexible array member.
  * Use __struct_size(@NAME) to get compile-time size of it afterwards.
+ * Use __member_size(@NAME->member) to get compile-time size of @NAME members.
  */
 #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT)	\
 	_DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, })
-- 
cgit v1.2.3


From 655862865c97ff55e4f3f2aaa7708f42f0ea3bd8 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 15 Apr 2025 16:14:24 -0700
Subject: mod_devicetable: Enlarge the maximum platform_device_id name length

The 20 byte length of struct platform_device_id::name is not long enough
for many devices (especially regulators), where the string initialization
is getting truncated and missing the trailing NUL byte. This is seen
with GCC 15's -Wunterminated-string-initialization option:

drivers/regulator/hi6421v530-regulator.c:189:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
  189 |         { .name = "hi6421v530-regulator" },
      |                   ^~~~~~~~~~~~~~~~~~~~~~
drivers/regulator/hi6421v600-regulator.c:278:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
  278 |         { .name = "hi6421v600-regulator" },
      |                   ^~~~~~~~~~~~~~~~~~~~~~
drivers/regulator/lp87565-regulator.c:233:11: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
  233 |         { "lp87565-q1-regulator", },
      |           ^~~~~~~~~~~~~~~~~~~~~~
sound/soc/fsl/imx-pcm-rpmsg.c:818:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
  818 |         { .name = "rpmsg-micfil-channel" },
      |                   ^~~~~~~~~~~~~~~~~~~~~~
drivers/iio/light/hid-sensor-als.c:457:25: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
  457 |                 .name = "HID-SENSOR-LISS-0041",
      |                         ^~~~~~~~~~~~~~~~~~~~~~
drivers/iio/light/hid-sensor-prox.c:366:25: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization]
  366 |                 .name = "HID-SENSOR-LISS-0226",
      |                         ^~~~~~~~~~~~~~~~~~~~~~

Increase the length to 24, slightly more than is currently being used by
the affected drivers. The string is used in '%s' format strings and via
the module code, which appears to do its own length encoding. This size
was chosen because there was already a 4 byte hole in the structure:

struct platform_device_id {
        char                       name[20];             /*     0    20 */

        /* XXX 4 bytes hole, try to pack */

        kernel_ulong_t             driver_data;          /*    24     8 */

        /* size: 32, cachelines: 1, members: 2 */
        /* sum members: 28, holes: 1, sum holes: 4 */
        /* last cacheline: 32 bytes */
};

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20250415231420.work.066-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/mod_devicetable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index bd7e60c0b72f..ebcee9328168 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -601,7 +601,7 @@ struct dmi_system_id {
 #define DMI_MATCH(a, b)	{ .slot = a, .substr = b }
 #define DMI_EXACT_MATCH(a, b)	{ .slot = a, .substr = b, .exact_match = 1 }
 
-#define PLATFORM_NAME_SIZE	20
+#define PLATFORM_NAME_SIZE	24
 #define PLATFORM_MODULE_PREFIX	"platform:"
 
 struct platform_device_id {
-- 
cgit v1.2.3


From c1f7375a246e5f810191a6c3031d2fa2b80e9f5e Mon Sep 17 00:00:00 2001
From: Jelle van der Waa <jvanderwaa@redhat.com>
Date: Mon, 14 Apr 2025 15:18:40 +0200
Subject: power: supply: support charge_types in extensions

Similar to charge_behaviour, charge_types is an enum option where
reading the property shows the supported values, with the active value
surrounded by brackets. To be able to use it with a power_supply
extension a bitmask with the supported charge_Types values has to be
added to power_supply_ext.

Signed-off-by: Jelle van der Waa <jvanderwaa@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20250414131840.382756-2-jvanderwaa@redhat.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 888824592953..c4cb854971f5 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -288,6 +288,7 @@ struct power_supply_desc {
 struct power_supply_ext {
 	const char *const name;
 	u8 charge_behaviours;
+	u32 charge_types;
 	const enum power_supply_property *properties;
 	size_t num_properties;
 
-- 
cgit v1.2.3


From e43b8bb56e537bfc8d9076793091e7679020fc9c Mon Sep 17 00:00:00 2001
From: Charlie Jenkins <charlie@rivosinc.com>
Date: Thu, 20 Mar 2025 10:29:24 -0700
Subject: entry: Inline syscall_exit_to_user_mode()

Similar to commit 221a164035fd ("entry: Move syscall_enter_from_user_mode()
to header file"), move syscall_exit_to_user_mode() to the header file as
well.

Testing was done with the byte-unixbench syscall benchmark (which calls
getpid) and QEMU. On riscv I measured a 7.09246% improvement, on x86 a
2.98843% improvement, on loongarch a 6.07954% improvement, and on s390 a
11.1328% improvement.

The Intel bot also reported "kernel test robot noticed a 1.9% improvement
of stress-ng.seek.ops_per_sec".

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-4-63e187e26041@rivosinc.com
Link: https://lore.kernel.org/linux-riscv/202502051555.85ae6844-lkp@intel.com/
---
 include/linux/entry-common.h | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index fc61d0205c97..f94f3fdf15fc 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -14,6 +14,7 @@
 #include <linux/kmsan.h>
 
 #include <asm/entry-common.h>
+#include <asm/syscall.h>
 
 /*
  * Define dummy _TIF work flags if not defined by the architecture or for
@@ -366,6 +367,15 @@ static __always_inline void exit_to_user_mode(void)
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
+/**
+ * syscall_exit_work - Handle work before returning to user mode
+ * @regs:	Pointer to current pt_regs
+ * @work:	Current thread syscall work
+ *
+ * Do one-time syscall specific work.
+ */
+void syscall_exit_work(struct pt_regs *regs, unsigned long work);
+
 /**
  * syscall_exit_to_user_mode_work - Handle work before returning to user mode
  * @regs:	Pointer to currents pt_regs
@@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void)
  * make the final state transitions. Interrupts must stay disabled between
  * return from this function and the invocation of exit_to_user_mode().
  */
-void syscall_exit_to_user_mode_work(struct pt_regs *regs);
+static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
+{
+	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+	unsigned long nr = syscall_get_nr(current, regs);
+
+	CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
+
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
+			local_irq_enable();
+	}
+
+	rseq_syscall(regs);
+
+	/*
+	 * Do one-time syscall specific work. If these work items are
+	 * enabled, we want to run them exactly once per syscall exit with
+	 * interrupts enabled.
+	 */
+	if (unlikely(work & SYSCALL_WORK_EXIT))
+		syscall_exit_work(regs, work);
+	local_irq_disable_exit_to_user();
+	exit_to_user_mode_prepare(regs);
+}
 
 /**
  * syscall_exit_to_user_mode - Handle work before returning to user mode
@@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs);
  * exit_to_user_mode(). This function is preferred unless there is a
  * compelling architectural reason to use the separate functions.
  */
-void syscall_exit_to_user_mode(struct pt_regs *regs);
+static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
+{
+	instrumentation_begin();
+	syscall_exit_to_user_mode_work(regs);
+	instrumentation_end();
+	exit_to_user_mode();
+}
 
 /**
  * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
-- 
cgit v1.2.3


From d54e34c58aa224bdd0b9ea0f429c5a90d91db2c7 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:12 +0200
Subject: mtd: spinand: Use more specific naming for the write enable/disable
 op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the write enable/disable macro names.

Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
[Miquel: Fixed conflicts with -next by updating esmt and micron drivers]
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index d1b9b630bd83..87d8f9f2017e 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -26,7 +26,7 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_NO_DATA)
 
-#define SPINAND_WR_EN_DIS_OP(enable)					\
+#define SPINAND_WR_EN_DIS_1S_0_0_OP(enable)					\
 	SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1),		\
 		   SPI_MEM_OP_NO_ADDR,					\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From 2a294fa215289aff4b7b0c0a70f24dcd621df497 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:13 +0200
Subject: mtd: spinand: Use more specific naming for the read ID op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the read ID macro name.

Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 87d8f9f2017e..dfb5c74cad6d 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -32,7 +32,7 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_NO_DATA)
 
-#define SPINAND_READID_OP(naddr, ndummy, buf, len)			\
+#define SPINAND_READID_1S_1S_1S_OP(naddr, ndummy, buf, len)		\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1),				\
 		   SPI_MEM_OP_ADDR(naddr, 0, 1),			\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
-- 
cgit v1.2.3


From 429330cd1cfe860133d1fbdd49e9e081524333cf Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:14 +0200
Subject: mtd: spinand: Use more specific naming for the get/set feature ops

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really mean by describing the expected bus
topology in the get/set feature macro names.

Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
[Miquel: Fixed conflicts with -next by updating macronix driver]
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index dfb5c74cad6d..8ea40d838dc2 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -38,13 +38,13 @@
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 1))
 
-#define SPINAND_SET_FEATURE_OP(reg, valptr)				\
+#define SPINAND_SET_FEATURE_1S_1S_1S_OP(reg, valptr)			\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x1f, 1),				\
 		   SPI_MEM_OP_ADDR(1, reg, 1),				\
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_DATA_OUT(1, valptr, 1))
 
-#define SPINAND_GET_FEATURE_OP(reg, valptr)				\
+#define SPINAND_GET_FEATURE_1S_1S_1S_OP(reg, valptr)			\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x0f, 1),				\
 		   SPI_MEM_OP_ADDR(1, reg, 1),				\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From 7e8533b273ee9e7243cc57afec835c20135ebbb2 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:15 +0200
Subject: mtd: spinand: Use more specific naming for the erase op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the erase macro name.

Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 8ea40d838dc2..964f1244d6a6 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -50,7 +50,7 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_DATA_IN(1, valptr, 1))
 
-#define SPINAND_BLK_ERASE_OP(addr)					\
+#define SPINAND_BLK_ERASE_1S_1S_0_OP(addr)				\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0xd8, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From 7528c97c0c2ac4c6c09b5aae52382958a57122fe Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:16 +0200
Subject: mtd: spinand: Use more specific naming for the page read op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the page read macro name.

Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 964f1244d6a6..df07bc55aa4d 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -56,7 +56,7 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_NO_DATA)
 
-#define SPINAND_PAGE_READ_OP(addr)					\
+#define SPINAND_PAGE_READ_1S_1S_0_OP(addr)				\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x13, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From ea2087d4e66d0b927918cc9048576aca6a0446ad Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:17 +0200
Subject: mtd: spinand: Use more specific naming for the (single) read from
 cache ops

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really mean by describing the expected bus
topology in the (single) read from cache macro names.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index df07bc55aa4d..c702e1ed8fe8 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -62,32 +62,32 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_NO_DATA)
 
-#define SPINAND_PAGE_READ_FROM_CACHE_OP(addr, ndummy, buf, len, ...) \
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, ...) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1),				\
 		   SPI_MEM_OP_ADDR(2, addr, 1),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 1),			\
 		   SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP(addr, ndummy, buf, len) \
-	SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1),			\
+#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1),				\
 			 SPI_MEM_OP_ADDR(2, addr, 1),			\
 			 SPI_MEM_OP_DUMMY(ndummy, 1),			\
 			 SPI_MEM_OP_DATA_IN(len, buf, 1))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_OP_3A(addr, ndummy, buf, len) \
+#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 1))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP_3A(addr, ndummy, buf, len) \
+#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 1))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_DTR_OP(addr, ndummy, buf, len, freq) \
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(addr, ndummy, buf, len, freq) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x0d, 1),				\
 		   SPI_MEM_DTR_OP_ADDR(2, addr, 1),			\
 		   SPI_MEM_DTR_OP_DUMMY(ndummy, 1),			\
-- 
cgit v1.2.3


From 684f7105e8534f6500de389c089ba204cb1c8058 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:18 +0200
Subject: mtd: spinand: Use more specific naming for the (dual output) read
 from cache ops

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really mean by describing the expected bus
topology in the (dual output) read from cache macro names.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index c702e1ed8fe8..f84991d05d85 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -94,19 +94,19 @@
 		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 1),			\
 		   SPI_MEM_OP_MAX_FREQ(freq))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len)	\
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1),				\
 		   SPI_MEM_OP_ADDR(2, addr, 1),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 2))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP_3A(addr, ndummy, buf, len)	\
+#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 2))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_X2_DTR_OP(addr, ndummy, buf, len, freq) \
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(addr, ndummy, buf, len, freq) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x3d, 1),				\
 		   SPI_MEM_DTR_OP_ADDR(2, addr, 1),			\
 		   SPI_MEM_DTR_OP_DUMMY(ndummy, 1),			\
-- 
cgit v1.2.3


From d9de177996d74c0cc44220003953ba2d2bece0ac Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:19 +0200
Subject: mtd: spinand: Use more specific naming for the (dual IO) read from
 cache ops

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really mean by describing the expected bus
topology in the (dual IO) read from cache macro names. While at
modifying them, better reordering the macros to group them all by bus
topology which now feels more intuitive.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index f84991d05d85..a8a963f57d43 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -113,6 +113,25 @@
 		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 2),			\
 		   SPI_MEM_OP_MAX_FREQ(freq))
 
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 2),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 2),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 2))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1),				\
+		   SPI_MEM_OP_ADDR(3, addr, 2),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 2),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 2))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(addr, ndummy, buf, len, freq) \
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1),				\
+		   SPI_MEM_DTR_OP_ADDR(2, addr, 2),			\
+		   SPI_MEM_DTR_OP_DUMMY(ndummy, 2),			\
+		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 2),			\
+		   SPI_MEM_OP_MAX_FREQ(freq))
+
 #define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len)	\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),				\
 		   SPI_MEM_OP_ADDR(2, addr, 1),				\
@@ -132,25 +151,6 @@
 		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 4),			\
 		   SPI_MEM_OP_MAX_FREQ(freq))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len)	\
-	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1),				\
-		   SPI_MEM_OP_ADDR(2, addr, 2),				\
-		   SPI_MEM_OP_DUMMY(ndummy, 2),				\
-		   SPI_MEM_OP_DATA_IN(len, buf, 2))
-
-#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP_3A(addr, ndummy, buf, len) \
-	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1),				\
-		   SPI_MEM_OP_ADDR(3, addr, 2),				\
-		   SPI_MEM_OP_DUMMY(ndummy, 2),				\
-		   SPI_MEM_OP_DATA_IN(len, buf, 2))
-
-#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_DTR_OP(addr, ndummy, buf, len, freq) \
-	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1),				\
-		   SPI_MEM_DTR_OP_ADDR(2, addr, 2),			\
-		   SPI_MEM_DTR_OP_DUMMY(ndummy, 2),			\
-		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 2),			\
-		   SPI_MEM_OP_MAX_FREQ(freq))
-
 #define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len)	\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1),				\
 		   SPI_MEM_OP_ADDR(2, addr, 4),				\
-- 
cgit v1.2.3


From 1deae734cc1c7e976d588e7d8f46af2bb9ef5656 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:20 +0200
Subject: mtd: spinand: Use more specific naming for the (quad output) read
 from cache ops

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really mean by describing the expected bus
topology in the (quad output) read from cache macro names.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index a8a963f57d43..cbeec0f53f88 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -132,19 +132,19 @@
 		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 2),			\
 		   SPI_MEM_OP_MAX_FREQ(freq))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len)	\
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),				\
 		   SPI_MEM_OP_ADDR(2, addr, 1),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 4))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP_3A(addr, ndummy, buf, len)	\
+#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len)	\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 4))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_X4_DTR_OP(addr, ndummy, buf, len, freq) \
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(addr, ndummy, buf, len, freq) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x6d, 1),				\
 		   SPI_MEM_DTR_OP_ADDR(2, addr, 1),			\
 		   SPI_MEM_DTR_OP_DUMMY(ndummy, 1),			\
-- 
cgit v1.2.3


From 9c6911072c6e8b128ccdb7dd00efa13c47513074 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:21 +0200
Subject: mtd: spinand: Use more specific naming for the (quad IO) read from
 cache ops

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really mean by describing the expected bus
topology in the (quad IO) read from cache macro names.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index cbeec0f53f88..70f3e69e56b8 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -151,19 +151,19 @@
 		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 4),			\
 		   SPI_MEM_OP_MAX_FREQ(freq))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len)	\
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1),				\
 		   SPI_MEM_OP_ADDR(2, addr, 4),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 4),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 4))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP_3A(addr, ndummy, buf, len) \
+#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 4),				\
 		   SPI_MEM_OP_DUMMY(ndummy, 4),				\
 		   SPI_MEM_OP_DATA_IN(len, buf, 4))
 
-#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_DTR_OP(addr, ndummy, buf, len, freq) \
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(addr, ndummy, buf, len, freq) \
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0xed, 1),				\
 		   SPI_MEM_DTR_OP_ADDR(2, addr, 4),			\
 		   SPI_MEM_DTR_OP_DUMMY(ndummy, 4),			\
-- 
cgit v1.2.3


From 36e461894cf31e33ebd869f3fd85c5d7c68a22bc Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:22 +0200
Subject: mtd: spinand: Use more specific naming for the program execution op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the program execution macro name.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
[Miquel: Fixed conflicts with -next by updating esmt and micron drivers]
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 70f3e69e56b8..d84206b9e824 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -170,7 +170,7 @@
 		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 4),			\
 		   SPI_MEM_OP_MAX_FREQ(freq))
 
-#define SPINAND_PROG_EXEC_OP(addr)					\
+#define SPINAND_PROG_EXEC_1S_1S_0_OP(addr)				\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From 07cdbae7f84190983f9b07133ce5b6f2634c95cd Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:23 +0200
Subject: mtd: spinand: Use more specific naming for the (single) program load
 op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the (single) program load macro name.

While at modifying it, better add the missing_ OP suffix to align with
all the other macros of the same kind.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index d84206b9e824..d1cc2cdbd4a1 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -176,7 +176,7 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_NO_DATA)
 
-#define SPINAND_PROG_LOAD(reset, addr, buf, len)			\
+#define SPINAND_PROG_LOAD_1S_1S_1S_OP(reset, addr, buf, len)		\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x02 : 0x84, 1),		\
 		   SPI_MEM_OP_ADDR(2, addr, 1),				\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From ac3a4b17e03b079c00eb61456364bcdbf65f3436 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:24 +0200
Subject: mtd: spinand: Use more specific naming for the (quad) program load op

SPI operations have been initially described through macros implicitly
implying the use of a single SPI SDR bus. Macros for supporting dual and
quad I/O transfers have been added on top, generally inspired by vendor
naming, followed by DTR operations. Soon we might see octal
and even octal DTR operations as well (including the opcode byte).

Let's clarify what the macro really means by describing the expected bus
topology in the (quad) program load macro name.

While at modifying it, better add the missing_ OP suffix to align with
all the other macros of the same kind.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index d1cc2cdbd4a1..c70d17e0b308 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -182,7 +182,7 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_DATA_OUT(len, buf, 1))
 
-#define SPINAND_PROG_LOAD_X4(reset, addr, buf, len)			\
+#define SPINAND_PROG_LOAD_1S_1S_4S_OP(reset, addr, buf, len)		\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x32 : 0x34, 1),		\
 		   SPI_MEM_OP_ADDR(2, addr, 1),				\
 		   SPI_MEM_OP_NO_DUMMY,					\
-- 
cgit v1.2.3


From 51b252cce172cbfb21dfd5e544dcbefc649f3daa Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 3 Apr 2025 11:19:25 +0200
Subject: mtd: spinand: Define octal operations

SPI NAND chips may support octal "read from cache" and "program load"
transfers. List the opcodes by defining the relevant macros describing
these operations.

However, due to the hardware available I had, 0x82 and 0xc2 are
untested and given as reference, only 0xc4 could be (successfully)
tested.

Controllers supporting operations mixing SDR and DTR operations might
even leverage octal DTR data I/O transfers.

Acked-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/spinand.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index c70d17e0b308..811a0f356315 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -170,6 +170,27 @@
 		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 4),			\
 		   SPI_MEM_OP_MAX_FREQ(freq))
 
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(addr, ndummy, buf, len, freq) \
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x8b, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 8),			\
+		   SPI_MEM_OP_MAX_FREQ(freq))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(addr, ndummy, buf, len, freq) \
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xcb, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 8),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 8),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 8),			\
+		   SPI_MEM_OP_MAX_FREQ(freq))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_8D_OP(addr, ndummy, buf, len, freq) \
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x9d, 1),				\
+		   SPI_MEM_DTR_OP_ADDR(2, addr, 1),			\
+		   SPI_MEM_DTR_OP_DUMMY(ndummy, 1),			\
+		   SPI_MEM_DTR_OP_DATA_IN(len, buf, 8),			\
+		   SPI_MEM_OP_MAX_FREQ(freq))
+
 #define SPINAND_PROG_EXEC_1S_1S_0_OP(addr)				\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1),				\
 		   SPI_MEM_OP_ADDR(3, addr, 1),				\
@@ -188,6 +209,18 @@
 		   SPI_MEM_OP_NO_DUMMY,					\
 		   SPI_MEM_OP_DATA_OUT(len, buf, 4))
 
+#define SPINAND_PROG_LOAD_1S_1S_8S_OP(addr, buf, len)			\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x82, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(len, buf, 8))
+
+#define SPINAND_PROG_LOAD_1S_8S_8S_OP(reset, addr, buf, len)		\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0xc2 : 0xc4, 1),		\
+		   SPI_MEM_OP_ADDR(2, addr, 8),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(len, buf, 8))
+
 /**
  * Standard SPI NAND flash commands
  */
-- 
cgit v1.2.3


From ee000969f28bf579d3772bf7c0ae8aff86586e20 Mon Sep 17 00:00:00 2001
From: Md Sadre Alam <quic_mdalam@quicinc.com>
Date: Thu, 10 Apr 2025 15:30:17 +0530
Subject: mtd: rawnand: qcom: Pass 18 bit offset from NANDc base to BAM base

The BAM command descriptor provides only 18 bits to specify the BAM
register offset. Additionally, in the BAM command descriptor, the BAM
register offset is supposed to be specified as "(NANDc base - BAM base)
+ reg_off". Since, the BAM controller expecting the value in the form of
"NANDc base - BAM base", so that added a new field 'bam_offset' in the NAND
properties structure and use it while preparing the command descriptor.

Previously, the driver was specifying the NANDc base address in the BAM
command descriptor.

Cc: stable@vger.kernel.org
Fixes: 8d6b6d7e135e ("mtd: nand: qcom: support for command descriptor formation")
Tested-by: Lakshmi Sowjanya D <quic_laksd@quicinc.com>
Signed-off-by: Md Sadre Alam <quic_mdalam@quicinc.com>
Acked-by: Mark Brown <broonie@kernel.org>
Tested-by: Gabor Juhos <j4g8y7@gmail.com> # on IPQ9574
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand-qpic-common.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h
index cd7172e6c1bb..e8462deda6db 100644
--- a/include/linux/mtd/nand-qpic-common.h
+++ b/include/linux/mtd/nand-qpic-common.h
@@ -199,9 +199,6 @@
  */
 #define dev_cmd_reg_addr(nandc, reg) ((nandc)->props->dev_cmd_reg_start + (reg))
 
-/* Returns the NAND register physical address */
-#define nandc_reg_phys(chip, offset) ((chip)->base_phys + (offset))
-
 /* Returns the dma address for reg read buffer */
 #define reg_buf_dma_addr(chip, vaddr) \
 	((chip)->reg_read_dma + \
@@ -454,6 +451,7 @@ struct qcom_nand_controller {
 struct qcom_nandc_props {
 	u32 ecc_modes;
 	u32 dev_cmd_reg_start;
+	u32 bam_offset;
 	bool supports_bam;
 	bool nandc_part_of_qpic;
 	bool qpic_version2;
-- 
cgit v1.2.3


From 73db799bf5efc5a04654bb3ff6c9bf63a0dfa473 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= <csokas.bence@prolan.hu>
Date: Thu, 27 Mar 2025 20:59:26 +0100
Subject: PM: runtime: Add new devm functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `devm_pm_runtime_set_active_enabled()` and
`devm_pm_runtime_get_noresume()` for simplifying
common cases in drivers.

Signed-off-by: Bence Csókás <csokas.bence@prolan.hu>
Link: https://patch.msgid.link/20250327195928.680771-3-csokas.bence@prolan.hu
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_runtime.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7fb5a459847e..756b842dcd30 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -96,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev);
 extern void pm_runtime_drop_link(struct device_link *link);
 extern void pm_runtime_release_supplier(struct device_link *link);
 
+int devm_pm_runtime_set_active_enabled(struct device *dev);
 extern int devm_pm_runtime_enable(struct device *dev);
+int devm_pm_runtime_get_noresume(struct device *dev);
 
 /**
  * pm_suspend_ignore_children - Set runtime PM behavior regarding children.
@@ -294,7 +296,9 @@ static inline bool pm_runtime_blocked(struct device *dev) { return true; }
 static inline void pm_runtime_allow(struct device *dev) {}
 static inline void pm_runtime_forbid(struct device *dev) {}
 
+static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; }
 static inline int devm_pm_runtime_enable(struct device *dev) { return 0; }
+static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; }
 
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
 static inline void pm_runtime_get_noresume(struct device *dev) {}
-- 
cgit v1.2.3


From 9f52aecc952ddf307571517d5c91136c8c4e87c9 Mon Sep 17 00:00:00 2001
From: Junhao He <hejunhao3@huawei.com>
Date: Wed, 18 Sep 2024 11:53:27 +0800
Subject: coresight: Fixes device's owner field for registered using
 coresight_init_driver()

The coresight_init_driver() of the coresight-core module is called from
the sub coresgiht device (such as tmc/stm/funnle/...) module. It calls
amba_driver_register() and Platform_driver_register(), which are macro
functions that use the coresight-core's module to initialize the caller's
owner field.  Therefore, when the sub coresight device calls
coresight_init_driver(), an incorrect THIS_MODULE value is captured.

The sub coesgiht modules can be removed while their callbacks are
running, resulting in a general protection failure.

Add module parameter to coresight_init_driver() so can be called
with the module of the callback.

Fixes: 075b7cd7ad7d ("coresight: Add helpers registering/removing both AMBA and platform drivers")
Signed-off-by: Junhao He <hejunhao3@huawei.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20240918035327.9710-1-hejunhao3@huawei.com
---
 include/linux/coresight.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index d79a242b271d..cfcf6e4707ed 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -723,7 +723,7 @@ coresight_find_output_type(struct coresight_platform_data *pdata,
 			   union coresight_dev_subtype subtype);
 
 int coresight_init_driver(const char *drv, struct amba_driver *amba_drv,
-			  struct platform_driver *pdev_drv);
+			  struct platform_driver *pdev_drv, struct module *owner);
 
 void coresight_remove_driver(struct amba_driver *amba_drv,
 			     struct platform_driver *pdev_drv);
-- 
cgit v1.2.3


From 0091d9241ea24c5275be4a3e5a032862fd9de9ec Mon Sep 17 00:00:00 2001
From: Steven Chen <chenste@linux.microsoft.com>
Date: Mon, 21 Apr 2025 15:25:09 -0700
Subject: kexec: define functions to map and unmap segments

Implement kimage_map_segment() to enable IMA to map the measurement log
list to the kimage structure during the kexec 'load' stage. This function
gathers the source pages within the specified address range, and maps them
to a contiguous virtual address range.

This is a preparation for later usage.

Implement kimage_unmap_segment() for unmapping segments using vunmap().

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Co-developed-by: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Signed-off-by: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Signed-off-by: Steven Chen <chenste@linux.microsoft.com>
Acked-by: Baoquan He <bhe@redhat.com>
Tested-by: Stefan Berger <stefanb@linux.ibm.com> # ppc64/kvm
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/kexec.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c8971861521a..805743208d19 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -474,13 +474,19 @@ extern bool kexec_file_dbg_print;
 #define kexec_dprintk(fmt, arg...) \
         do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0)
 
+extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size);
+extern void kimage_unmap_segment(void *buffer);
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
+struct kimage;
 static inline void __crash_kexec(struct pt_regs *regs) { }
 static inline void crash_kexec(struct pt_regs *regs) { }
 static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 static inline int kexec_crash_loaded(void) { return 0; }
+static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size)
+{ return NULL; }
+static inline void kimage_unmap_segment(void *buffer) { }
 #define kexec_in_progress false
 #endif /* CONFIG_KEXEC_CORE */
 
-- 
cgit v1.2.3


From 9ee8888a80fe2bd20ce929ffbc1dedd57607a778 Mon Sep 17 00:00:00 2001
From: Steven Chen <chenste@linux.microsoft.com>
Date: Mon, 21 Apr 2025 15:25:10 -0700
Subject: ima: kexec: skip IMA segment validation after kexec soft reboot

Currently, the function kexec_calculate_store_digests() calculates and
stores the digest of the segment during the kexec_file_load syscall,
where the  IMA segment is also allocated.

Later, the IMA segment will be updated with the measurement log at the
kexec execute stage when a kexec reboot is initiated. Therefore, the
digests should be updated for the IMA segment in the  normal case. The
problem is that the content of memory segments carried over to the new
kernel during the kexec systemcall can be changed at kexec 'execute'
stage, but the size and the location of the memory segments cannot be
changed at kexec 'execute' stage.

To address this, skip the calculation and storage of the digest for the
IMA segment in kexec_calculate_store_digests() so that it is not added
to the purgatory_sha_regions.

With this change, the IMA segment is not included in the digest
calculation, storage, and verification.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Co-developed-by: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Signed-off-by: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Signed-off-by: Steven Chen <chenste@linux.microsoft.com>
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Tested-by: Stefan Berger <stefanb@linux.ibm.com> # ppc64/kvm
[zohar@linux.ibm.com: Fixed Signed-off-by tag to match author's email ]
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/kexec.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 805743208d19..53ef1b6c8712 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -369,6 +369,9 @@ struct kimage {
 
 	phys_addr_t ima_buffer_addr;
 	size_t ima_buffer_size;
+
+	unsigned long ima_segment_index;
+	bool is_ima_segment_index_set;
 #endif
 
 	/* Core ELF header buffer */
-- 
cgit v1.2.3


From f18e502db673c75f762d47101dafcf58f30e2733 Mon Sep 17 00:00:00 2001
From: Steven Chen <chenste@linux.microsoft.com>
Date: Mon, 21 Apr 2025 15:25:11 -0700
Subject: ima: kexec: define functions to copy IMA log at soft boot

The IMA log is currently copied to the new kernel during kexec 'load'
using ima_dump_measurement_list(). However, the log copied at kexec
'load' may result in loss of IMA measurements that only occurred after
kexec "load'. Setup the needed infrastructure to move the IMA log copy
from kexec 'load' to 'execute'.

Define a new IMA hook ima_update_kexec_buffer() as a stub function.
It will be used to call ima_dump_measurement_list() during kexec 'execute'.

Implement ima_kexec_post_load() function to be invoked after the new
Kernel image has been loaded for kexec. ima_kexec_post_load() maps the
IMA buffer to a segment in the newly loaded Kernel.  It also registers
the reboot notifier_block to trigger ima_update_kexec_buffer() at
kexec 'execute'.

Set the priority of register_reboot_notifier to INT_MIN to ensure that the
IMA log copy operation will happen at the end of the operation chain, so
that all the IMA measurement records extended into the TPM are copied

Co-developed-by: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Signed-off-by: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Signed-off-by: Steven Chen <chenste@linux.microsoft.com>
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Tested-by: Stefan Berger <stefanb@linux.ibm.com> # ppc64/kvm
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/ima.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0bae61a15b60..8e29cb4e6a01 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -32,6 +32,9 @@ static inline void ima_appraise_parse_cmdline(void) {}
 
 #ifdef CONFIG_IMA_KEXEC
 extern void ima_add_kexec_buffer(struct kimage *image);
+extern void ima_kexec_post_load(struct kimage *image);
+#else
+static inline void ima_kexec_post_load(struct kimage *image) {}
 #endif
 
 #else
-- 
cgit v1.2.3


From 74a2bd0bfb0675cb8f276f2fe3ede0c7f9e78fa3 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Wed, 12 Mar 2025 22:19:49 -0400
Subject: nodemask: drop nodes_shift

nodes_shift_{left,right} are not used. Drop them.

Signed-off-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
---
 include/linux/nodemask.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index f0ac0633366b..0aa6b63aa747 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -39,9 +39,6 @@
  * int nodes_full(mask)			Is mask full (all bits sets)?
  * int nodes_weight(mask)		Hamming weight - number of set bits
  *
- * void nodes_shift_right(dst, src, n)	Shift right
- * void nodes_shift_left(dst, src, n)	Shift left
- *
  * unsigned int first_node(mask)	Number lowest set bit, or MAX_NUMNODES
  * unsigend int next_node(node, mask)	Next node past 'node', or MAX_NUMNODES
  * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first,
@@ -247,22 +244,6 @@ static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int n
 	return bitmap_weight(srcp->bits, nbits);
 }
 
-#define nodes_shift_right(dst, src, n) \
-			__nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
-static __always_inline void __nodes_shift_right(nodemask_t *dstp,
-					const nodemask_t *srcp, int n, int nbits)
-{
-	bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
-}
-
-#define nodes_shift_left(dst, src, n) \
-			__nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
-static __always_inline void __nodes_shift_left(nodemask_t *dstp,
-					const nodemask_t *srcp, int n, int nbits)
-{
-	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
-}
-
 /* FIXME: better would be to fix all architectures to never return
           > MAX_NUMNODES, then the silly min_ts could be dropped. */
 
-- 
cgit v1.2.3


From 4923c2c5b66fe8dea1df5e16d61f15c1dbea5ba1 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Wed, 12 Mar 2025 22:19:50 -0400
Subject: cpumask: add non-atomic __assign_cpu()

Similarly to atomic, add a non-atomic version.

Signed-off-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
---
 include/linux/cpumask.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index f9a868384083..ba1e232e0200 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1074,6 +1074,9 @@ void init_cpu_possible(const struct cpumask *src);
 #define assign_cpu(cpu, mask, val)	\
 	assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val))
 
+#define __assign_cpu(cpu, mask, val)	\
+	__assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val))
+
 #define set_cpu_possible(cpu, possible)	assign_cpu((cpu), &__cpu_possible_mask, (possible))
 #define set_cpu_enabled(cpu, enabled)	assign_cpu((cpu), &__cpu_enabled_mask, (enabled))
 #define set_cpu_present(cpu, present)	assign_cpu((cpu), &__cpu_present_mask, (present))
-- 
cgit v1.2.3


From 791a9b25ce2e6ecbe404ee32eed8a96a17e52896 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Wed, 12 Mar 2025 22:19:52 -0400
Subject: cpumask: drop cpumask_assign_cpu()

Commit decde1fa209323c7 ("cpumask: Add assign cpu") was merged bypassing
cpumasks reviewers. It adds atomic and non-atomic cpumask_assign_cpu()
helpers.

In the same merge window, commit 5c563ee90a22d3 ("cpumask: introduce
assign_cpu() macro") added the same functionality. So now we have it
duplicated.

__cpumask_assign_cpu() has never been used since introducing, and because
this series reworks the only user of cpumask_assign_cpu(), both functions
become a dead code.

Signed-off-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
---
 include/linux/cpumask.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index ba1e232e0200..beff4d26e605 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -558,22 +558,6 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
 	__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
-/**
- * cpumask_assign_cpu - assign a cpu in a cpumask
- * @cpu: cpu number (< nr_cpu_ids)
- * @dstp: the cpumask pointer
- * @bool: the value to assign
- */
-static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
-{
-	assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
-}
-
-static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
-{
-	__assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
-}
-
 /**
  * cpumask_test_cpu - test for a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
-- 
cgit v1.2.3


From 31299a5e0211241171b2222c5633aad4763bf700 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Wed, 26 Mar 2025 00:59:56 +0900
Subject: bits: add comments and newlines to #if, #else and #endif directives

This is a preparation for the upcoming GENMASK_U*() and BIT_U*()
changes. After introducing those new macros, there will be a lot of
scrolling between the #if, #else and #endif.

Add a comment to the #else and #endif preprocessor macros to help keep
track of which context we are in. Also, add new lines to better
visually separate the non-asm and asm sections.

Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bits.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bits.h b/include/linux/bits.h
index 14fd0ca9a6cd..e1e517769140 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -19,16 +19,21 @@
  * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
  */
 #if !defined(__ASSEMBLY__)
+
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
+
 #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h)))
-#else
+
+#else /* defined(__ASSEMBLY__) */
+
 /*
  * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
  * disable the input check if that is the case.
  */
 #define GENMASK_INPUT_CHECK(h, l) 0
-#endif
+
+#endif /* !defined(__ASSEMBLY__) */
 
 #define GENMASK(h, l) \
 	(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-- 
cgit v1.2.3


From 19408200c094858d952a90bf4977733dc89a4df5 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Wed, 26 Mar 2025 00:59:57 +0900
Subject: bits: introduce fixed-type GENMASK_U*()

Add GENMASK_TYPE() which generalizes __GENMASK() to support different
types, and implement fixed-types versions of GENMASK() based on it.
The fixed-type version allows more strict checks to the min/max values
accepted, which is useful for defining registers like implemented by
i915 and xe drivers with their REG_GENMASK*() macros.

The strict checks rely on shift-count-overflow compiler check to fail
the build if a number outside of the range allowed is passed.
Example:

  #define FOO_MASK GENMASK_U32(33, 4)

will generate a warning like:

  include/linux/bits.h:51:27: error: right shift count >= width of type [-Werror=shift-count-overflow]
     51 |               type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
        |                           ^~

The result is casted to the corresponding fixed width type. For
example, GENMASK_U8() returns an u8. Note that because of the C
promotion rules, GENMASK_U8() and GENMASK_U16() will immediately be
promoted to int if used in an expression. Regardless, the main goal is
not to get the correct type, but rather to enforce more checks at
compile time.

While GENMASK_TYPE() is crafted to cover all variants, including the
already existing GENMASK(), GENMASK_ULL() and GENMASK_U128(), for the
moment, only use it for the newly introduced GENMASK_U*(). The
consolidation will be done in a separate change.

Co-developed-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bitops.h |  1 -
 include/linux/bits.h   | 30 ++++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c1cb53cf2f0f..9be2d50da09a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -8,7 +8,6 @@
 
 #include <uapi/linux/kernel.h>
 
-#define BITS_PER_TYPE(type)	(sizeof(type) * BITS_PER_BYTE)
 #define BITS_TO_LONGS(nr)	__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 #define BITS_TO_U64(nr)		__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64))
 #define BITS_TO_U32(nr)		__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32))
diff --git a/include/linux/bits.h b/include/linux/bits.h
index e1e517769140..9718c5ae5fc3 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -12,6 +12,7 @@
 #define BIT_ULL_MASK(nr)	(ULL(1) << ((nr) % BITS_PER_LONG_LONG))
 #define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
 #define BITS_PER_BYTE		8
+#define BITS_PER_TYPE(type)	(sizeof(type) * BITS_PER_BYTE)
 
 /*
  * Create a contiguous bitmask starting at bit position @l and ending at
@@ -20,11 +21,40 @@
  */
 #if !defined(__ASSEMBLY__)
 
+/*
+ * Missing asm support
+ *
+ * GENMASK_U*() depend on BITS_PER_TYPE() which relies on sizeof(),
+ * something not available in asm. Nevertheless, fixed width integers is a C
+ * concept. Assembly code can rely on the long and long long versions instead.
+ */
+
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
+#include <linux/overflow.h>
 
 #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h)))
 
+/*
+ * Generate a mask for the specified type @t. Additional checks are made to
+ * guarantee the value returned fits in that type, relying on
+ * -Wshift-count-overflow compiler check to detect incompatible arguments.
+ * For example, all these create build errors or warnings:
+ *
+ * - GENMASK(15, 20): wrong argument order
+ * - GENMASK(72, 15): doesn't fit unsigned long
+ * - GENMASK_U32(33, 15): doesn't fit in a u32
+ */
+#define GENMASK_TYPE(t, h, l)					\
+	((t)(GENMASK_INPUT_CHECK(h, l) +			\
+	     (type_max(t) << (l) &				\
+	      type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
+
+#define GENMASK_U8(h, l)	GENMASK_TYPE(u8, h, l)
+#define GENMASK_U16(h, l)	GENMASK_TYPE(u16, h, l)
+#define GENMASK_U32(h, l)	GENMASK_TYPE(u32, h, l)
+#define GENMASK_U64(h, l)	GENMASK_TYPE(u64, h, l)
+
 #else /* defined(__ASSEMBLY__) */
 
 /*
-- 
cgit v1.2.3


From 5b572e8a9f3dcd6e3ba80ec5b5ec85472c88cb4c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Mar 2025 00:59:58 +0900
Subject: bits: introduce fixed-type BIT_U*()

Implement fixed-type BIT_U*() to help drivers add stricter checks,
like it was done for GENMASK_U*().

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Co-developed-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bits.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bits.h b/include/linux/bits.h
index 9718c5ae5fc3..7ad056219115 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -24,7 +24,7 @@
 /*
  * Missing asm support
  *
- * GENMASK_U*() depend on BITS_PER_TYPE() which relies on sizeof(),
+ * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(),
  * something not available in asm. Nevertheless, fixed width integers is a C
  * concept. Assembly code can rely on the long and long long versions instead.
  */
@@ -55,6 +55,24 @@
 #define GENMASK_U32(h, l)	GENMASK_TYPE(u32, h, l)
 #define GENMASK_U64(h, l)	GENMASK_TYPE(u64, h, l)
 
+/*
+ * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
+ * following examples generate compiler warnings due to -Wshift-count-overflow:
+ *
+ * - BIT_U8(8)
+ * - BIT_U32(-1)
+ * - BIT_U32(40)
+ */
+#define BIT_INPUT_CHECK(type, nr) \
+	BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type)))
+
+#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr)))
+
+#define BIT_U8(nr)	BIT_TYPE(u8, nr)
+#define BIT_U16(nr)	BIT_TYPE(u16, nr)
+#define BIT_U32(nr)	BIT_TYPE(u32, nr)
+#define BIT_U64(nr)	BIT_TYPE(u64, nr)
+
 #else /* defined(__ASSEMBLY__) */
 
 /*
-- 
cgit v1.2.3


From 243c90e917f5cfc99821e5104d1c8a81c11cda4c Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 29 Mar 2025 01:48:50 +0900
Subject: build_bug.h: more user friendly error messages in BUILD_BUG_ON_ZERO()

__BUILD_BUG_ON_ZERO_MSG(), as introduced in [1], makes it possible to
do a static assertions in expressions. The direct benefit is to
provide a meaningful error message instead of the cryptic negative
bitfield size error message currently returned by BUILD_BUG_ON_ZERO():

  ./include/linux/build_bug.h:16:51: error: negative width in bit-field '<anonymous>'
     16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
        |                                                   ^

Get rid of BUILD_BUG_ON_ZERO()'s bitfield size hack. Instead rely on
__BUILD_BUG_ON_ZERO_MSG() which in turn relies on C11's
_Static_assert().

Use some macro magic, similarly to static_assert(), to either use an
optional error message provided by the user or, when omitted, to
produce a default error message by stringifying the tested
expression. With this, for example:

  BUILD_BUG_ON_ZERO(1 > 0)

would now throw:

  ./include/linux/compiler.h:197:62: error: static assertion failed: "1 > 0 is true"
    197 | define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
        |                                                             ^~~~~~~~~~~~~~

Finally, __BUILD_BUG_ON_ZERO_MSG() is already guarded by an:

  #ifdef __CHECKER__

So no need any more for that guard clause for BUILD_BUG_ON_ZERO().
Remove it.

[1] commit d7a516c6eeae ("compiler.h: Fix undefined BUILD_BUG_ON_ZERO()")
Link: https://git.kernel.org/torvalds/c/d7a516c6eeae

Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://git.kernel.org/next/linux-next/c/b88937277df
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/build_bug.h | 10 +++++-----
 include/linux/compiler.h  |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index 3aa3640f8c18..2cfbb4c65c78 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -4,17 +4,17 @@
 
 #include <linux/compiler.h>
 
-#ifdef __CHECKER__
-#define BUILD_BUG_ON_ZERO(e) (0)
-#else /* __CHECKER__ */
 /*
  * Force a compilation error if condition is true, but also produce a
  * result (of value 0 and type int), so the expression can be used
  * e.g. in a structure initializer (or where-ever else comma expressions
  * aren't permitted).
+ *
+ * Take an error message as an optional second argument. If omitted,
+ * default to the stringification of the tested expression.
  */
-#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
-#endif /* __CHECKER__ */
+#define BUILD_BUG_ON_ZERO(e, ...) \
+	__BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true")
 
 /* Force a compilation error if a constant expression is not a power of 2 */
 #define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 27725f1ab5ab..6f04a1d8c720 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -192,9 +192,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 })
 
 #ifdef __CHECKER__
-#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0)
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) (0)
 #else /* __CHECKER__ */
-#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
 #endif /* __CHECKER__ */
 
 /* &a[0] degrades to a pointer: a different type from an array */
-- 
cgit v1.2.3


From 99c712d788c46452289beada638476b68b5a773b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 17 Apr 2025 19:17:15 +0300
Subject: bitmap-str: Get rid of 'extern' for function prototypes

The bitmap-str.h uses mixed style for function prototypes. Drop
the 'extern' as it easier to read and makes style aligned with
a new code in the kernel.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bitmap-str.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h
index 17caeca94cab..d758b4809a3a 100644
--- a/include/linux/bitmap-str.h
+++ b/include/linux/bitmap-str.h
@@ -4,10 +4,10 @@
 
 int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits);
 int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits);
-extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp,
-					int nmaskbits, loff_t off, size_t count);
-extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp,
-					int nmaskbits, loff_t off, size_t count);
+int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, int nmaskbits,
+				loff_t off, size_t count);
+int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, int nmaskbits,
+			     loff_t off, size_t count);
 int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits);
 int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits);
 int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
-- 
cgit v1.2.3


From 89a44a808814d4717a8bf945ddebd5c39bfffcf4 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 17 Apr 2025 19:17:16 +0300
Subject: bitmap-str: Add missing header(s)

bitmap-str.h is not self-contained, it uses bool type that is provided
in types.h and it uses __user annotation that is guaranteed to be included
with types.h. Add missing header(s) to follow IWYU (Include What You Use)
principle.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bitmap-str.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h
index d758b4809a3a..53d3e1b32d3d 100644
--- a/include/linux/bitmap-str.h
+++ b/include/linux/bitmap-str.h
@@ -2,6 +2,8 @@
 #ifndef __LINUX_BITMAP_STR_H
 #define __LINUX_BITMAP_STR_H
 
+#include <linux/types.h>
+
 int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits);
 int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits);
 int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, int nmaskbits,
-- 
cgit v1.2.3


From a256ae22570ee4c3427fdc703a58a89afee6a332 Mon Sep 17 00:00:00 2001
From: Luo Jie <quic_luoj@quicinc.com>
Date: Thu, 17 Apr 2025 18:47:08 +0800
Subject: bitfield: Add FIELD_MODIFY() helper

Add a helper for replacing the contents of bitfield in memory
with the specified value.

Even though a helper xxx_replace_bits() is available, it is not
well documented, and only reports errors at the run time, which
will not be helpful to catch possible overflow errors due to
incorrect parameter types used.

FIELD_MODIFY(REG_FIELD_C, &reg, c) is the wrapper to the code below.

	reg &= ~REG_FIELD_C;
	reg |= FIELD_PREP(REG_FIELD_C, c);

Yury: trim commit message, align backslashes.

Signed-off-by: Luo Jie <quic_luoj@quicinc.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bitfield.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 63928f173223..6d9a53db54b6 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -8,6 +8,7 @@
 #define _LINUX_BITFIELD_H
 
 #include <linux/build_bug.h>
+#include <linux/typecheck.h>
 #include <asm/byteorder.h>
 
 /*
@@ -38,8 +39,7 @@
  *	  FIELD_PREP(REG_FIELD_D, 0x40);
  *
  * Modify:
- *  reg &= ~REG_FIELD_C;
- *  reg |= FIELD_PREP(REG_FIELD_C, c);
+ *  FIELD_MODIFY(REG_FIELD_C, &reg, c);
  */
 
 #define __bf_shf(x) (__builtin_ffsll(x) - 1)
@@ -156,6 +156,23 @@
 		(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask));	\
 	})
 
+/**
+ * FIELD_MODIFY() - modify a bitfield element
+ * @_mask: shifted mask defining the field's length and position
+ * @_reg_p: pointer to the memory that should be updated
+ * @_val: value to store in the bitfield
+ *
+ * FIELD_MODIFY() modifies the set of bits in @_reg_p specified by @_mask,
+ * by replacing them with the bitfield value passed in as @_val.
+ */
+#define FIELD_MODIFY(_mask, _reg_p, _val)						\
+	({										\
+		typecheck_pointer(_reg_p);						\
+		__BF_FIELD_CHECK(_mask, *(_reg_p), _val, "FIELD_MODIFY: ");		\
+		*(_reg_p) &= ~(_mask);							\
+		*(_reg_p) |= (((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask));	\
+	})
+
 extern void __compiletime_error("value doesn't fit into mask")
 __field_overflow(void);
 extern void __compiletime_error("bad bitfield mask")
-- 
cgit v1.2.3


From 007c07168ac0c64387be500f6604b09ace3f3bdc Mon Sep 17 00:00:00 2001
From: Su Hui <suhui@nfschina.com>
Date: Wed, 30 Apr 2025 11:27:32 +0800
Subject: time/jiffies: Change register_refined_jiffies() to void __init

register_refined_jiffies() is only used in setup code and always returns 0.
Mark it as __init to save some bytes and change it to void.

Signed-off-by: Su Hui <suhui@nfschina.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250430032734.2079290-2-suhui@nfschina.com
---
 include/linux/jiffies.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 0ea8c9887429..91b20788273d 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -59,7 +59,7 @@
 /* LATCH is used in the interval timer and ftape setup. */
 #define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
 
-extern int register_refined_jiffies(long clock_tick_rate);
+extern void register_refined_jiffies(long clock_tick_rate);
 
 /* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
 #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
-- 
cgit v1.2.3


From 6c9ffa2ae48e8fd44ba5c6ffdc16e62a62bac38a Mon Sep 17 00:00:00 2001
From: Antheas Kapenekakis <lkml@antheas.dev>
Date: Fri, 25 Apr 2025 13:18:18 +0200
Subject: power: supply: add inhibit-charge-awake to charge_behaviour
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OneXPlayer devices have a charge inhibit feature that allows the user
to select between it being active always or only when the device is on.

Therefore, add attribute inhibit-charge-awake to charge_behaviour to
allow the user to select that charge should be paused only when the
device is awake.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Derek J. Clark <derekjohn.clark@gmail.com>
Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Link: https://lore.kernel.org/r/20250425111821.88746-14-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/power_supply.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 888824592953..cbec930430a7 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -212,6 +212,7 @@ enum power_supply_usb_type {
 enum power_supply_charge_behaviour {
 	POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO = 0,
 	POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE,
+	POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE_AWAKE,
 	POWER_SUPPLY_CHARGE_BEHAVIOUR_FORCE_DISCHARGE,
 };
 
-- 
cgit v1.2.3


From 144530c15ec7fa95b29812d86f4be527338ea204 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Fri, 25 Apr 2025 13:46:16 -0700
Subject: pds_core: remove extra name description

Fix the kernel-doc complaint
include/linux/pds/pds_adminq.h:481: warning: Excess struct member 'name' description in 'pds_core_lif_getattr_comp'

Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pds/pds_adminq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index ddd111f04ca0..339156113fa5 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -463,7 +463,6 @@ struct pds_core_lif_getattr_cmd {
  * @rsvd:       Word boundary padding
  * @comp_index: Index in the descriptor ring for which this is the completion
  * @state:	LIF state (enum pds_core_lif_state)
- * @name:	LIF name string, 0 terminated
  * @features:	Features (enum pds_core_hw_features)
  * @rsvd2:      Word boundary padding
  * @color:	Color bit
-- 
cgit v1.2.3


From 7c4f4c4fa9b6fbb7e483bebd02f7b9cbc20ca5cc Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Fri, 25 Apr 2025 13:46:17 -0700
Subject: pds_core: smaller adminq poll starting interval

Shorten the adminq poll starting interval in order to notice
any transaction errors more quickly.

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pds/pds_adminq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index 339156113fa5..40ff0ec2b879 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -4,7 +4,7 @@
 #ifndef _PDS_CORE_ADMINQ_H_
 #define _PDS_CORE_ADMINQ_H_
 
-#define PDSC_ADMINQ_MAX_POLL_INTERVAL	256
+#define PDSC_ADMINQ_MAX_POLL_INTERVAL	256000	/* usecs */
 
 enum pds_core_adminq_flags {
 	PDS_AQ_FLAG_FASTPOLL	= BIT(1),	/* completion poll at 1ms */
-- 
cgit v1.2.3


From fc7fed6f77f94f2fd9a7557020503e146eb0ce38 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@linaro.org>
Date: Tue, 25 Mar 2025 11:58:46 +0000
Subject: coresight: Convert tag clear function to take a struct csdev_access

The self hosted claim tag will be reset on device probe in a later
commit. We'll want to do this before coresight_register() is called so
won't have a coresight_device and have to use csdev_access instead.

Also make them public and create locked and unlocked versions for
later use.

These look functions look like they set the whole tags register as one
value, but they only set and clear the self hosted bit using a SET/CLR
bits mechanism so also rename the functions to reflect this better.

Reviewed-by: Leo Yan <leo.yan@arm.com>
Reviewed-by: Yeoreum Yun <yeoreum.yun@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20250325-james-coresight-claim-tags-v4-1-dfbd3822b2e5@linaro.org
---
 include/linux/coresight.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index cfcf6e4707ed..b89692d9ceac 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -685,7 +685,8 @@ extern int coresight_timeout_action(struct csdev_access *csa, u32 offset,
 
 extern int coresight_claim_device(struct coresight_device *csdev);
 extern int coresight_claim_device_unlocked(struct coresight_device *csdev);
-
+void coresight_clear_self_claim_tag(struct csdev_access *csa);
+void coresight_clear_self_claim_tag_unlocked(struct csdev_access *csa);
 extern void coresight_disclaim_device(struct coresight_device *csdev);
 extern void coresight_disclaim_device_unlocked(struct coresight_device *csdev);
 extern char *coresight_alloc_device_name(struct coresight_dev_list *devs,
-- 
cgit v1.2.3


From e6e6b692865d333d79d25c761c53b19e73e9653f Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@linaro.org>
Date: Tue, 25 Mar 2025 11:58:52 +0000
Subject: coresight: Remove extern from function declarations

Function declarations are extern by default so remove the extra noise
and inconsistency.

Reviewed-by: Leo Yan <leo.yan@arm.com>
Reviewed-by: Yeoreum Yun <yeoreum.yun@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20250325-james-coresight-claim-tags-v4-7-dfbd3822b2e5@linaro.org
---
 include/linux/coresight.h | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index b89692d9ceac..8abdd8b5c791 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -671,28 +671,27 @@ static inline void coresight_set_mode(struct coresight_device *csdev,
 	local_set(&csdev->mode, new_mode);
 }
 
-extern struct coresight_device *
-coresight_register(struct coresight_desc *desc);
-extern void coresight_unregister(struct coresight_device *csdev);
-extern int coresight_enable_sysfs(struct coresight_device *csdev);
-extern void coresight_disable_sysfs(struct coresight_device *csdev);
-extern int coresight_timeout(struct csdev_access *csa, u32 offset,
-			     int position, int value);
+struct coresight_device *coresight_register(struct coresight_desc *desc);
+void coresight_unregister(struct coresight_device *csdev);
+int coresight_enable_sysfs(struct coresight_device *csdev);
+void coresight_disable_sysfs(struct coresight_device *csdev);
+int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value);
 typedef void (*coresight_timeout_cb_t) (struct csdev_access *, u32, int, int);
-extern int coresight_timeout_action(struct csdev_access *csa, u32 offset,
-					int position, int value,
-					coresight_timeout_cb_t cb);
+int coresight_timeout_action(struct csdev_access *csa, u32 offset, int position, int value,
+			     coresight_timeout_cb_t cb);
+int coresight_claim_device(struct coresight_device *csdev);
+int coresight_claim_device_unlocked(struct coresight_device *csdev);
 
-extern int coresight_claim_device(struct coresight_device *csdev);
-extern int coresight_claim_device_unlocked(struct coresight_device *csdev);
+int coresight_claim_device(struct coresight_device *csdev);
+int coresight_claim_device_unlocked(struct coresight_device *csdev);
 void coresight_clear_self_claim_tag(struct csdev_access *csa);
 void coresight_clear_self_claim_tag_unlocked(struct csdev_access *csa);
-extern void coresight_disclaim_device(struct coresight_device *csdev);
-extern void coresight_disclaim_device_unlocked(struct coresight_device *csdev);
-extern char *coresight_alloc_device_name(struct coresight_dev_list *devs,
+void coresight_disclaim_device(struct coresight_device *csdev);
+void coresight_disclaim_device_unlocked(struct coresight_device *csdev);
+char *coresight_alloc_device_name(struct coresight_dev_list *devs,
 					 struct device *dev);
 
-extern bool coresight_loses_context_with_cpu(struct device *dev);
+bool coresight_loses_context_with_cpu(struct device *dev);
 
 u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset);
 u32 coresight_read32(struct coresight_device *csdev, u32 offset);
@@ -705,8 +704,8 @@ void coresight_relaxed_write64(struct coresight_device *csdev,
 			       u64 val, u32 offset);
 void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset);
 
-extern int coresight_get_cpu(struct device *dev);
-extern int coresight_get_static_trace_id(struct device *dev, u32 *id);
+int coresight_get_cpu(struct device *dev);
+int coresight_get_static_trace_id(struct device *dev, u32 *id);
 
 struct coresight_platform_data *coresight_get_platform_data(struct device *dev);
 struct coresight_connection *
-- 
cgit v1.2.3


From 468d8b462ac64659caec53eff34f02963d5f52c8 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:45 -0500
Subject: iidc/ice/irdma: Rename IDC header file

To prepare for the IDC upgrade to support different CORE
PCI drivers, rename header file from iidc.h to iidc_rdma.h
since this files functionality is specifically for RDMA support.

Use net/dscp.h include in irdma osdep.h and DSCP_MAX type.h,
instead of iidc header and define.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc.h      | 109 ------------------------------------
 include/linux/net/intel/iidc_rdma.h | 109 ++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 109 deletions(-)
 delete mode 100644 include/linux/net/intel/iidc.h
 create mode 100644 include/linux/net/intel/iidc_rdma.h

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h
deleted file mode 100644
index 13274c3def66..000000000000
--- a/include/linux/net/intel/iidc.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2021, Intel Corporation. */
-
-#ifndef _IIDC_H_
-#define _IIDC_H_
-
-#include <linux/auxiliary_bus.h>
-#include <linux/dcbnl.h>
-#include <linux/device.h>
-#include <linux/if_ether.h>
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
-
-enum iidc_event_type {
-	IIDC_EVENT_BEFORE_MTU_CHANGE,
-	IIDC_EVENT_AFTER_MTU_CHANGE,
-	IIDC_EVENT_BEFORE_TC_CHANGE,
-	IIDC_EVENT_AFTER_TC_CHANGE,
-	IIDC_EVENT_CRIT_ERR,
-	IIDC_EVENT_NBITS		/* must be last */
-};
-
-enum iidc_reset_type {
-	IIDC_PFR,
-	IIDC_CORER,
-	IIDC_GLOBR,
-};
-
-enum iidc_rdma_protocol {
-	IIDC_RDMA_PROTOCOL_IWARP = BIT(0),
-	IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1),
-};
-
-#define IIDC_MAX_USER_PRIORITY		8
-#define IIDC_MAX_DSCP_MAPPING		64
-#define IIDC_DSCP_PFC_MODE		0x1
-
-/* Struct to hold per RDMA Qset info */
-struct iidc_rdma_qset_params {
-	/* Qset TEID returned to the RDMA driver in
-	 * ice_add_rdma_qset and used by RDMA driver
-	 * for calls to ice_del_rdma_qset
-	 */
-	u32 teid;	/* Qset TEID */
-	u16 qs_handle; /* RDMA driver provides this */
-	u16 vport_id; /* VSI index */
-	u8 tc; /* TC branch the Qset should belong to */
-};
-
-struct iidc_qos_info {
-	u64 tc_ctx;
-	u8 rel_bw;
-	u8 prio_type;
-	u8 egress_virt_up;
-	u8 ingress_virt_up;
-};
-
-/* Struct to pass QoS info */
-struct iidc_qos_params {
-	struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
-	u8 up2tc[IIDC_MAX_USER_PRIORITY];
-	u8 vport_relative_bw;
-	u8 vport_priority_type;
-	u8 num_tc;
-	u8 pfc_mode;
-	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
-};
-
-struct iidc_event {
-	DECLARE_BITMAP(type, IIDC_EVENT_NBITS);
-	u32 reg;
-};
-
-struct ice_pf;
-
-int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
-int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
-int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-
-/* Structure representing auxiliary driver tailored information about the core
- * PCI dev, each auxiliary driver using the IIDC interface will have an
- * instance of this struct dedicated to it.
- */
-
-struct iidc_auxiliary_dev {
-	struct auxiliary_device adev;
-	struct ice_pf *pf;
-};
-
-/* structure representing the auxiliary driver. This struct is to be
- * allocated and populated by the auxiliary driver's owner. The core PCI
- * driver will access these ops by performing a container_of on the
- * auxiliary_device->dev.driver.
- */
-struct iidc_auxiliary_drv {
-	struct auxiliary_driver adrv;
-	/* This event_handler is meant to be a blocking call.  For instance,
-	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
-	 * return until the auxiliary driver is ready for the MTU change to
-	 * happen.
-	 */
-	void (*event_handler)(struct ice_pf *pf, struct iidc_event *event);
-};
-
-#endif /* _IIDC_H_*/
diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
new file mode 100644
index 000000000000..0cd75404e459
--- /dev/null
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _IIDC_RDMA_H_
+#define _IIDC_RDMA_H_
+
+#include <linux/auxiliary_bus.h>
+#include <linux/dcbnl.h>
+#include <linux/device.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+enum iidc_event_type {
+	IIDC_EVENT_BEFORE_MTU_CHANGE,
+	IIDC_EVENT_AFTER_MTU_CHANGE,
+	IIDC_EVENT_BEFORE_TC_CHANGE,
+	IIDC_EVENT_AFTER_TC_CHANGE,
+	IIDC_EVENT_CRIT_ERR,
+	IIDC_EVENT_NBITS		/* must be last */
+};
+
+enum iidc_reset_type {
+	IIDC_PFR,
+	IIDC_CORER,
+	IIDC_GLOBR,
+};
+
+enum iidc_rdma_protocol {
+	IIDC_RDMA_PROTOCOL_IWARP = BIT(0),
+	IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1),
+};
+
+#define IIDC_MAX_USER_PRIORITY		8
+#define IIDC_MAX_DSCP_MAPPING		64
+#define IIDC_DSCP_PFC_MODE		0x1
+
+/* Struct to hold per RDMA Qset info */
+struct iidc_rdma_qset_params {
+	/* Qset TEID returned to the RDMA driver in
+	 * ice_add_rdma_qset and used by RDMA driver
+	 * for calls to ice_del_rdma_qset
+	 */
+	u32 teid;	/* Qset TEID */
+	u16 qs_handle; /* RDMA driver provides this */
+	u16 vport_id; /* VSI index */
+	u8 tc; /* TC branch the Qset should belong to */
+};
+
+struct iidc_qos_info {
+	u64 tc_ctx;
+	u8 rel_bw;
+	u8 prio_type;
+	u8 egress_virt_up;
+	u8 ingress_virt_up;
+};
+
+/* Struct to pass QoS info */
+struct iidc_qos_params {
+	struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+	u8 up2tc[IIDC_MAX_USER_PRIORITY];
+	u8 vport_relative_bw;
+	u8 vport_priority_type;
+	u8 num_tc;
+	u8 pfc_mode;
+	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
+};
+
+struct iidc_event {
+	DECLARE_BITMAP(type, IIDC_EVENT_NBITS);
+	u32 reg;
+};
+
+struct ice_pf;
+
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
+void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
+int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+
+/* Structure representing auxiliary driver tailored information about the core
+ * PCI dev, each auxiliary driver using the IIDC interface will have an
+ * instance of this struct dedicated to it.
+ */
+
+struct iidc_auxiliary_dev {
+	struct auxiliary_device adev;
+	struct ice_pf *pf;
+};
+
+/* structure representing the auxiliary driver. This struct is to be
+ * allocated and populated by the auxiliary driver's owner. The core PCI
+ * driver will access these ops by performing a container_of on the
+ * auxiliary_device->dev.driver.
+ */
+struct iidc_auxiliary_drv {
+	struct auxiliary_driver adrv;
+	/* This event_handler is meant to be a blocking call.  For instance,
+	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
+	 * return until the auxiliary driver is ready for the MTU change to
+	 * happen.
+	 */
+	void (*event_handler)(struct ice_pf *pf, struct iidc_event *event);
+};
+
+#endif /* _IIDC_RDMA_H_*/
-- 
cgit v1.2.3


From 118c40b7b50340bf7ff7e0adee8e3bab6e552c82 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 28 Mar 2025 18:21:44 +0100
Subject: kbuild: require gcc-8 and binutils-2.30

Commit a3e8fe814ad1 ("x86/build: Raise the minimum GCC version to 8.1")
raised the minimum compiler version as enforced by Kbuild to gcc-8.1
and clang-15 for x86.

This is actually the same gcc version that has been discussed as the
minimum for all architectures several times in the past, with little
objection. A previous concern was the kernel for SLE15-SP7 needing to
be built with gcc-7. As this ended up still using linux-6.4 and there
is no plan for an SP8, this is no longer a problem.

Change it for all architectures and adjust the documentation accordingly.
A few version checks can be removed in the process.  The binutils
version 2.30 is the lowest version used in combination with gcc-8 on
common distros, so use that as the corresponding minimum.

Link: https://lore.kernel.org/lkml/20240925150059.3955569-32-ardb+git@google.com/
Link: https://lore.kernel.org/lkml/871q7yxrgv.wl-tiwai@suse.de/
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/unroll.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/unroll.h b/include/linux/unroll.h
index 863fb69f6a7e..186b71de740f 100644
--- a/include/linux/unroll.h
+++ b/include/linux/unroll.h
@@ -11,10 +11,8 @@
 
 #ifdef CONFIG_CC_IS_CLANG
 #define __pick_unrolled(x, y)		_Pragma(#x)
-#elif CONFIG_GCC_VERSION >= 80000
-#define __pick_unrolled(x, y)		_Pragma(#y)
 #else
-#define __pick_unrolled(x, y)		/* not supported */
+#define __pick_unrolled(x, y)		_Pragma(#y)
 #endif
 
 /**
-- 
cgit v1.2.3


From 97b5631aae6896369712d6b7131afbc95c753587 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:46 -0500
Subject: iidc/ice/irdma: Rename to iidc_* convention

In preparation of supporting more than a single core PCI driver
for RDMA, homogenize naming to iidc_rdma_* and IIDC_RDMA_*
form.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h | 38 +++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 0cd75404e459..2b24a9912fa0 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -11,16 +11,16 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 
-enum iidc_event_type {
-	IIDC_EVENT_BEFORE_MTU_CHANGE,
-	IIDC_EVENT_AFTER_MTU_CHANGE,
-	IIDC_EVENT_BEFORE_TC_CHANGE,
-	IIDC_EVENT_AFTER_TC_CHANGE,
-	IIDC_EVENT_CRIT_ERR,
-	IIDC_EVENT_NBITS		/* must be last */
+enum iidc_rdma_event_type {
+	IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE,
+	IIDC_RDMA_EVENT_AFTER_MTU_CHANGE,
+	IIDC_RDMA_EVENT_BEFORE_TC_CHANGE,
+	IIDC_RDMA_EVENT_AFTER_TC_CHANGE,
+	IIDC_RDMA_EVENT_CRIT_ERR,
+	IIDC_RDMA_EVENT_NBITS		/* must be last */
 };
 
-enum iidc_reset_type {
+enum iidc_rdma_reset_type {
 	IIDC_PFR,
 	IIDC_CORER,
 	IIDC_GLOBR,
@@ -47,7 +47,7 @@ struct iidc_rdma_qset_params {
 	u8 tc; /* TC branch the Qset should belong to */
 };
 
-struct iidc_qos_info {
+struct iidc_rdma_qos_info {
 	u64 tc_ctx;
 	u8 rel_bw;
 	u8 prio_type;
@@ -56,8 +56,8 @@ struct iidc_qos_info {
 };
 
 /* Struct to pass QoS info */
-struct iidc_qos_params {
-	struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+struct iidc_rdma_qos_params {
+	struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
 	u8 up2tc[IIDC_MAX_USER_PRIORITY];
 	u8 vport_relative_bw;
 	u8 vport_priority_type;
@@ -66,8 +66,8 @@ struct iidc_qos_params {
 	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
 };
 
-struct iidc_event {
-	DECLARE_BITMAP(type, IIDC_EVENT_NBITS);
+struct iidc_rdma_event {
+	DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS);
 	u32 reg;
 };
 
@@ -75,9 +75,11 @@ struct ice_pf;
 
 int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
 int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
+int ice_rdma_request_reset(struct ice_pf *pf,
+			   enum iidc_rdma_reset_type reset_type);
 int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
+void ice_get_qos_params(struct ice_pf *pf,
+			struct iidc_rdma_qos_params *qos);
 int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
 void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
 
@@ -86,7 +88,7 @@ void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
  * instance of this struct dedicated to it.
  */
 
-struct iidc_auxiliary_dev {
+struct iidc_rdma_core_auxiliary_dev {
 	struct auxiliary_device adev;
 	struct ice_pf *pf;
 };
@@ -96,14 +98,14 @@ struct iidc_auxiliary_dev {
  * driver will access these ops by performing a container_of on the
  * auxiliary_device->dev.driver.
  */
-struct iidc_auxiliary_drv {
+struct iidc_rdma_core_auxiliary_drv {
 	struct auxiliary_driver adrv;
 	/* This event_handler is meant to be a blocking call.  For instance,
 	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
 	 * return until the auxiliary driver is ready for the MTU change to
 	 * happen.
 	 */
-	void (*event_handler)(struct ice_pf *pf, struct iidc_event *event);
+	void (*event_handler)(struct ice_pf *pf, struct iidc_rdma_event *event);
 };
 
 #endif /* _IIDC_RDMA_H_*/
-- 
cgit v1.2.3


From d9251a560ba67bbedd53b81aee32e1ad95f42000 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:47 -0500
Subject: iidc/ice/irdma: Break iidc.h into two headers

In preparation of supporting more than a single core PCI driver
for RDMA, break the iidc_rdma.h header file into two more focused
headers.

Only the elements universal to all Intel drivers will remain in
the generic iidc_rdma.h header. Move the ice specific information
to an ice specific header file named iidc_rdma_ice.h.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h     | 14 +-------------
 include/linux/net/intel/iidc_rdma_ice.h | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 13 deletions(-)
 create mode 100644 include/linux/net/intel/iidc_rdma_ice.h

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 2b24a9912fa0..1e8136395154 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2021, Intel Corporation. */
+/* Copyright (C) 2021-2025, Intel Corporation. */
 
 #ifndef _IIDC_RDMA_H_
 #define _IIDC_RDMA_H_
@@ -71,18 +71,6 @@ struct iidc_rdma_event {
 	u32 reg;
 };
 
-struct ice_pf;
-
-int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf,
-			   enum iidc_rdma_reset_type reset_type);
-int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf,
-			struct iidc_rdma_qos_params *qos);
-int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-
 /* Structure representing auxiliary driver tailored information about the core
  * PCI dev, each auxiliary driver using the IIDC interface will have an
  * instance of this struct dedicated to it.
diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h
new file mode 100644
index 000000000000..78d10003d776
--- /dev/null
+++ b/include/linux/net/intel/iidc_rdma_ice.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2025, Intel Corporation. */
+
+#ifndef _IIDC_RDMA_ICE_H_
+#define _IIDC_RDMA_ICE_H_
+
+struct ice_pf;
+
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_rdma_request_reset(struct ice_pf *pf,
+			   enum iidc_rdma_reset_type reset_type);
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
+void ice_get_qos_params(struct ice_pf *pf,
+			struct iidc_rdma_qos_params *qos);
+int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+
+#endif /* _IIDC_RDMA_ICE_H_*/
-- 
cgit v1.2.3


From 8239b771b94b639556c1987185fd82b2a896c923 Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Date: Tue, 15 Apr 2025 21:15:48 -0500
Subject: ice: Replace ice specific DSCP mapping num with a kernel define

Replace ice driver specific DSCP mapping number defines
ICE_DSCP_NUM_VAL and IIDC_MAX_DSCP_MAPPING with
an equivalent kernel define DSCP_MAX.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 1e8136395154..7f1910289534 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -10,6 +10,7 @@
 #include <linux/if_ether.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <net/dscp.h>
 
 enum iidc_rdma_event_type {
 	IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE,
@@ -32,7 +33,6 @@ enum iidc_rdma_protocol {
 };
 
 #define IIDC_MAX_USER_PRIORITY		8
-#define IIDC_MAX_DSCP_MAPPING		64
 #define IIDC_DSCP_PFC_MODE		0x1
 
 /* Struct to hold per RDMA Qset info */
@@ -63,7 +63,7 @@ struct iidc_rdma_qos_params {
 	u8 vport_priority_type;
 	u8 num_tc;
 	u8 pfc_mode;
-	u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
+	u8 dscp_map[DSCP_MAX];
 };
 
 struct iidc_rdma_event {
-- 
cgit v1.2.3


From 1b3f2bd04d90f61e1f291b5e365b9bc4ce0ea7c7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 29 Apr 2025 19:46:21 -0700
Subject: x86/devmem: Remove duplicate range_is_allowed() definition

17 years ago, Venki suggested [1] "A future improvement would be to
avoid the range_is_allowed duplication".

The only thing preventing a common implementation is that
phys_mem_access_prot_allowed() expects the range check to exit
immediately when PAT is disabled [2]. I.e. there is no cache conflict to
manage in that case. This cleanup was noticed on the path to
considering changing range_is_allowed() policy to blanket deny /dev/mem
for private (confidential computing) memory.

Note, however that phys_mem_access_prot_allowed() has long since stopped
being relevant for managing cache-type validation due to [3], and [4].

Commit 0124cecfc85a ("x86, PAT: disable /dev/mem mmap RAM with PAT") [1]
Commit 9e41bff2708e ("x86: fix /dev/mem mmap breakage when PAT is disabled") [2]
Commit 1886297ce0c8 ("x86/mm/pat: Fix BUG_ON() in mmap_mem() on QEMU/i386") [3]
Commit 0c3c8a18361a ("x86, PAT: Remove duplicate memtype reserve in devmem mmap") [4]

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/all/20250430024622.1134277-2-dan.j.williams%40intel.com
---
 include/linux/io.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/io.h b/include/linux/io.h
index 6a6bc4d46d0a..0642c7ee41db 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -183,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base,
 int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start,
 				    resource_size_t size);
 
+#ifdef CONFIG_STRICT_DEVMEM
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	u64 from = ((u64)pfn) << PAGE_SHIFT;
+	u64 to = from + size;
+	u64 cursor = from;
+
+	while (cursor < to) {
+		if (!devmem_is_allowed(pfn))
+			return 0;
+		cursor += PAGE_SIZE;
+		pfn++;
+	}
+	return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	return 1;
+}
+#endif
 #endif /* _LINUX_IO_H */
-- 
cgit v1.2.3


From a3e1c0ad835702555d90565584ab6f723adf7f94 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 29 Apr 2025 08:04:46 +0200
Subject: net: phy: factor out provider part from mdio_bus.c

After 52358dd63e34 ("net: phy: remove function stubs") there's a
problem if CONFIG_MDIO_BUS is set, but CONFIG_PHYLIB is not.
mdiobus_scan() uses phylib functions like get_phy_device().
Bringing back the stub wouldn't make much sense, because it would
allow to compile mdiobus_scan(), but the function would be unusable.
The stub returned NULL, and we have the following in mdiobus_scan():

phydev = get_phy_device(bus, addr, c45);
        if (IS_ERR(phydev))
                return phydev;

So calling mdiobus_scan() w/o CONFIG_PHYLIB would cause a crash later in
mdiobus_scan(). In general the PHYLIB functionality isn't optional here.
Consequently, MDIO bus providers depend on PHYLIB.
Therefore factor it out and build it together with the libphy core
modules. In addition make all MDIO bus providers under /drivers/net/mdio
depend on PHYLIB. Same applies to enetc MDIO bus provider. Note that
PHYLIB selects MDIO_DEVRES, therefore we can omit this here.

Fixes: 52358dd63e34 ("net: phy: remove function stubs")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202504270639.mT0lh2o1-lkp@intel.com/
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/c74772a9-dab6-44bf-a657-389df89d85c2@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3beaf225ee88..d62d292024bc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2062,6 +2062,7 @@ int __phy_hwtstamp_set(struct phy_device *phydev,
 		       struct netlink_ext_ack *extack);
 
 extern const struct bus_type mdio_bus_type;
+extern const struct class mdio_bus_class;
 
 struct mdio_board_info {
 	const char	*bus_id;
-- 
cgit v1.2.3


From 8c5d8c0b9e8184a86baede72ca392f90d867d22e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 23 Apr 2025 14:21:28 +0530
Subject: OPP: Define and use scope-based cleanup helpers

Define and use scope-based cleanup helpers for `struct opp` and `struct
opp_table`.

No intentional functional impact.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/pm_opp.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 0deddfa91aca..8313ed981535 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -11,6 +11,7 @@
 #ifndef __LINUX_OPP_H__
 #define __LINUX_OPP_H__
 
+#include <linux/cleanup.h>
 #include <linux/energy_model.h>
 #include <linux/err.h>
 #include <linux/notifier.h>
@@ -579,6 +580,12 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta
 }
 #endif
 
+/* Scope based cleanup macro for OPP reference counting */
+DEFINE_FREE(put_opp, struct dev_pm_opp *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put(_T))
+
+/* Scope based cleanup macro for OPP table reference counting */
+DEFINE_FREE(put_opp_table, struct opp_table *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put_opp_table(_T))
+
 /* OPP Configuration helpers */
 
 static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
-- 
cgit v1.2.3


From ee3de3cf7035aaf289085111fd0cfaddc93f0409 Mon Sep 17 00:00:00 2001
From: Praveen Talari <quic_ptalari@quicinc.com>
Date: Fri, 2 May 2025 10:58:22 +0530
Subject: OPP: Add dev_pm_opp_set_level()

To configure a device to a specific performance level, consumer drivers
currently need to determine the OPP based on the exact level and then
set it, resulting in code duplication across drivers.

The new helper API, dev_pm_opp_set_level(), addresses this issue by
providing a streamlined method for consumer drivers to find and set the
OPP based on the desired performance level, thereby eliminating
redundancy.

Signed-off-by: Praveen Talari <quic_ptalari@quicinc.com>
[ Viresh: Lot of fixes in the code, and rebased over latest changes.
           Fixed commit log too. ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/pm_opp.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 8313ed981535..cf477beae4bb 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -197,6 +197,7 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 void dev_pm_opp_remove_table(struct device *dev);
 void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask);
 int dev_pm_opp_sync_regulators(struct device *dev);
+
 #else
 static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
 {
@@ -717,4 +718,14 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 	return dev_pm_opp_get_freq_indexed(opp, 0);
 }
 
+static inline int dev_pm_opp_set_level(struct device *dev, unsigned int level)
+{
+	struct dev_pm_opp *opp __free(put_opp) = dev_pm_opp_find_level_exact(dev, level);
+
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	return dev_pm_opp_set_opp(dev, opp);
+}
+
 #endif		/* __LINUX_OPP_H__ */
-- 
cgit v1.2.3


From 2e9b2ee2ba403cbe270a8256b8794ef5ad19b38d Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 30 Apr 2025 10:52:49 +0800
Subject: iommu: Cleanup comments for dev_enable/disable_feat

The dev_enable/disable_feat ops have been removed by commit
<f984fb09e60e> ("iommu: Remove iommu_dev_enable/disable_feature()").
Cleanup the comments to make the code clean.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20250430025249.2371751-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bc5363e6b43e..56b4a1c2e031 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -590,8 +590,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
  * @of_xlate: add OF master IDs to iommu grouping
  * @is_attach_deferred: Check if domain attach should be deferred from iommu
  *                      driver init to device driver init (default no)
- * @dev_enable/disable_feat: per device entries to enable/disable
- *                               iommu specific features.
  * @page_response: handle page request response
  * @def_domain_type: device default domain type, return value:
  *		- IOMMU_DOMAIN_IDENTITY: must use an identity domain
-- 
cgit v1.2.3


From b5325b2a270fcaf7b2a9a0f23d422ca8a5a8bdea Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 14 Apr 2025 15:55:07 +0200
Subject: coredump: hand a pidfd to the usermode coredump helper

Give userspace a way to instruct the kernel to install a pidfd into the
usermode helper process. This makes coredump handling a lot more
reliable for userspace. In parallel with this commit we already have
systemd adding support for this in [1].

We create a pidfs file for the coredumping process when we process the
corename pattern. When the usermode helper process is forked we then
install the pidfs file as file descriptor three into the usermode
helpers file descriptor table so it's available to the exec'd program.

Since usermode helpers are either children of the system_unbound_wq
workqueue or kthreadd we know that the file descriptor table is empty
and can thus always use three as the file descriptor number.

Note, that we'll install a pidfd for the thread-group leader even if a
subthread is calling do_coredump(). We know that task linkage hasn't
been removed due to delay_group_leader() and even if this @current isn't
the actual thread-group leader we know that the thread-group leader
cannot be reaped until @current has exited.

Link: https://github.com/systemd/systemd/pull/37125 [1]
Link: https://lore.kernel.org/20250414-work-coredump-v2-3-685bf231f828@kernel.org
Tested-by: Luca Boccassi <luca.boccassi@gmail.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/coredump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 77e6e195d1d6..76e41805b92d 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -28,6 +28,7 @@ struct coredump_params {
 	int vma_count;
 	size_t vma_data_size;
 	struct core_vma_metadata *vma_meta;
+	struct pid *pid;
 };
 
 extern unsigned int core_file_note_size_limit;
-- 
cgit v1.2.3


From 66d454e99d71857faf249486912e381ec83760b4 Mon Sep 17 00:00:00 2001
From: Jordan Rife <jordan@jrife.io>
Date: Fri, 2 May 2025 09:15:21 -0700
Subject: bpf: udp: Make sure iter->batch always contains a full bucket
 snapshot

Require that iter->batch always contains a full bucket snapshot. This
invariant is important to avoid skipping or repeating sockets during
iteration when combined with the next few patches. Before, there were
two cases where a call to bpf_iter_udp_batch may only capture part of a
bucket:

1. When bpf_iter_udp_realloc_batch() returns -ENOMEM [1].
2. When more sockets are added to the bucket while calling
   bpf_iter_udp_realloc_batch(), making the updated batch size
   insufficient [2].

In cases where the batch size only covers part of a bucket, it is
possible to forget which sockets were already visited, especially if we
have to process a bucket in more than two batches. This forces us to
choose between repeating or skipping sockets, so don't allow this:

1. Stop iteration and propagate -ENOMEM up to userspace if reallocation
   fails instead of continuing with a partial batch.
2. Try bpf_iter_udp_realloc_batch() with GFP_USER just as before, but if
   we still aren't able to capture the full bucket, call
   bpf_iter_udp_realloc_batch() again while holding the bucket lock to
   guarantee the bucket does not change. On the second attempt use
   GFP_NOWAIT since we hold onto the spin lock.

Introduce the udp_portaddr_for_each_entry_from macro and use it instead
of udp_portaddr_for_each_entry to make it possible to continue iteration
from an arbitrary socket. This is required for this patch in the
GFP_NOWAIT case to allow us to fill the rest of a batch starting from
the middle of a bucket and the later patch which skips sockets that were
already seen.

Testing all scenarios directly is a bit difficult, but I did some manual
testing to exercise the code paths where GFP_NOWAIT is used and where
ERR_PTR(err) is returned. I used the realloc test case included later
in this series to trigger a scenario where a realloc happens inside
bpf_iter_udp_batch and made a small code tweak to force the first
realloc attempt to allocate a too-small batch, thus requiring
another attempt with GFP_NOWAIT. Some printks showed both reallocs with
the tests passing:

Apr 25 23:16:24 crow kernel: go again GFP_USER
Apr 25 23:16:24 crow kernel: go again GFP_NOWAIT

With this setup, I also forced each of the bpf_iter_udp_realloc_batch
calls to return -ENOMEM to ensure that iteration ends and that the
read() in userspace fails.

[1]: https://lore.kernel.org/bpf/CABi4-ogUtMrH8-NVB6W8Xg_F_KDLq=yy-yu-tKr2udXE2Mu1Lg@mail.gmail.com/
[2]: https://lore.kernel.org/bpf/7ed28273-a716-4638-912d-f86f965e54bb@linux.dev/

Signed-off-by: Jordan Rife <jordan@jrife.io>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/udp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 895240177f4f..4e1a672af4c5 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -216,6 +216,9 @@ static inline void udp_allow_gso(struct sock *sk)
 #define udp_portaddr_for_each_entry(__sk, list) \
 	hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
 
+#define udp_portaddr_for_each_entry_from(__sk) \
+	hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node)
+
 #define udp_portaddr_for_each_entry_rcu(__sk, list) \
 	hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
 
-- 
cgit v1.2.3


From 71ded61bee2af97a218f9dfa0f4019cc4cd3b029 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 3 Mar 2025 23:14:20 -0800
Subject: configfs-tsm: Namespace TSM report symbols

In preparation for new + common TSM (TEE Security Manager)
infrastructure, namespace the TSM report symbols in tsm.h with an
_REPORT suffix to differentiate them from other incoming tsm work.

Cc: Yilun Xu <yilun.xu@intel.com>
Cc: Samuel Ortiz <sameo@rivosinc.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Sami Mujawar <sami.mujawar@arm.com>
Cc: Steven Price <steven.price@arm.com>
Reviewed-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patch.msgid.link/174107246021.1288555.7203769833791489618.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/tsm.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tsm.h b/include/linux/tsm.h
index 11b0c525be30..431054810dca 100644
--- a/include/linux/tsm.h
+++ b/include/linux/tsm.h
@@ -6,17 +6,17 @@
 #include <linux/types.h>
 #include <linux/uuid.h>
 
-#define TSM_INBLOB_MAX 64
-#define TSM_OUTBLOB_MAX SZ_32K
+#define TSM_REPORT_INBLOB_MAX 64
+#define TSM_REPORT_OUTBLOB_MAX SZ_32K
 
 /*
  * Privilege level is a nested permission concept to allow confidential
  * guests to partition address space, 4-levels are supported.
  */
-#define TSM_PRIVLEVEL_MAX 3
+#define TSM_REPORT_PRIVLEVEL_MAX 3
 
 /**
- * struct tsm_desc - option descriptor for generating tsm report blobs
+ * struct tsm_report_desc - option descriptor for generating tsm report blobs
  * @privlevel: optional privilege level to associate with @outblob
  * @inblob_len: sizeof @inblob
  * @inblob: arbitrary input data
@@ -24,10 +24,10 @@
  * @service_guid: optional service-provider service guid to attest
  * @service_manifest_version: optional service-provider service manifest version requested
  */
-struct tsm_desc {
+struct tsm_report_desc {
 	unsigned int privlevel;
 	size_t inblob_len;
-	u8 inblob[TSM_INBLOB_MAX];
+	u8 inblob[TSM_REPORT_INBLOB_MAX];
 	char *service_provider;
 	guid_t service_guid;
 	unsigned int service_manifest_version;
@@ -44,7 +44,7 @@ struct tsm_desc {
  * @manifestblob: (optional) manifest data associated with the report
  */
 struct tsm_report {
-	struct tsm_desc desc;
+	struct tsm_report_desc desc;
 	size_t outblob_len;
 	u8 *outblob;
 	size_t auxblob_len;
@@ -88,7 +88,7 @@ enum tsm_bin_attr_index {
 };
 
 /**
- * struct tsm_ops - attributes and operations for tsm instances
+ * struct tsm_report_ops - attributes and operations for tsm_report instances
  * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider
  * @privlevel_floor: convey base privlevel for nested scenarios
  * @report_new: Populate @report with the report blob and auxblob
@@ -99,7 +99,7 @@ enum tsm_bin_attr_index {
  * Implementation specific ops, only one is expected to be registered at
  * a time i.e. only one of "sev-guest", "tdx-guest", etc.
  */
-struct tsm_ops {
+struct tsm_report_ops {
 	const char *name;
 	unsigned int privlevel_floor;
 	int (*report_new)(struct tsm_report *report, void *data);
@@ -107,6 +107,6 @@ struct tsm_ops {
 	bool (*report_bin_attr_visible)(int n);
 };
 
-int tsm_register(const struct tsm_ops *ops, void *priv);
-int tsm_unregister(const struct tsm_ops *ops);
+int tsm_report_register(const struct tsm_report_ops *ops, void *priv);
+int tsm_report_unregister(const struct tsm_report_ops *ops);
 #endif /* __TSM_H */
-- 
cgit v1.2.3


From ca732e990fc8222a2d6782ae750304719e212fe8 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 1 May 2025 12:45:11 +0100
Subject: net: stmmac: add get_interfaces() platform method

Add a get_interfaces() platform method to allow platforms to indicate
to phylink which interface modes they support - which then allows
phylink to validate on initialisation that the configured PHY interface
mode is actually supported.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/E1uASLn-0021Qd-Mi@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 8aed09d65b4a..537bced69c46 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -233,6 +233,8 @@ struct plat_stmmacenet_data {
 	u8 tx_sched_algorithm;
 	struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
 	struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
+	void (*get_interfaces)(struct stmmac_priv *priv, void *bsp_priv,
+			       unsigned long *interfaces);
 	int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i,
 			       phy_interface_t interface, int speed);
 	void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
-- 
cgit v1.2.3


From 9d165dc58055d98658941a33fef9e5da866af3e9 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 1 May 2025 12:45:27 +0100
Subject: net: stmmac: remove speed_mode_2500() method

Remove the speed_mode_2500() platform method which is no longer used
or necessary, being superseded by the more flexible get_interfaces()
method.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/E1uASM3-0021R3-2B@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 537bced69c46..26ddf95d23f9 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -241,7 +241,6 @@ struct plat_stmmacenet_data {
 	int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
 	int (*serdes_powerup)(struct net_device *ndev, void *priv);
 	void (*serdes_powerdown)(struct net_device *ndev, void *priv);
-	void (*speed_mode_2500)(struct net_device *ndev, void *priv);
 	int (*mac_finish)(struct net_device *ndev,
 			  void *priv,
 			  unsigned int mode,
-- 
cgit v1.2.3


From 3efa66ce6ee1b55ab687b316e48e1e9ddc1f780a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 16 Apr 2025 18:29:01 +0200
Subject: rcuref: Provide rcuref_is_dead()

rcuref_read() returns the number of references that are currently held.
If 0 is returned then it is not safe to assume that the object ca be
scheduled for deconstruction because it is marked DEAD. This happens if
the return value of rcuref_put() is ignored and assumptions are made.

If 0 is returned then the counter transitioned from 0 to RCUREF_NOREF.
If rcuref_put() did not return to the caller then the counter did not
yet transition from RCUREF_NOREF to RCUREF_DEAD. This means that there
is still a chance that the counter will transition from RCUREF_NOREF to
0 meaning it is still valid and must not be deconstructed. In this brief
window rcuref_read() will return 0.

Provide rcuref_is_dead() to determine if the counter is marked as
RCUREF_DEAD.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250416162921.513656-2-bigeasy@linutronix.de
---
 include/linux/rcuref.h | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
index 6322d8c1c6b4..2fb2af6d9824 100644
--- a/include/linux/rcuref.h
+++ b/include/linux/rcuref.h
@@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
  * rcuref_read - Read the number of held reference counts of a rcuref
  * @ref:	Pointer to the reference count
  *
- * Return: The number of held references (0 ... N)
+ * Return: The number of held references (0 ... N). The value 0 does not
+ * indicate that it is safe to schedule the object, protected by this reference
+ * counter, for deconstruction.
+ * If you want to know if the reference counter has been marked DEAD (as
+ * signaled by rcuref_put()) please use rcuread_is_dead().
  */
 static inline unsigned int rcuref_read(rcuref_t *ref)
 {
@@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref)
 	return c >= RCUREF_RELEASED ? 0 : c + 1;
 }
 
+/**
+ * rcuref_is_dead -	Check if the rcuref has been already marked dead
+ * @ref:		Pointer to the reference count
+ *
+ * Return: True if the object has been marked DEAD. This signals that a previous
+ * invocation of rcuref_put() returned true on this reference counter meaning
+ * the protected object can safely be scheduled for deconstruction.
+ * Otherwise, returns false.
+ */
+static inline bool rcuref_is_dead(rcuref_t *ref)
+{
+	unsigned int c = atomic_read(&ref->refcnt);
+
+	return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF);
+}
+
 extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
 
 /**
-- 
cgit v1.2.3


From 55284f70134f01fdc9cc4c4905551cc1f37abd34 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Apr 2025 18:29:02 +0200
Subject: mm: Add vmalloc_huge_node()

To enable node specific hash-tables using huge pages if possible.

[bigeasy: use __vmalloc_node_range_noprof(), add nommu bits, inline
vmalloc_huge]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250416162921.513656-3-bigeasy@linutronix.de
---
 include/linux/vmalloc.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e3..de95794777ad 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -168,8 +168,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m
 		int node, const void *caller) __alloc_size(1);
 #define __vmalloc_node(...)	alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
 
-void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
-#define vmalloc_huge(...)	alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1);
+#define vmalloc_huge_node(...)	alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__))
+
+static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+{
+	return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE);
+}
 
 extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
 #define __vmalloc_array(...)	alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))
-- 
cgit v1.2.3


From 80367ad01d93ac781b0e1df246edaf006928002f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 16 Apr 2025 18:29:12 +0200
Subject: futex: Add basic infrastructure for local task local hash

The futex hash is system wide and shared by all tasks. Each slot
is hashed based on futex address and the VMA of the thread. Due to
randomized VMAs (and memory allocations) the same logical lock (pointer)
can end up in a different hash bucket on each invocation of the
application. This in turn means that different applications may share a
hash bucket on the first invocation but not on the second and it is not
always clear which applications will be involved. This can result in
high latency's to acquire the futex_hash_bucket::lock especially if the
lock owner is limited to a CPU and can not be effectively PI boosted.

Introduce basic infrastructure for process local hash which is shared by
all threads of process. This hash will only be used for a
PROCESS_PRIVATE FUTEX operation.

The hashmap can be allocated via:

        prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, num);

A `num' of 0 means that the global hash is used instead of a private
hash.
Other values for `num' specify the number of slots for the hash and the
number must be power of two, starting with two.
The prctl() returns zero on success. This function can only be used
before a thread is created.

The current status for the private hash can be queried via:

        num = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);

which return the current number of slots. The value 0 means that the
global hash is used. Values greater than 0 indicate the number of slots
that are used. A negative number indicates an error.

For optimisation, for the private hash jhash2() uses only two arguments
the address and the offset. This omits the VMA which is always the same.

[peterz: Use 0 for global hash. A bit shuffling and renaming. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250416162921.513656-13-bigeasy@linutronix.de
---
 include/linux/futex.h    | 26 ++++++++++++++++++++++++--
 include/linux/mm_types.h |  5 ++++-
 2 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85..8f1be08bef18 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -4,11 +4,11 @@
 
 #include <linux/sched.h>
 #include <linux/ktime.h>
+#include <linux/mm_types.h>
 
 #include <uapi/linux/futex.h>
 
 struct inode;
-struct mm_struct;
 struct task_struct;
 
 /*
@@ -77,7 +77,22 @@ void futex_exec_release(struct task_struct *tsk);
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
-#else
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
+
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+void futex_hash_free(struct mm_struct *mm);
+
+static inline void futex_mm_init(struct mm_struct *mm)
+{
+	mm->futex_phash =  NULL;
+}
+
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */
+
+#else /* !CONFIG_FUTEX */
 static inline void futex_init_task(struct task_struct *tsk) { }
 static inline void futex_exit_recursive(struct task_struct *tsk) { }
 static inline void futex_exit_release(struct task_struct *tsk) { }
@@ -88,6 +103,13 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
 {
 	return -EINVAL;
 }
+static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
+{
+	return -EINVAL;
+}
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+
 #endif
 
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 56d07edd01f9..a4b5661e4177 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -31,6 +31,7 @@
 #define INIT_PASID	0
 
 struct address_space;
+struct futex_private_hash;
 struct mem_cgroup;
 
 /*
@@ -1031,7 +1032,9 @@ struct mm_struct {
 		 */
 		seqcount_t mm_lock_seq;
 #endif
-
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
+		struct futex_private_hash	*futex_phash;
+#endif
 
 		unsigned long hiwater_rss; /* High-watermark of RSS usage */
 		unsigned long hiwater_vm;  /* High-water virtual memory usage */
-- 
cgit v1.2.3


From 7c4f75a21f636486d2969d9b6680403ea8483539 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 16 Apr 2025 18:29:13 +0200
Subject: futex: Allow automatic allocation of process wide futex hash

Allocate a private futex hash with 16 slots if a task forks its first
thread.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250416162921.513656-14-bigeasy@linutronix.de
---
 include/linux/futex.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 8f1be08bef18..1d3f7555825e 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -80,6 +80,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
 
 #ifdef CONFIG_FUTEX_PRIVATE_HASH
+int futex_hash_allocate_default(void);
 void futex_hash_free(struct mm_struct *mm);
 
 static inline void futex_mm_init(struct mm_struct *mm)
@@ -88,6 +89,7 @@ static inline void futex_mm_init(struct mm_struct *mm)
 }
 
 #else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline int futex_hash_allocate_default(void) { return 0; }
 static inline void futex_hash_free(struct mm_struct *mm) { }
 static inline void futex_mm_init(struct mm_struct *mm) { }
 #endif /* CONFIG_FUTEX_PRIVATE_HASH */
@@ -107,6 +109,10 @@ static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsig
 {
 	return -EINVAL;
 }
+static inline int futex_hash_allocate_default(void)
+{
+	return 0;
+}
 static inline void futex_hash_free(struct mm_struct *mm) { }
 static inline void futex_mm_init(struct mm_struct *mm) { }
 
-- 
cgit v1.2.3


From bd54df5ea7cadac520e346d5f0fe5d58e635b6ba Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 16 Apr 2025 18:29:14 +0200
Subject: futex: Allow to resize the private local hash

The mm_struct::futex_hash_lock guards the futex_hash_bucket assignment/
replacement. The futex_hash_allocate()/ PR_FUTEX_HASH_SET_SLOTS
operation can now be invoked at runtime and resize an already existing
internal private futex_hash_bucket to another size.

The reallocation is based on an idea by Thomas Gleixner: The initial
allocation of struct futex_private_hash sets the reference count
to one. Every user acquires a reference on the local hash before using
it and drops it after it enqueued itself on the hash bucket. There is no
reference held while the task is scheduled out while waiting for the
wake up.
The resize process allocates a new struct futex_private_hash and drops
the initial reference. Synchronized with mm_struct::futex_hash_lock it
is checked if the reference counter for the currently used
mm_struct::futex_phash is marked as DEAD. If so, then all users enqueued
on the current private hash are requeued on the new private hash and the
new private hash is set to mm_struct::futex_phash. Otherwise the newly
allocated private hash is saved as mm_struct::futex_phash_new and the
rehashing and reassigning is delayed to the futex_hash() caller once the
reference counter is marked DEAD.
The replacement is not performed at rcuref_put() time because certain
callers, such as futex_wait_queue(), drop their reference after changing
the task state. This change will be destroyed once the futex_hash_lock
is acquired.

The user can change the number slots with PR_FUTEX_HASH_SET_SLOTS
multiple times. An increase and decrease is allowed and request blocks
until the assignment is done.

The private hash allocated at thread creation is changed from 16 to
  16 <= 4 * number_of_threads <= global_hash_size
where number_of_threads can not exceed the number of online CPUs. Should
the user PR_FUTEX_HASH_SET_SLOTS then the auto scaling is disabled.

[peterz: reorganize the code to avoid state tracking and simplify new
object handling, block the user until changes are in effect, allow
increase and decrease of the hash].

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250416162921.513656-15-bigeasy@linutronix.de
---
 include/linux/futex.h    | 3 ++-
 include/linux/mm_types.h | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 1d3f7555825e..40bc778b2bb4 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -85,7 +85,8 @@ void futex_hash_free(struct mm_struct *mm);
 
 static inline void futex_mm_init(struct mm_struct *mm)
 {
-	mm->futex_phash =  NULL;
+	rcu_assign_pointer(mm->futex_phash, NULL);
+	mutex_init(&mm->futex_hash_lock);
 }
 
 #else /* !CONFIG_FUTEX_PRIVATE_HASH */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a4b5661e4177..32ba5126e221 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1033,7 +1033,9 @@ struct mm_struct {
 		seqcount_t mm_lock_seq;
 #endif
 #ifdef CONFIG_FUTEX_PRIVATE_HASH
-		struct futex_private_hash	*futex_phash;
+		struct mutex			futex_hash_lock;
+		struct futex_private_hash	__rcu *futex_phash;
+		struct futex_private_hash	*futex_phash_new;
 #endif
 
 		unsigned long hiwater_rss; /* High-watermark of RSS usage */
-- 
cgit v1.2.3


From cec199c5e39bde7191a08087cc3d002ccfab31ff Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Apr 2025 18:29:16 +0200
Subject: futex: Implement FUTEX2_NUMA

Extend the futex2 interface to be numa aware.

When FUTEX2_NUMA is specified for a futex, the user value is extended
to two words (of the same size). The first is the user value we all
know, the second one will be the node to place this futex on.

  struct futex_numa_32 {
	u32 val;
	u32 node;
  };

When node is set to ~0, WAIT will set it to the current node_id such
that WAKE knows where to find it. If userspace corrupts the node value
between WAIT and WAKE, the futex will not be found and no wakeup will
happen.

When FUTEX2_NUMA is not set, the node is simply an extension of the
hash, such that traditional futexes are still interleaved over the
nodes.

This is done to avoid having to have a separate !numa hash-table.

[bigeasy: ensure to have at least hashsize of 4 in futex_init(), add
pr_info() for size and allocation information. Cast the naddr math to
void*]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250416162921.513656-17-bigeasy@linutronix.de
---
 include/linux/futex.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 40bc778b2bb4..eccc99751bd9 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -34,6 +34,7 @@ union futex_key {
 		u64 i_seq;
 		unsigned long pgoff;
 		unsigned int offset;
+		/* unsigned int node; */
 	} shared;
 	struct {
 		union {
@@ -42,11 +43,13 @@ union futex_key {
 		};
 		unsigned long address;
 		unsigned int offset;
+		/* unsigned int node; */
 	} private;
 	struct {
 		u64 ptr;
 		unsigned long word;
 		unsigned int offset;
+		unsigned int node;	/* NOT hashed! */
 	} both;
 };
 
-- 
cgit v1.2.3


From c042c505210dc3453f378df432c10fff3d471bc5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Apr 2025 18:29:17 +0200
Subject: futex: Implement FUTEX2_MPOL

Extend the futex2 interface to be aware of mempolicy.

When FUTEX2_MPOL is specified and there is a MPOL_PREFERRED or
home_node specified covering the futex address, use that hash-map.

Notably, in this case the futex will go to the global node hashtable,
even if it is a PRIVATE futex.

When FUTEX2_NUMA|FUTEX2_MPOL is specified and the user specified node
value is FUTEX_NO_NODE, the MPOL lookup (as described above) will be
tried first before reverting to setting node to the local node.

[bigeasy: add CONFIG_FUTEX_MPOL, add MPOL to FUTEX2_VALID_MASK, write
the node only to user if FUTEX_NO_NODE was supplied]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250416162921.513656-18-bigeasy@linutronix.de
---
 include/linux/mmap_lock.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 4706c6769902..e0eddfd306ef 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -7,6 +7,7 @@
 #include <linux/rwsem.h>
 #include <linux/tracepoint-defs.h>
 #include <linux/types.h>
+#include <linux/cleanup.h>
 
 #define MMAP_LOCK_INITIALIZER(name) \
 	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
@@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
 	up_read(&mm->mmap_lock);
 }
 
+DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
+	     mmap_read_lock(_T), mmap_read_unlock(_T))
+
 static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
 {
 	__mmap_lock_trace_released(mm, false);
-- 
cgit v1.2.3


From 4862c8861d902d43645a493e441c4478be1c6c44 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 29 Apr 2025 18:50:17 +0200
Subject: dm: Allow .prepare_ioctl to handle ioctls directly

This adds a 'bool *forward' parameter to .prepare_ioctl, which allows
device mapper targets to accept ioctls to themselves instead of the
underlying device. If the target already fully handled the ioctl, it
sets *forward to false and device mapper won't forward it to the
underlying device any more.

In order for targets to actually know what the ioctl is about and how to
handle it, pass also cmd and arg.

As long as targets restrict themselves to interpreting ioctls of type
DM_IOCTL, this is a backwards compatible change because previously, any
such ioctl would have been passed down through all device mapper layers
until it reached a device that can't understand the ioctl and would
return an error.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 include/linux/device-mapper.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index bcc6d7b69470..cb95951547ab 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -93,7 +93,14 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
 typedef int (*dm_message_fn) (struct dm_target *ti, unsigned int argc, char **argv,
 			      char *result, unsigned int maxlen);
 
-typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev);
+/*
+ * Called with *forward == true. If it remains true, the ioctl should be
+ * forwarded to bdev. If it is reset to false, the target already fully handled
+ * the ioctl and the return value is the return value for the whole ioctl.
+ */
+typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev,
+				    unsigned int cmd, unsigned long arg,
+				    bool *forward);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 typedef int (*dm_report_zones_fn) (struct dm_target *ti,
-- 
cgit v1.2.3


From 8fd17374be8f220c26bec2b482cabf51ebbaed80 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 1 May 2025 20:37:32 +0800
Subject: crypto: api - Rename CRYPTO_ALG_REQ_CHAIN to CRYPTO_ALG_REQ_VIRT

As chaining has been removed, all that remains of REQ_CHAIN is
just virtual address support.  Rename it before the reintroduction
of batching creates confusion.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index b8d875b11193..b50f1954d1bb 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -133,8 +133,8 @@
  */
 #define CRYPTO_ALG_FIPS_INTERNAL	0x00020000
 
-/* Set if the algorithm supports request chains and virtual addresses. */
-#define CRYPTO_ALG_REQ_CHAIN		0x00040000
+/* Set if the algorithm supports virtual addresses. */
+#define CRYPTO_ALG_REQ_VIRT		0x00040000
 
 /* The high bits 0xff000000 are reserved for type-specific flags. */
 
-- 
cgit v1.2.3


From 6b3754009f871083b114daacbad5b87a494da4b8 Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Fri, 11 Apr 2025 14:41:10 +0200
Subject: reset: Add devm_reset_control_array_get_exclusive_released()

Add the released variant of devm_reset_control_array_get_exclusive().
Needed by spi-smt32-ospi driver as same reset line is ulso used by
stm32-omm driver.

Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Link: https://lore.kernel.org/r/20250411-b4-upstream_ospi_reset_update-v2-1-4de7f5dd2a91@foss.st.com
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 2986ced69a02..840d75d172f6 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -1004,6 +1004,12 @@ devm_reset_control_array_get_exclusive(struct device *dev)
 	return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE);
 }
 
+static inline struct reset_control *
+devm_reset_control_array_get_exclusive_released(struct device *dev)
+{
+	return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE_RELEASED);
+}
+
 static inline struct reset_control *
 devm_reset_control_array_get_shared(struct device *dev)
 {
-- 
cgit v1.2.3


From 5d2ea5aebbb2f3ebde4403f9c55b2b057e5dd2d6 Mon Sep 17 00:00:00 2001
From: Patrisious Haddad <phaddad@nvidia.com>
Date: Mon, 28 Apr 2025 14:34:07 +0300
Subject: RDMA/mlx5: Fix error flow upon firmware failure for RQ destruction

Upon RQ destruction if the firmware command fails which is the
last resource to be destroyed some SW resources were already cleaned
regardless of the failure.

Now properly rollback the object to its original state upon such failure.

In order to avoid a use-after free in case someone tries to destroy the
object again, which results in the following kernel trace:
refcount_t: underflow; use-after-free.
WARNING: CPU: 0 PID: 37589 at lib/refcount.c:28 refcount_warn_saturate+0xf4/0x148
Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) rfkill mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) psample mlxfw(OE) mlx_compat(OE) macsec tls pci_hyperv_intf sunrpc vfat fat virtio_net net_failover failover fuse loop nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_console virtio_gpu virtio_blk virtio_dma_buf virtio_mmio dm_mirror dm_region_hash dm_log dm_mod xpmem(OE)
CPU: 0 UID: 0 PID: 37589 Comm: python3 Kdump: loaded Tainted: G           OE     -------  ---  6.12.0-54.el10.aarch64 #1
Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : refcount_warn_saturate+0xf4/0x148
lr : refcount_warn_saturate+0xf4/0x148
sp : ffff80008b81b7e0
x29: ffff80008b81b7e0 x28: ffff000133d51600 x27: 0000000000000001
x26: 0000000000000000 x25: 00000000ffffffea x24: ffff00010ae80f00
x23: ffff00010ae80f80 x22: ffff0000c66e5d08 x21: 0000000000000000
x20: ffff0000c66e0000 x19: ffff00010ae80340 x18: 0000000000000006
x17: 0000000000000000 x16: 0000000000000020 x15: ffff80008b81b37f
x14: 0000000000000000 x13: 2e656572662d7265 x12: ffff80008283ef78
x11: ffff80008257efd0 x10: ffff80008283efd0 x9 : ffff80008021ed90
x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff
x5 : ffff0001fb8e3408 x4 : 0000000000000000 x3 : ffff800179993000
x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000133d51600
Call trace:
 refcount_warn_saturate+0xf4/0x148
 mlx5_core_put_rsc+0x88/0xa0 [mlx5_ib]
 mlx5_core_destroy_rq_tracked+0x64/0x98 [mlx5_ib]
 mlx5_ib_destroy_wq+0x34/0x80 [mlx5_ib]
 ib_destroy_wq_user+0x30/0xc0 [ib_core]
 uverbs_free_wq+0x28/0x58 [ib_uverbs]
 destroy_hw_idr_uobject+0x34/0x78 [ib_uverbs]
 uverbs_destroy_uobject+0x48/0x240 [ib_uverbs]
 __uverbs_cleanup_ufile+0xd4/0x1a8 [ib_uverbs]
 uverbs_destroy_ufile_hw+0x48/0x120 [ib_uverbs]
 ib_uverbs_close+0x2c/0x100 [ib_uverbs]
 __fput+0xd8/0x2f0
 __fput_sync+0x50/0x70
 __arm64_sys_close+0x40/0x90
 invoke_syscall.constprop.0+0x74/0xd0
 do_el0_svc+0x48/0xe8
 el0_svc+0x44/0x1d0
 el0t_64_sync_handler+0x120/0x130
 el0t_64_sync+0x1a4/0x1a8

Fixes: e2013b212f9f ("net/mlx5_core: Add RQ and SQ event handling")
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Link: https://patch.msgid.link/3181433ccdd695c63560eeeb3f0c990961732101.1745839855.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d1dfbad9a447..e6ba8f4f4bd1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -398,6 +398,7 @@ struct mlx5_core_rsc_common {
 	enum mlx5_res_type	res;
 	refcount_t		refcount;
 	struct completion	free;
+	bool			invalid;
 };
 
 struct mlx5_uars_page {
-- 
cgit v1.2.3


From 575ec9b0c2f11f40535ea737ed5a64792780d1ef Mon Sep 17 00:00:00 2001
From: Arvind Yadav <Arvind.Yadav@amd.com>
Date: Fri, 25 Apr 2025 19:36:50 +0530
Subject: dma-fence: Add helper to sort and deduplicate dma_fence arrays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Export a new helper function `dma_fence_dedup_array()` that sorts
an array of dma_fence pointers by context, then deduplicates the array
by retaining only the most recent fence per context.

This utility is useful when merging or optimizing sets of fences where
redundant entries from the same context can be pruned. The operation is
performed in-place and releases references to dropped fences using
dma_fence_put().

v2: - Export this code from dma-fence-unwrap.c(by Christian).
v3: - To split this in a dma_buf patch and amd userq patch(by Sunil).
    - No need to add a new function just re-use existing(by Christian).
v4: - Export dma_fence_dedub_array and use it(by Christian).

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Arvind Yadav <Arvind.Yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/linux/dma-fence-unwrap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
index 66b1e56fbb81..62df222fe0f1 100644
--- a/include/linux/dma-fence-unwrap.h
+++ b/include/linux/dma-fence-unwrap.h
@@ -52,6 +52,8 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
 					   struct dma_fence **fences,
 					   struct dma_fence_unwrap *cursors);
 
+int dma_fence_dedup_array(struct dma_fence **array, int num_fences);
+
 /**
  * dma_fence_unwrap_merge - unwrap and merge fences
  *
-- 
cgit v1.2.3


From eeadd68e2a5f6bfe0bf1038ec49e3a8d99eb5fe8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 May 2025 10:11:25 +0200
Subject: block: remove bounce buffering support

The block layer bounce buffering support is unused now, remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20250505081138.3435992-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 1 -
 include/linux/blkdev.h    | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dce7615c35e7..5a46067e85b1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -286,7 +286,6 @@ struct bio {
 enum {
 	BIO_PAGE_PINNED,	/* Unpin pages in bio_release_pages() */
 	BIO_CLONED,		/* doesn't own data */
-	BIO_BOUNCED,		/* bio is a bounce bio */
 	BIO_QUIET,		/* Make BIO Quiet */
 	BIO_CHAIN,		/* chained bio, ->bi_remaining in effect */
 	BIO_REFFED,		/* bio has elevated ->bi_cnt */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3d74f9dae8e..5ccb961ee2ae 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -331,9 +331,6 @@ typedef unsigned int __bitwise blk_features_t;
 /* skip this queue in blk_mq_(un)quiesce_tagset */
 #define BLK_FEAT_SKIP_TAGSET_QUIESCE	((__force blk_features_t)(1u << 13))
 
-/* bounce all highmem pages */
-#define BLK_FEAT_BOUNCE_HIGH		((__force blk_features_t)(1u << 14))
-
 /* undocumented magic for bcache */
 #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
 	((__force blk_features_t)(1u << 15))
@@ -347,7 +344,7 @@ typedef unsigned int __bitwise blk_features_t;
  */
 #define BLK_FEAT_INHERIT_MASK \
 	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
-	 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \
+	 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \
 	 BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
 
 /* internal flags in queue_limits.flags */
-- 
cgit v1.2.3


From 194df9f66db8d6f74f03c78c2ad47b74a5a8b886 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 May 2025 10:11:26 +0200
Subject: mm: remove NR_BOUNCE zone stat

The stat is always 0 now, so remove it and hardwire the user visible
output to 0.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20250505081138.3435992-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/mmzone.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6ccec1bf2896..b1c459f7a485 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -148,7 +148,6 @@ enum zone_stat_item {
 	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
 	/* Second 128 byte cacheline */
-	NR_BOUNCE,
 #if IS_ENABLED(CONFIG_ZSMALLOC)
 	NR_ZSPAGES,		/* allocated in zsmalloc */
 #endif
-- 
cgit v1.2.3


From f4fcfdda2fd8834c62dcb9bfddcf1f89d190b70e Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 23 Apr 2025 14:42:13 -0500
Subject: of: reserved_mem: Add functions to parse "memory-region"

Drivers with "memory-region" properties currently have to do their own
parsing of "memory-region" properties. The result is all the drivers
have similar patterns of a call to parse "memory-region" and then get
the region's address and size. As this is a standard property, it should
have common functions for drivers to use. Add new functions to count the
number of regions and retrieve the region's address as a resource.

Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Acked-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Link: https://lore.kernel.org/r/20250423-dt-memory-region-v2-v2-1-2fbd6ebd3c88@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of_reserved_mem.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index e338282da652..f573423359f4 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -7,6 +7,7 @@
 
 struct of_phandle_args;
 struct reserved_mem_ops;
+struct resource;
 
 struct reserved_mem {
 	const char			*name;
@@ -39,6 +40,12 @@ int of_reserved_mem_device_init_by_name(struct device *dev,
 void of_reserved_mem_device_release(struct device *dev);
 
 struct reserved_mem *of_reserved_mem_lookup(struct device_node *np);
+int of_reserved_mem_region_to_resource(const struct device_node *np,
+				       unsigned int idx, struct resource *res);
+int of_reserved_mem_region_to_resource_byname(const struct device_node *np,
+					      const char *name, struct resource *res);
+int of_reserved_mem_region_count(const struct device_node *np);
+
 #else
 
 #define RESERVEDMEM_OF_DECLARE(name, compat, init)			\
@@ -63,6 +70,25 @@ static inline struct reserved_mem *of_reserved_mem_lookup(struct device_node *np
 {
 	return NULL;
 }
+
+static inline int of_reserved_mem_region_to_resource(const struct device_node *np,
+						     unsigned int idx,
+						     struct resource *res)
+{
+	return -ENOSYS;
+}
+
+static inline int of_reserved_mem_region_to_resource_byname(const struct device_node *np,
+							    const char *name,
+							    struct resource *res)
+{
+	return -ENOSYS;
+}
+
+static inline int of_reserved_mem_region_count(const struct device_node *np)
+{
+	return 0;
+}
 #endif
 
 /**
-- 
cgit v1.2.3


From 23227e71b69af95e421e263302d13f426c548155 Mon Sep 17 00:00:00 2001
From: Guan-Chun Wu <409411716@gms.tku.edu.tw>
Date: Sat, 3 May 2025 16:12:03 +0800
Subject: workqueue: fix typo in comment

Fix a duplicated word "that that" in the comment describing the
@max_active behavior for unbound workqueues.

Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b0dc957c3e56..a6dacc0eb688 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -480,7 +480,7 @@ void workqueue_softirq_dead(unsigned int cpu);
  * executing at most one work item for the workqueue.
  *
  * For unbound workqueues, @max_active limits the number of in-flight work items
- * for the whole system. e.g. @max_active of 16 indicates that that there can be
+ * for the whole system. e.g. @max_active of 16 indicates that there can be
  * at most 16 work items executing for the workqueue in the whole system.
  *
  * As sharing the same active counter for an unbound workqueue across multiple
-- 
cgit v1.2.3


From a25e7962db0d79882c9d32fd2a67a02e79129c0e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 May 2025 10:01:38 +0300
Subject: PCI/P2PDMA: Refactor the p2pdma mapping helpers

The current scheme with a single helper to determine the P2P status
and map a scatterlist segment force users to always use the map_sg
helper to DMA map, which we're trying to get away from because they
are very cache inefficient.

Refactor the code so that there is a single helper that checks the P2P
state for a page, including the result that it is not a P2P page to
simplify the callers, and a second one to perform the address translation
for a bus mapped P2P transfer that does not depend on the scatterlist
structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/dma-map-ops.h | 51 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index e172522cd936..c3086edeccc6 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -443,6 +443,11 @@ enum pci_p2pdma_map_type {
 	 */
 	PCI_P2PDMA_MAP_UNKNOWN = 0,
 
+	/*
+	 * Not a PCI P2PDMA transfer.
+	 */
+	PCI_P2PDMA_MAP_NONE,
+
 	/*
 	 * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will
 	 * traverse the host bridge and the host bridge is not in the
@@ -471,21 +476,47 @@ enum pci_p2pdma_map_type {
 
 struct pci_p2pdma_map_state {
 	struct dev_pagemap *pgmap;
-	int map;
+	enum pci_p2pdma_map_type map;
 	u64 bus_off;
 };
 
-#ifdef CONFIG_PCI_P2PDMA
-enum pci_p2pdma_map_type
-pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
-		       struct scatterlist *sg);
-#else /* CONFIG_PCI_P2PDMA */
+/* helper for pci_p2pdma_state(), do not use directly */
+void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
+		struct device *dev, struct page *page);
+
+/**
+ * pci_p2pdma_state - check the P2P transfer state of a page
+ * @state:	P2P state structure
+ * @dev:	device to transfer to/from
+ * @page:	page to map
+ *
+ * Check if @page is a PCI P2PDMA page, and if yes of what kind.  Returns the
+ * map type, and updates @state with all information needed for a P2P transfer.
+ */
 static inline enum pci_p2pdma_map_type
-pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
-		       struct scatterlist *sg)
+pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev,
+		struct page *page)
+{
+	if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
+		if (state->pgmap != page_pgmap(page))
+			__pci_p2pdma_update_state(state, dev, page);
+		return state->map;
+	}
+	return PCI_P2PDMA_MAP_NONE;
+}
+
+/**
+ * pci_p2pdma_bus_addr_map - map a PCI_P2PDMA_MAP_BUS_ADDR P2P transfer
+ * @state:	P2P state structure
+ * @paddr:	physical address to map
+ *
+ * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer.
+ */
+static inline dma_addr_t
+pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr)
 {
-	return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+	WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR);
+	return paddr + state->bus_off;
 }
-#endif /* CONFIG_PCI_P2PDMA */
 
 #endif /* _LINUX_DMA_MAP_OPS_H */
-- 
cgit v1.2.3


From ca2c2e4a78c619bcf1c1df29fee5981035f3fcbc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 May 2025 10:01:39 +0300
Subject: dma-mapping: move the PCI P2PDMA mapping helpers to pci-p2pdma.h

To support the upcoming non-scatterlist mapping helpers, we need to go
back to have them called outside of the DMA API.  Thus move them out of
dma-map-ops.h, which is only for DMA API implementations to pci-p2pdma.h,
which is for driver use.

Note that the core helper is still not exported as the mapping is
expected to be done only by very highlevel subsystem code at least for
now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/dma-map-ops.h | 85 ---------------------------------------------
 include/linux/pci-p2pdma.h  | 85 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index c3086edeccc6..f48e5fb88bd5 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -434,89 +434,4 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 #endif /* CONFIG_DMA_API_DEBUG */
 
 extern const struct dma_map_ops dma_dummy_ops;
-
-enum pci_p2pdma_map_type {
-	/*
-	 * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping
-	 * type hasn't been calculated yet. Functions that return this enum
-	 * never return this value.
-	 */
-	PCI_P2PDMA_MAP_UNKNOWN = 0,
-
-	/*
-	 * Not a PCI P2PDMA transfer.
-	 */
-	PCI_P2PDMA_MAP_NONE,
-
-	/*
-	 * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will
-	 * traverse the host bridge and the host bridge is not in the
-	 * allowlist. DMA Mapping routines should return an error when
-	 * this is returned.
-	 */
-	PCI_P2PDMA_MAP_NOT_SUPPORTED,
-
-	/*
-	 * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to
-	 * each other directly through a PCI switch and the transaction will
-	 * not traverse the host bridge. Such a mapping should program
-	 * the DMA engine with PCI bus addresses.
-	 */
-	PCI_P2PDMA_MAP_BUS_ADDR,
-
-	/*
-	 * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk
-	 * to each other, but the transaction traverses a host bridge on the
-	 * allowlist. In this case, a normal mapping either with CPU physical
-	 * addresses (in the case of dma-direct) or IOVA addresses (in the
-	 * case of IOMMUs) should be used to program the DMA engine.
-	 */
-	PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
-};
-
-struct pci_p2pdma_map_state {
-	struct dev_pagemap *pgmap;
-	enum pci_p2pdma_map_type map;
-	u64 bus_off;
-};
-
-/* helper for pci_p2pdma_state(), do not use directly */
-void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
-		struct device *dev, struct page *page);
-
-/**
- * pci_p2pdma_state - check the P2P transfer state of a page
- * @state:	P2P state structure
- * @dev:	device to transfer to/from
- * @page:	page to map
- *
- * Check if @page is a PCI P2PDMA page, and if yes of what kind.  Returns the
- * map type, and updates @state with all information needed for a P2P transfer.
- */
-static inline enum pci_p2pdma_map_type
-pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev,
-		struct page *page)
-{
-	if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
-		if (state->pgmap != page_pgmap(page))
-			__pci_p2pdma_update_state(state, dev, page);
-		return state->map;
-	}
-	return PCI_P2PDMA_MAP_NONE;
-}
-
-/**
- * pci_p2pdma_bus_addr_map - map a PCI_P2PDMA_MAP_BUS_ADDR P2P transfer
- * @state:	P2P state structure
- * @paddr:	physical address to map
- *
- * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer.
- */
-static inline dma_addr_t
-pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr)
-{
-	WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR);
-	return paddr + state->bus_off;
-}
-
 #endif /* _LINUX_DMA_MAP_OPS_H */
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index 2c07aa6b7665..075c20b161d9 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -104,4 +104,89 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client)
 	return pci_p2pmem_find_many(&client, 1);
 }
 
+enum pci_p2pdma_map_type {
+	/*
+	 * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before
+	 * the mapping type has been calculated. Exported routines for the API
+	 * will never return this value.
+	 */
+	PCI_P2PDMA_MAP_UNKNOWN = 0,
+
+	/*
+	 * Not a PCI P2PDMA transfer.
+	 */
+	PCI_P2PDMA_MAP_NONE,
+
+	/*
+	 * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will
+	 * traverse the host bridge and the host bridge is not in the
+	 * allowlist. DMA Mapping routines should return an error when
+	 * this is returned.
+	 */
+	PCI_P2PDMA_MAP_NOT_SUPPORTED,
+
+	/*
+	 * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to
+	 * each other directly through a PCI switch and the transaction will
+	 * not traverse the host bridge. Such a mapping should program
+	 * the DMA engine with PCI bus addresses.
+	 */
+	PCI_P2PDMA_MAP_BUS_ADDR,
+
+	/*
+	 * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk
+	 * to each other, but the transaction traverses a host bridge on the
+	 * allowlist. In this case, a normal mapping either with CPU physical
+	 * addresses (in the case of dma-direct) or IOVA addresses (in the
+	 * case of IOMMUs) should be used to program the DMA engine.
+	 */
+	PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
+};
+
+struct pci_p2pdma_map_state {
+	struct dev_pagemap *pgmap;
+	enum pci_p2pdma_map_type map;
+	u64 bus_off;
+};
+
+/* helper for pci_p2pdma_state(), do not use directly */
+void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
+		struct device *dev, struct page *page);
+
+/**
+ * pci_p2pdma_state - check the P2P transfer state of a page
+ * @state:	P2P state structure
+ * @dev:	device to transfer to/from
+ * @page:	page to map
+ *
+ * Check if @page is a PCI P2PDMA page, and if yes of what kind.  Returns the
+ * map type, and updates @state with all information needed for a P2P transfer.
+ */
+static inline enum pci_p2pdma_map_type
+pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev,
+		struct page *page)
+{
+	if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
+		if (state->pgmap != page_pgmap(page))
+			__pci_p2pdma_update_state(state, dev, page);
+		return state->map;
+	}
+	return PCI_P2PDMA_MAP_NONE;
+}
+
+/**
+ * pci_p2pdma_bus_addr_map - Translate a physical address to a bus address
+ *			     for a PCI_P2PDMA_MAP_BUS_ADDR transfer.
+ * @state:	P2P state structure
+ * @paddr:	physical address to map
+ *
+ * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer.
+ */
+static inline dma_addr_t
+pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr)
+{
+	WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR);
+	return paddr + state->bus_off;
+}
+
 #endif /* _LINUX_PCI_P2P_H */
-- 
cgit v1.2.3


From 5c87cffe2d3853cfae61e9373ee98a0409839178 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 May 2025 10:01:40 +0300
Subject: iommu: generalize the batched sync after map interface

For the upcoming IOVA-based DMA API we want to batch the
ops->iotlb_sync_map() call after mapping multiple IOVAs from
dma-iommu without having a scatterlist. Improve the API.

Add a wrapper for the map_sync as iommu_sync_map() so that callers
don't need to poke into the methods directly.

Formalize __iommu_map() into iommu_map_nosync() which requires the
caller to call iommu_sync_map() after all maps are completed.

Refactor the existing sanity checks from all the different layers
into iommu_map_nosync().

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Will Deacon <will@kernel.org>
Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/iommu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ccce8a751e2a..ce472af8e9c3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -872,6 +872,10 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
 extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		     phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
+		phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+int iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+		size_t size);
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 			  size_t size);
 extern size_t iommu_unmap_fast(struct iommu_domain *domain,
-- 
cgit v1.2.3


From 393cf700e6242032fbad51b248111ab6740ce8ad Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 5 May 2025 10:01:42 +0300
Subject: dma-mapping: Provide an interface to allow allocate IOVA

The existing .map_pages() callback provides both allocating of IOVA
and linking DMA pages. That combination works great for most of the
callers who use it in control paths, but is less effective in fast
paths where there may be multiple calls to map_page().

These advanced callers already manage their data in some sort of
database and can perform IOVA allocation in advance, leaving range
linkage operation to be in fast path.

Provide an interface to allocate/deallocate IOVA and next patch
link/unlink DMA ranges to that specific IOVA.

In the new API a DMA mapping transaction is identified by a
struct dma_iova_state, which holds some recomputed information
for the transaction which does not change for each page being
mapped, so add a check if IOVA can be used for the specific
transaction.

The API is exported from dma-iommu as it is the only implementation
supported, the namespace is clearly different from iommu_* functions
which are not allowed to be used. This code layout allows us to save
function call per API call used in datapath as well as a lot of boilerplate
code.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/dma-mapping.h | 48 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index b79925b1c433..de7f73810d54 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -72,6 +72,22 @@
 
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
+struct dma_iova_state {
+	dma_addr_t addr;
+	u64 __size;
+};
+
+/*
+ * Use the high bit to mark if we used swiotlb for one or more ranges.
+ */
+#define DMA_IOVA_USE_SWIOTLB		(1ULL << 63)
+
+static inline size_t dma_iova_size(struct dma_iova_state *state)
+{
+	/* Casting is needed for 32-bits systems */
+	return (size_t)(state->__size & ~DMA_IOVA_USE_SWIOTLB);
+}
+
 #ifdef CONFIG_DMA_API_DEBUG
 void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 void debug_dma_map_single(struct device *dev, const void *addr,
@@ -277,6 +293,38 @@ static inline int dma_mmap_noncontiguous(struct device *dev,
 }
 #endif /* CONFIG_HAS_DMA */
 
+#ifdef CONFIG_IOMMU_DMA
+/**
+ * dma_use_iova - check if the IOVA API is used for this state
+ * @state: IOVA state
+ *
+ * Return %true if the DMA transfers uses the dma_iova_*() calls or %false if
+ * they can't be used.
+ */
+static inline bool dma_use_iova(struct dma_iova_state *state)
+{
+	return state->__size != 0;
+}
+
+bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
+		phys_addr_t phys, size_t size);
+void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+#else /* CONFIG_IOMMU_DMA */
+static inline bool dma_use_iova(struct dma_iova_state *state)
+{
+	return false;
+}
+static inline bool dma_iova_try_alloc(struct device *dev,
+		struct dma_iova_state *state, phys_addr_t phys, size_t size)
+{
+	return false;
+}
+static inline void dma_iova_free(struct device *dev,
+		struct dma_iova_state *state)
+{
+}
+#endif /* CONFIG_IOMMU_DMA */
+
 #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
 void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
 		enum dma_data_direction dir);
-- 
cgit v1.2.3


From 433a76207dcf5facc0183acb790f6e8398585258 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 5 May 2025 10:01:44 +0300
Subject: dma-mapping: Implement link/unlink ranges API

Introduce new DMA APIs to perform DMA linkage of buffers
in layers higher than DMA.

In proposed API, the callers will perform the following steps.
In map path:
	if (dma_can_use_iova(...))
	    dma_iova_alloc()
	    for (page in range)
	       dma_iova_link_next(...)
	    dma_iova_sync(...)
	else
	     /* Fallback to legacy map pages */
             for (all pages)
	       dma_map_page(...)

In unmap path:
	if (dma_can_use_iova(...))
	     dma_iova_destroy()
	else
	     for (all pages)
		dma_unmap_page(...)

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/dma-mapping.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index de7f73810d54..a71e110f1e9d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -309,6 +309,17 @@ static inline bool dma_use_iova(struct dma_iova_state *state)
 bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
 		phys_addr_t phys, size_t size);
 void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+		size_t mapped_len, enum dma_data_direction dir,
+		unsigned long attrs);
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+		size_t offset, size_t size);
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+		phys_addr_t phys, size_t offset, size_t size,
+		enum dma_data_direction dir, unsigned long attrs);
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+		size_t offset, size_t size, enum dma_data_direction dir,
+		unsigned long attrs);
 #else /* CONFIG_IOMMU_DMA */
 static inline bool dma_use_iova(struct dma_iova_state *state)
 {
@@ -323,6 +334,27 @@ static inline void dma_iova_free(struct device *dev,
 		struct dma_iova_state *state)
 {
 }
+static inline void dma_iova_destroy(struct device *dev,
+		struct dma_iova_state *state, size_t mapped_len,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+}
+static inline int dma_iova_sync(struct device *dev,
+		struct dma_iova_state *state, size_t offset, size_t size)
+{
+	return -EOPNOTSUPP;
+}
+static inline int dma_iova_link(struct device *dev,
+		struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	return -EOPNOTSUPP;
+}
+static inline void dma_iova_unlink(struct device *dev,
+		struct dma_iova_state *state, size_t offset, size_t size,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+}
 #endif /* CONFIG_IOMMU_DMA */
 
 #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
-- 
cgit v1.2.3


From 5f3b133a23c545272b6bc1e818a5a35e1ca84b1e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 May 2025 10:01:45 +0300
Subject: dma-mapping: add a dma_need_unmap helper

Add helper that allows a driver to skip calling dma_unmap_*
if the DMA layer can guarantee that they are no-nops.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/dma-mapping.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index a71e110f1e9d..d2f358c5a25d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -406,6 +406,7 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
 }
+bool dma_need_unmap(struct device *dev);
 #else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
 static inline bool dma_dev_need_sync(const struct device *dev)
 {
@@ -431,6 +432,10 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return false;
 }
+static inline bool dma_need_unmap(struct device *dev)
+{
+	return false;
+}
 #endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
 
 struct page *dma_alloc_pages(struct device *dev, size_t size,
-- 
cgit v1.2.3


From 56dee46ff47f0ef9944dddd1fd137c94b7c2d9de Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 5 May 2025 22:17:40 +0800
Subject: block: move ELEVATOR_FLAG_DISABLE_WBT a request queue flag

ELEVATOR_FLAG_DISABLE_WBT is only used by BFQ to disallow wbt when BFQ is
in use. The flag is set in BFQ's init(), and cleared in BFQ's exit().

Making it as request queue flag, so that we can avoid to deal with elevator
switch race. Also it isn't graceful to checking one scheduler flag in
wbt_enable_default().

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250505141805.2751237-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5ccb961ee2ae..c36d7a1c2cc0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -641,6 +641,7 @@ enum {
 	QUEUE_FLAG_RQ_ALLOC_TIME,	/* record rq->alloc_time_ns */
 	QUEUE_FLAG_HCTX_ACTIVE,		/* at least one blk-mq hctx is active */
 	QUEUE_FLAG_SQ_SCHED,		/* single queue style io dispatch */
+	QUEUE_FLAG_DISABLE_WBT_DEF,	/* for sched to disable/enable wbt */
 	QUEUE_FLAG_MAX
 };
 
@@ -676,6 +677,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_sq_sched(q)	test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
 #define blk_queue_skip_tagset_quiesce(q) \
 	((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
+#define blk_queue_disable_wbt(q)	\
+	test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
-- 
cgit v1.2.3


From 98e68f67020ce30e1a4d8e2d05d85a453738dfb8 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 5 May 2025 22:17:46 +0800
Subject: block: prevent adding/deleting disk during updating nr_hw_queues

Both adding/deleting disk code are reader of `nr_hw_queues`, so we can't
allow them in-progress when updating nr_hw_queues, kernel panic and
kasan has been reported in [1].

Prevent adding/deleting disk during updating nr_hw_queues by adding
rw_semaphore to tagset, write lock is grabbed in blk_mq_update_nr_hw_queues(),
and read lock is acquired when adding/deleting disk.

Also mark GFP_NOIO allocation scope for adding/deleting disk because
blk_mq_update_nr_hw_queues() is part of some driver's error handler.

This way avoids lot of trouble.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Suggested-by: Nilay Shroff <nilay@linux.ibm.com>
Reported-by: Nilay Shroff <nilay@linux.ibm.com>
Closes: https://lore.kernel.org/linux-block/a5896cdb-a59a-4a37-9f99-20522f5d2987@linux.ibm.com/
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250505141805.2751237-9-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8eb9b3310167..ef84d53095a6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -9,6 +9,7 @@
 #include <linux/prefetch.h>
 #include <linux/srcu.h>
 #include <linux/rw_hint.h>
+#include <linux/rwsem.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -527,6 +528,8 @@ struct blk_mq_tag_set {
 	struct mutex		tag_list_lock;
 	struct list_head	tag_list;
 	struct srcu_struct	*srcu;
+
+	struct rw_semaphore	update_nr_hwq_lock;
 };
 
 /**
-- 
cgit v1.2.3


From 21eed794ab4bd1a6c82a55df4416d18fb4d21da9 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 5 May 2025 22:17:58 +0800
Subject: block: add new helper for disabling elevator switch when deleting
 disk

Add new helper disable_elv_switch() and new flag QUEUE_FLAG_NO_ELV_SWITCH
for disabling elevator switch before deleting disk:

- originally flag QUEUE_FLAG_REGISTERED is added for preventing elevator
switch during removing disk, but this flag has been used widely for
other purposes, so add one new flag for disabling elevator switch only

- for avoiding deadlock risk, we have to move elevator queue
register/unregister out of elevator lock and queue freeze, which will be
done in next patch. However, this way adds small race window between elevator
switch and deleting ->queue_kobj, in which elevator queue register/unregister
could be run concurrently. The added helper will be used for avoiding the race
in the following patch.

- drain in-progress elevator switch before deleting disk

Suggested-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250505141805.2751237-21-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c36d7a1c2cc0..3aa1fd637d57 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -642,6 +642,7 @@ enum {
 	QUEUE_FLAG_HCTX_ACTIVE,		/* at least one blk-mq hctx is active */
 	QUEUE_FLAG_SQ_SCHED,		/* single queue style io dispatch */
 	QUEUE_FLAG_DISABLE_WBT_DEF,	/* for sched to disable/enable wbt */
+	QUEUE_FLAG_NO_ELV_SWITCH,	/* can't switch elevator any more */
 	QUEUE_FLAG_MAX
 };
 
@@ -679,6 +680,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 	((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
 #define blk_queue_disable_wbt(q)	\
 	test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
+#define blk_queue_no_elv_switch(q)	\
+	test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
-- 
cgit v1.2.3


From 78c271344b6f64ce24c845e54903e09928cf2061 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 5 May 2025 22:18:03 +0800
Subject: block: move wbt_enable_default() out of queue freezing from sched
 ->exit()

scheduler's ->exit() is called with queue frozen and elevator lock is held, and
wbt_enable_default() can't be called with queue frozen, otherwise the
following lockdep warning is triggered:

	#6 (&q->rq_qos_mutex){+.+.}-{4:4}:
	#5 (&eq->sysfs_lock){+.+.}-{4:4}:
	#4 (&q->elevator_lock){+.+.}-{4:4}:
	#3 (&q->q_usage_counter(io)#3){++++}-{0:0}:
	#2 (fs_reclaim){+.+.}-{0:0}:
	#1 (&sb->s_type->i_mutex_key#3){+.+.}-{4:4}:
	#0 (&q->debugfs_mutex){+.+.}-{4:4}:

Fix the issue by moving wbt_enable_default() out of bfq's exit(), and
call it from elevator_change_done().

Meantime add disk->rqos_state_mutex for covering wbt state change, which
matches the purpose more than ->elevator_lock.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250505141805.2751237-26-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3aa1fd637d57..94323f303b37 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -218,6 +218,8 @@ struct gendisk {
 	 * devices that do not have multiple independent access ranges.
 	 */
 	struct blk_independent_access_ranges *ia_ranges;
+
+	struct mutex rqos_state_mutex;	/* rqos state change mutex */
 };
 
 /**
-- 
cgit v1.2.3


From 732f25a2895a8c1c54fb56544f0b1e23770ef4d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2025 17:47:22 +0530
Subject: fs: add a write stream field to the kiocb

Prepare for io_uring passthrough of write streams. The write stream
field in the kiocb structure fits into an existing 2-byte hole, so its
size is not changed.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20250506121732.8211-2-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..d5988867fe31 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -408,6 +408,7 @@ struct kiocb {
 	void			*private;
 	int			ki_flags;
 	u16			ki_ioprio; /* See linux/ioprio.h */
+	u8			ki_write_stream;
 	union {
 		/*
 		 * Only used for async buffered reads, where it denotes the
-- 
cgit v1.2.3


From 5006f85ea23ea0bda9a8e31fdda126f4fca48f20 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2025 17:47:23 +0530
Subject: block: add a bi_write_stream field

Add the ability to pass a write stream for placement control in the bio.
The new field fits in an existing hole, so does not change the size of
the struct.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20250506121732.8211-3-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5a46067e85b1..f38425338c3f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -220,6 +220,7 @@ struct bio {
 	unsigned short		bi_flags;	/* BIO_* below */
 	unsigned short		bi_ioprio;
 	enum rw_hint		bi_write_hint;
+	u8			bi_write_stream;
 	blk_status_t		bi_status;
 	atomic_t		__bi_remaining;
 
-- 
cgit v1.2.3


From d2f526ba27d29c442542f7c5df0a86ef0b576716 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 6 May 2025 17:47:24 +0530
Subject: block: introduce max_write_streams queue limit

Drivers with hardware that support write streams need a way to export how
many are available so applications can generically query this.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
[hch: renamed hints to streams, removed stacking]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20250506121732.8211-4-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 94323f303b37..13e1426ad38d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -404,6 +404,8 @@ struct queue_limits {
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
 
+	unsigned short		max_write_streams;
+
 	unsigned int		max_open_zones;
 	unsigned int		max_active_zones;
 
@@ -1270,6 +1272,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
 	return queue_max_segments(bdev_get_queue(bdev));
 }
 
+static inline unsigned short bdev_max_write_streams(struct block_device *bdev)
+{
+	if (bdev_is_partition(bdev))
+		return 0;
+	return bdev_limits(bdev)->max_write_streams;
+}
+
 static inline unsigned queue_logical_block_size(const struct request_queue *q)
 {
 	return q->limits.logical_block_size;
-- 
cgit v1.2.3


From c23acfac10786ac5062a0615e23e68b913ac8da0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2025 17:47:25 +0530
Subject: block: introduce a write_stream_granularity queue limit

Export the granularity that write streams should be discarded with,
as it is essential for making good use of them.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20250506121732.8211-5-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 13e1426ad38d..52b7a27c46e8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -405,6 +405,7 @@ struct queue_limits {
 	unsigned short		max_discard_segments;
 
 	unsigned short		max_write_streams;
+	unsigned int		write_stream_granularity;
 
 	unsigned int		max_open_zones;
 	unsigned int		max_active_zones;
-- 
cgit v1.2.3


From ee203d3d86113559b77b1723e0d10909ebbd66ad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2025 17:47:30 +0530
Subject: nvme: add FDP definitions

Add the config feature result, config log page, and management receive
commands needed for FDP.

Partially based on a patch from Kanchan Joshi <joshi.k@samsung.com>.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20250506121732.8211-10-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/nvme.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2479ed10f53e..51308f65b72f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -303,6 +303,7 @@ enum nvme_ctrl_attr {
 	NVME_CTRL_ATTR_TBKAS		= (1 << 6),
 	NVME_CTRL_ATTR_ELBAS		= (1 << 15),
 	NVME_CTRL_ATTR_RHII		= (1 << 18),
+	NVME_CTRL_ATTR_FDPS		= (1 << 19),
 };
 
 struct nvme_id_ctrl {
@@ -689,6 +690,44 @@ struct nvme_rotational_media_log {
 	__u8	rsvd24[488];
 };
 
+struct nvme_fdp_config {
+	__u8			flags;
+#define FDPCFG_FDPE	(1U << 0)
+	__u8			fdpcidx;
+	__le16			reserved;
+};
+
+struct nvme_fdp_ruh_desc {
+	__u8			ruht;
+	__u8			reserved[3];
+};
+
+struct nvme_fdp_config_desc {
+	__le16			dsze;
+	__u8			fdpa;
+	__u8			vss;
+	__le32			nrg;
+	__le16			nruh;
+	__le16			maxpids;
+	__le32			nns;
+	__le64			runs;
+	__le32			erutl;
+	__u8			rsvd28[36];
+	struct nvme_fdp_ruh_desc ruhs[];
+};
+
+struct nvme_fdp_config_log {
+	__le16			numfdpc;
+	__u8			ver;
+	__u8			rsvd3;
+	__le32			sze;
+	__u8			rsvd8[8];
+	/*
+	 * This is followed by variable number of nvme_fdp_config_desc
+	 * structures, but sparse doesn't like nested variable sized arrays.
+	 */
+};
+
 struct nvme_smart_log {
 	__u8			critical_warning;
 	__u8			temperature[2];
@@ -915,6 +954,7 @@ enum nvme_opcode {
 	nvme_cmd_resv_register	= 0x0d,
 	nvme_cmd_resv_report	= 0x0e,
 	nvme_cmd_resv_acquire	= 0x11,
+	nvme_cmd_io_mgmt_recv	= 0x12,
 	nvme_cmd_resv_release	= 0x15,
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
@@ -936,6 +976,7 @@ enum nvme_opcode {
 		nvme_opcode_name(nvme_cmd_resv_register),	\
 		nvme_opcode_name(nvme_cmd_resv_report),		\
 		nvme_opcode_name(nvme_cmd_resv_acquire),	\
+		nvme_opcode_name(nvme_cmd_io_mgmt_recv),	\
 		nvme_opcode_name(nvme_cmd_resv_release),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_send),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_recv),	\
@@ -1087,6 +1128,7 @@ enum {
 	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
 	NVME_RW_PRINFO_PRACT		= 1 << 13,
 	NVME_RW_DTYPE_STREAMS		= 1 << 4,
+	NVME_RW_DTYPE_DPLCMT		= 2 << 4,
 	NVME_WZ_DEAC			= 1 << 9,
 };
 
@@ -1174,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd {
 	__le32			cdw14[2];
 };
 
+struct nvme_io_mgmt_recv_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__le64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__u8			mo;
+	__u8			rsvd11;
+	__u16			mos;
+	__le32			numd;
+	__le32			cdw12[4];
+};
+
+enum {
+	NVME_IO_MGMT_RECV_MO_RUHS	= 1,
+};
+
+struct nvme_fdp_ruh_status_desc {
+	__le16			pid;
+	__le16			ruhid;
+	__le32			earutr;
+	__le64			ruamw;
+	__u8			reserved[16];
+};
+
+struct nvme_fdp_ruh_status {
+	__u8			rsvd0[14];
+	__le16			nruhsd;
+	struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
 enum {
 	NVME_ZRA_ZONE_REPORT		= 0,
 	NVME_ZRASF_ZONE_REPORT_ALL	= 0,
@@ -1309,6 +1383,7 @@ enum {
 	NVME_FEAT_PLM_WINDOW	= 0x14,
 	NVME_FEAT_HOST_BEHAVIOR	= 0x16,
 	NVME_FEAT_SANITIZE	= 0x17,
+	NVME_FEAT_FDP		= 0x1d,
 	NVME_FEAT_SW_PROGRESS	= 0x80,
 	NVME_FEAT_HOST_ID	= 0x81,
 	NVME_FEAT_RESV_MASK	= 0x82,
@@ -1329,6 +1404,7 @@ enum {
 	NVME_LOG_ANA		= 0x0c,
 	NVME_LOG_FEATURES	= 0x12,
 	NVME_LOG_RMI		= 0x16,
+	NVME_LOG_FDP_CONFIGS	= 0x20,
 	NVME_LOG_DISC		= 0x70,
 	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
@@ -1923,6 +1999,7 @@ struct nvme_command {
 		struct nvmf_auth_receive_command auth_receive;
 		struct nvme_dbbuf dbbuf;
 		struct nvme_directive_cmd directive;
+		struct nvme_io_mgmt_recv_cmd imr;
 	};
 };
 
-- 
cgit v1.2.3


From bb5eb8a5b222fa5092f60d5555867a05ebc3bdf2 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Tue, 22 Apr 2025 19:56:38 +0800
Subject: f2fs: fix to bail out in get_new_segment()

------------[ cut here ]------------
WARNING: CPU: 3 PID: 579 at fs/f2fs/segment.c:2832 new_curseg+0x5e8/0x6dc
pc : new_curseg+0x5e8/0x6dc
Call trace:
 new_curseg+0x5e8/0x6dc
 f2fs_allocate_data_block+0xa54/0xe28
 do_write_page+0x6c/0x194
 f2fs_do_write_node_page+0x38/0x78
 __write_node_page+0x248/0x6d4
 f2fs_sync_node_pages+0x524/0x72c
 f2fs_write_checkpoint+0x4bc/0x9b0
 __checkpoint_and_complete_reqs+0x80/0x244
 issue_checkpoint_thread+0x8c/0xec
 kthread+0x114/0x1bc
 ret_from_fork+0x10/0x20

get_new_segment() detects inconsistent status in between free_segmap
and free_secmap, let's record such error into super block, and bail
out get_new_segment() instead of continue using the segment.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index c24f8bc01045..5206d63b3386 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -78,6 +78,7 @@ enum stop_cp_reason {
 	STOP_CP_REASON_UPDATE_INODE,
 	STOP_CP_REASON_FLUSH_FAIL,
 	STOP_CP_REASON_NO_SEGMENT,
+	STOP_CP_REASON_CORRUPTED_FREE_BITMAP,
 	STOP_CP_REASON_MAX,
 };
 
-- 
cgit v1.2.3


From 2dbf6e0df447d1542f8fd158b17a06d2e8ede15e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 May 2025 21:04:08 -0400
Subject: kill vfs_submount()

The last remaining user of vfs_submount() (tracefs) is easy to convert
to fs_context_for_submount(); do that and bury that thing, along with
SB_SUBMOUNT

Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Tested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h    | 1 -
 include/linux/mount.h | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..515e702d98ae 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1240,7 +1240,6 @@ extern int send_sigurg(struct file *file);
 /* These sb flags are internal to the kernel */
 #define SB_DEAD         BIT(21)
 #define SB_DYING        BIT(24)
-#define SB_SUBMOUNT     BIT(26)
 #define SB_FORCE        BIT(27)
 #define SB_NOSEC        BIT(28)
 #define SB_BORN         BIT(29)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index dcc17ce8a959..d4eb90a367af 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -98,9 +98,6 @@ extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
 				      void *data);
-extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
-				     struct file_system_type *type,
-				     const char *name, void *data);
 
 extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
 extern void mark_mounts_for_expiry(struct list_head *mounts);
-- 
cgit v1.2.3


From 20b4f0b4cdfec106bb73afd47de7d121b723f723 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Wed, 30 Apr 2025 00:45:57 +0100
Subject: rpmsg: core: Remove deadcode

rpmsg_send_offchannel() and rpmsg_trysend_offchannel() have been
unused since they were added in 2011's
commit bcabbccabffe ("rpmsg: add virtio-based remote processor messaging
bus")

Remove them and associated docs.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Acked-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Link: https://lore.kernel.org/r/20250429234600.301083-2-linux@treblig.org
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
---
 include/linux/rpmsg.h | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index 90d8e4475f80..fb7ab9165645 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -184,13 +184,9 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *,
 
 int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len);
 int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
-int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
-			  void *data, int len);
 
 int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len);
 int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
-int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
-			     void *data, int len);
 
 __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
 			poll_table *wait);
@@ -271,15 +267,6 @@ static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
 
 }
 
-static inline int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src,
-					u32 dst, void *data, int len)
-{
-	/* This shouldn't be possible */
-	WARN_ON(1);
-
-	return -ENXIO;
-}
-
 static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len)
 {
 	/* This shouldn't be possible */
@@ -297,15 +284,6 @@ static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data,
 	return -ENXIO;
 }
 
-static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src,
-					   u32 dst, void *data, int len)
-{
-	/* This shouldn't be possible */
-	WARN_ON(1);
-
-	return -ENXIO;
-}
-
 static inline __poll_t rpmsg_poll(struct rpmsg_endpoint *ept,
 				      struct file *filp, poll_table *wait)
 {
-- 
cgit v1.2.3


From a1ceb831417b8e23bd041d3e7021e235fa845128 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 Apr 2025 08:55:41 +0200
Subject: genirq/manage: Rework can_request_irq()

Use the new guards to get and lock the interrupt descriptor and tidy up the
code.

Make the return value boolean to reflect it's meaning.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/20250429065422.187250840@linutronix.de
---
 include/linux/irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index dd5df1e2d032..df93b238b8bf 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -700,7 +700,7 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret);
 extern int noirqdebug_setup(char *str);
 
 /* Checks whether the interrupt can be requested by request_irq(): */
-extern int can_request_irq(unsigned int irq, unsigned long irqflags);
+extern bool can_request_irq(unsigned int irq, unsigned long irqflags);
 
 /* Dummy irq-chip implementations: */
 extern struct irq_chip no_irq_chip;
-- 
cgit v1.2.3


From 66db876162155c1cec87359cd78c62aaafde9257 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 7 May 2025 11:21:22 +0200
Subject: bus: firewall: Fix missing static inline annotations for stubs

Stubs in the header file for !CONFIG_STM32_FIREWALL case should be both
static and inline, because they do not come with earlier declaration and
should be inlined in every unit including the header.

Cc: Patrice Chotard <patrice.chotard@foss.st.com>
Cc: stable@vger.kernel.org
Fixes: 5c9668cfc6d7 ("firewall: introduce stm32_firewall framework")
Link: https://lore.kernel.org/r/20250507092121.95121-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/linux/bus/stm32_firewall_device.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h
index 5178b72bc920..eaa7a3f54450 100644
--- a/include/linux/bus/stm32_firewall_device.h
+++ b/include/linux/bus/stm32_firewall_device.h
@@ -114,27 +114,30 @@ void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 su
 
 #else /* CONFIG_STM32_FIREWALL */
 
-int stm32_firewall_get_firewall(struct device_node *np, struct stm32_firewall *firewall,
-				unsigned int nb_firewall)
+static inline int stm32_firewall_get_firewall(struct device_node *np,
+					      struct stm32_firewall *firewall,
+					      unsigned int nb_firewall)
 {
 	return -ENODEV;
 }
 
-int stm32_firewall_grant_access(struct stm32_firewall *firewall)
+static inline int stm32_firewall_grant_access(struct stm32_firewall *firewall)
 {
 	return -ENODEV;
 }
 
-void stm32_firewall_release_access(struct stm32_firewall *firewall)
+static inline void stm32_firewall_release_access(struct stm32_firewall *firewall)
 {
 }
 
-int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id)
+static inline int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall,
+						    u32 subsystem_id)
 {
 	return -ENODEV;
 }
 
-void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id)
+static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall,
+						       u32 subsystem_id)
 {
 }
 
-- 
cgit v1.2.3


From aefc11550ebd08eadee6d643792c9092de2e472f Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 20 Apr 2025 17:46:56 +0100
Subject: genirq: Remove unused remove_percpu_irq()

remove_percpu_irq() has been unused since it was added in 2011 by
commit 31d9d9b6d830 ("genirq: Add support for per-cpu dev_id interrupts")

Remove it.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250420164656.112641-1-linux@treblig.org
---
 include/linux/irq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index df93b238b8bf..810e44ee297f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -597,7 +597,6 @@ enum {
 
 struct irqaction;
 extern int setup_percpu_irq(unsigned int irq, struct irqaction *new);
-extern void remove_percpu_irq(unsigned int irq, struct irqaction *act);
 
 #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
 extern void irq_cpu_online(void);
-- 
cgit v1.2.3


From d683a8561889c1813fe2ad6082769c91e3cb71b3 Mon Sep 17 00:00:00 2001
From: Mostafa Saleh <smostafa@google.com>
Date: Wed, 30 Apr 2025 16:27:09 +0000
Subject: ubsan: Remove regs from report_ubsan_failure()

report_ubsan_failure() doesn't use argument regs, and soon it will
be called from the hypervisor context were regs are not available.
So, remove the unused argument.

Signed-off-by: Mostafa Saleh <smostafa@google.com>
Acked-by: Kees Cook <kees@kernel.org>
Link: https://lore.kernel.org/r/20250430162713.1997569-3-smostafa@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/ubsan.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h
index d8219cbe09ff..c843816f5f68 100644
--- a/include/linux/ubsan.h
+++ b/include/linux/ubsan.h
@@ -3,9 +3,9 @@
 #define _LINUX_UBSAN_H
 
 #ifdef CONFIG_UBSAN_TRAP
-const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type);
+const char *report_ubsan_failure(u32 check_type);
 #else
-static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type)
+static inline const char *report_ubsan_failure(u32 check_type)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 61b38f7591fb434fce326c1d686a9793c7f418bc Mon Sep 17 00:00:00 2001
From: Mostafa Saleh <smostafa@google.com>
Date: Wed, 30 Apr 2025 16:27:10 +0000
Subject: KVM: arm64: Introduce CONFIG_UBSAN_KVM_EL2

Add a new Kconfig CONFIG_UBSAN_KVM_EL2 for KVM which enables
UBSAN for EL2 code (in protected/nvhe/hvhe) modes.
This will re-use the same checks enabled for the kernel for
the hypervisor. The only difference is that for EL2 it always
emits a "brk" instead of implementing hooks as the hypervisor
can't print reports.

The KVM code will re-use the same code for the kernel
"report_ubsan_failure()" so #ifdefs are changed to also have this
code for CONFIG_UBSAN_KVM_EL2

Signed-off-by: Mostafa Saleh <smostafa@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Link: https://lore.kernel.org/r/20250430162713.1997569-4-smostafa@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/ubsan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h
index c843816f5f68..3ab8d38aedb8 100644
--- a/include/linux/ubsan.h
+++ b/include/linux/ubsan.h
@@ -2,7 +2,7 @@
 #ifndef _LINUX_UBSAN_H
 #define _LINUX_UBSAN_H
 
-#ifdef CONFIG_UBSAN_TRAP
+#if defined(CONFIG_UBSAN_TRAP) || defined(CONFIG_UBSAN_KVM_EL2)
 const char *report_ubsan_failure(u32 check_type);
 #else
 static inline const char *report_ubsan_failure(u32 check_type)
-- 
cgit v1.2.3


From f31acff017b1d80e649f674450d3b64d3265848c Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 7 May 2025 17:25:37 +0800
Subject: block: fix warning on 'make htmldocs'

Fix the following warning when running 'make htmldocs':

+WARNING: include/linux/blk-mq.h:532 struct member 'update_nr_hwq_lock' not described in 'blk_mq_tag_set'

Fixes: 98e68f67020c ("block: prevent adding/deleting disk during updating nr_hw_queues")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/all/20250507163220.00141d77@canb.auug.org.au/
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250507092537.3009112-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ef84d53095a6..906bf330ffb5 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -507,6 +507,9 @@ enum hctx_type {
  *		   request_queue.tag_set_list.
  * @srcu:	   Use as lock when type of the request queue is blocking
  *		   (BLK_MQ_F_BLOCKING).
+ * @update_nr_hwq_lock:
+ * 		   Synchronize updating nr_hw_queues with add/del disk &
+ * 		   switching elevator.
  */
 struct blk_mq_tag_set {
 	const struct blk_mq_ops	*ops;
-- 
cgit v1.2.3


From 850e210d5ad21b94b55b97d4d82b4cdeb0bb05df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 May 2025 14:04:25 +0200
Subject: block: add a bio_add_virt_nofail helper

Add a helper to add a directly mapped kernel virtual address to a
bio so that callers don't have to convert to pages or folios.

For now only the _nofail variant is provided as that is what all the
obvious callers want.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20250507120451.4000627-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index cafc7c215de8..acca7464080c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -417,6 +417,8 @@ void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
 			  size_t off);
+void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len);
+
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
 void __bio_release_pages(struct bio *bio, bool mark_dirty);
-- 
cgit v1.2.3


From 10b1e59cdadabff16fc78eb2ca4c341b1502293c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 May 2025 14:04:26 +0200
Subject: block: add a bdev_rw_virt helper

Add a helper to perform synchronous I/O on a kernel direct map range.
Currently this is implemented in various places in usually not very
efficient ways, so provide a generic helper instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20250507120451.4000627-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index acca7464080c..ad54e6af20dc 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -402,7 +402,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
 
 struct request_queue;
 
-extern int submit_bio_wait(struct bio *bio);
 void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
 	      unsigned short max_vecs, blk_opf_t opf);
 extern void bio_uninit(struct bio *);
@@ -419,6 +418,10 @@ void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
 			  size_t off);
 void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len);
 
+int submit_bio_wait(struct bio *bio);
+int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
+		size_t len, enum req_op op);
+
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
 void __bio_release_pages(struct bio *bio, bool mark_dirty);
-- 
cgit v1.2.3


From 75f88659e47dc570bdebddf77d7a3cd5f0845612 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 May 2025 14:04:27 +0200
Subject: block: add a bio_add_max_vecs helper

Add a helper to check how many bio_vecs are needed to add a kernel
virtual address range to a bio, accounting for the always contiguous
direct mapping and vmalloc mappings that usually need a bio_vec
per page sized chunk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20250507120451.4000627-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index ad54e6af20dc..128b1c6ca648 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -418,6 +418,21 @@ void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
 			  size_t off);
 void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len);
 
+/**
+ * bio_add_max_vecs - number of bio_vecs needed to add data to a bio
+ * @kaddr: kernel virtual address to add
+ * @len: length in bytes to add
+ *
+ * Calculate how many bio_vecs need to be allocated to add the kernel virtual
+ * address range in [@kaddr:@len] in the worse case.
+ */
+static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len)
+{
+	if (is_vmalloc_addr(kaddr))
+		return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE);
+	return 1;
+}
+
 int submit_bio_wait(struct bio *bio);
 int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
 		size_t len, enum req_op op);
-- 
cgit v1.2.3


From 8dd16f5e34693aa0aa6a4ed48427045008097e64 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 May 2025 14:04:28 +0200
Subject: block: add a bio_add_vmalloc helpers

Add a helper to add a vmalloc region to a bio, abstracting away the
vmalloc addresses from the underlying pages and another one wrapping
it for the simple case where all data fits into a single bio.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20250507120451.4000627-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 128b1c6ca648..5d880903bcc5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -433,6 +433,9 @@ static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len)
 	return 1;
 }
 
+unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len);
+bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len);
+
 int submit_bio_wait(struct bio *bio);
 int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
 		size_t len, enum req_op op);
-- 
cgit v1.2.3


From af78428ed3f3eebad7be9d0463251046e9582cf6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 May 2025 14:04:29 +0200
Subject: block: remove the q argument from blk_rq_map_kern

Remove the q argument from blk_rq_map_kern and the internal helpers
called by it as the queue can trivially be derived from the request.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20250507120451.4000627-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 906bf330ffb5..de8c85a03bb7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1037,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *,
 int blk_rq_map_user_iov(struct request_queue *, struct request *,
 		struct rq_map_data *, const struct iov_iter *, gfp_t);
 int blk_rq_unmap_user(struct bio *);
-int blk_rq_map_kern(struct request_queue *, struct request *, void *,
-		unsigned int, gfp_t);
+int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
+		gfp_t gfp);
 int blk_rq_append_bio(struct request *rq, struct bio *bio);
 void blk_execute_rq_nowait(struct request *rq, bool at_head);
 blk_status_t blk_execute_rq(struct request *rq, bool at_head);
-- 
cgit v1.2.3


From b8c7bfb7a0f01521297d688ca5fe1482c81e8910 Mon Sep 17 00:00:00 2001
From: Frank Li <Frank.Li@nxp.com>
Date: Mon, 14 Apr 2025 14:30:56 -0400
Subject: irqdomain: Add IRQ_DOMAIN_FLAG_MSI_IMMUTABLE and
 irq_domain_is_msi_immutable()

Add the flag IRQ_DOMAIN_FLAG_MSI_IMMUTABLE and the API function
irq_domain_is_msi_immutable() to check if the MSI controller retains an
immutable address/data pair during irq_set_affinity().

Ensure compatibility with MSI users like PCIe Endpoint Doorbell, which
require the address/data pair to remain unchanged after setup. Use this
function to verify if the MSI controller is immutable.

Signed-off-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250414-ep-msi-v18-2-f69b49917464@nxp.com
---
 include/linux/irqdomain.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index bb7111105296..bccfff5bebe2 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -231,6 +231,9 @@ enum {
 	/* Irq domain must destroy generic chips when removed */
 	IRQ_DOMAIN_FLAG_DESTROY_GC	= (1 << 10),
 
+	/* Address and data pair is mutable when irq_set_affinity() */
+	IRQ_DOMAIN_FLAG_MSI_IMMUTABLE	= (1 << 11),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -691,6 +694,10 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
 	return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE;
 }
 
+static inline bool irq_domain_is_msi_immutable(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_MSI_IMMUTABLE;
+}
 #else	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
 			unsigned int nr_irqs, int node, void *arg)
-- 
cgit v1.2.3


From f42c8556a0690802246dc588ae9c18184f71d8f5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 6 May 2025 22:34:31 +0200
Subject: cpufreq/sched: schedutil: Add helper for governor checks

Add a helper for checking if schedutil is the current governor for
a given cpufreq policy and use it in sched_is_eas_possible() to avoid
accessing cpufreq policy internals directly from there.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/3365956.44csPzL39Z@rjwysocki.net
---
 include/linux/cpufreq.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 7bb760031dd7..1d2c6c6d8952 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -650,6 +650,15 @@ module_exit(__governor##_exit)
 struct cpufreq_governor *cpufreq_default_governor(void);
 struct cpufreq_governor *cpufreq_fallback_governor(void);
 
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+bool sugov_is_governor(struct cpufreq_policy *policy);
+#else
+static inline bool sugov_is_governor(struct cpufreq_policy *policy)
+{
+	return false;
+}
+#endif
+
 static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy)
 {
 	if (policy->max < policy->cur)
-- 
cgit v1.2.3


From 4854649b1fb43968615e0374d9d59580093ac67f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 6 May 2025 22:37:15 +0200
Subject: cpufreq/sched: Move cpufreq-specific EAS checks to cpufreq

Doing cpufreq-specific EAS checks that require accessing policy
internals directly from sched_is_eas_possible() is a bit unfortunate,
so introduce cpufreq_ready_for_eas() in cpufreq, move those checks
into that new function and make sched_is_eas_possible() call it.

While at it, address a possible race between the EAS governor check
and governor change by doing the former under the policy rwsem.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/2317800.iZASKD2KPV@rjwysocki.net
---
 include/linux/cpufreq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1d2c6c6d8952..95f3807c8c55 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1237,6 +1237,8 @@ void cpufreq_generic_init(struct cpufreq_policy *policy,
 		struct cpufreq_frequency_table *table,
 		unsigned int transition_latency);
 
+bool cpufreq_ready_for_eas(const struct cpumask *cpu_mask);
+
 static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy)
 {
 	dev_pm_opp_of_register_em(get_cpu_device(policy->cpu),
-- 
cgit v1.2.3


From 6bceea7a1e076ef9d71b20d8dda2f7dc52bd34d2 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 18 Apr 2025 19:55:03 -0700
Subject: arch_topology: Relocate cpu_scale to topology.[h|c]

arch_topology.c provides functionality to parse and scale CPU capacity.
It also provides a corresponding sysfs interface. Some architectures
parse and scale CPU capacity differently as per their own needs. On
Intel processors, for instance, it is responsibility of the Intel
P-state driver.

Relocate the implementation of that interface to a common location in
topology.c. Architectures can use the interface and populate it using
their own mechanisms.

An alternative approach would be to compile arch_topology.c even if
not needed only to get this interface. This approach would create
duplicated and conflicting functionality and data structures.

Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Link: https://patch.msgid.link/20250419025504.9760-2-ricardo.neri-calderon@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/arch_topology.h | 8 --------
 include/linux/topology.h      | 9 +++++++++
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 2222e8b03ff4..d72d6e5aa200 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -14,14 +14,6 @@ int topology_update_cpu_topology(void);
 struct device_node;
 bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
 
-DECLARE_PER_CPU(unsigned long, cpu_scale);
-
-static inline unsigned long topology_get_cpu_scale(int cpu)
-{
-	return per_cpu(cpu_scale, cpu);
-}
-
-void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
 
 DECLARE_PER_CPU(unsigned long, capacity_freq_ref);
 
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 24e715f0f6d2..cd6b4bdc9cfd 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -332,4 +332,13 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops)
 	     !IS_ERR_OR_NULL(mask);					       \
 	     __hops++)
 
+DECLARE_PER_CPU(unsigned long, cpu_scale);
+
+static inline unsigned long topology_get_cpu_scale(int cpu)
+{
+	return per_cpu(cpu_scale, cpu);
+}
+
+void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
+
 #endif /* _LINUX_TOPOLOGY_H */
-- 
cgit v1.2.3


From 5d894321c49e61379189b0ff605f316e39cbd1e9 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Wed, 7 May 2025 14:18:21 -0700
Subject: fs: add atomic write unit max opt to statx

XFS will be able to support large atomic writes (atomic write > 1x block)
in future. This will be achieved by using different operating methods,
depending on the size of the write.

Specifically a new method of operation based in FS atomic extent remapping
will be supported in addition to the current HW offload-based method.

The FS method will generally be appreciably slower performing than the
HW-offload method. However the FS method will be typically able to
contribute to achieving a larger atomic write unit max limit.

XFS will support a hybrid mode, where HW offload method will be used when
possible, i.e. HW offload is used when the length of the write is
supported, and for other times FS-based atomic writes will be used.

As such, there is an atomic write length at which the user may experience
appreciably slower performance.

Advertise this limit in a new statx field, stx_atomic_write_unit_max_opt.

When zero, it means that there is no such performance boundary.

Masks STATX{_ATTR}_WRITE_ATOMIC can be used to get this new field. This is
ok for older kernels which don't support this new field, as they would
report 0 in this field (from zeroing in cp_statx()) already. Furthermore
those older kernels don't support large atomic writes - apart from block
fops, but there would be consistent performance there for atomic writes
in range [unit min, unit max].

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 include/linux/fs.h   | 3 ++-
 include/linux/stat.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..7b19d8f99aff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3475,7 +3475,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 void generic_fill_statx_atomic_writes(struct kstat *stat,
 				      unsigned int unit_min,
-				      unsigned int unit_max);
+				      unsigned int unit_max,
+				      unsigned int unit_max_opt);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index be7496a6a0dd..e3d00e7bb26d 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -57,6 +57,7 @@ struct kstat {
 	u32		dio_read_offset_align;
 	u32		atomic_write_unit_min;
 	u32		atomic_write_unit_max;
+	u32		atomic_write_unit_max_opt;
 	u32		atomic_write_segments_max;
 };
 
-- 
cgit v1.2.3


From 5017b1b02640234b08ad38a046043f143670dea2 Mon Sep 17 00:00:00 2001
From: Corey Minyard <corey@minyard.net>
Date: Tue, 18 Mar 2025 20:41:42 -0500
Subject: ipmi: Add a note about the pretimeout callback

You can't do IPMI calls from the callback, it's called with locks
held.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 include/linux/ipmi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 2f74dd90c271..27cd5980bb27 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -93,7 +93,8 @@ struct ipmi_user_hndl {
 
 	/*
 	 * Called when the interface detects a watchdog pre-timeout.  If
-	 * this is NULL, it will be ignored for the user.
+	 * this is NULL, it will be ignored for the user.  Note that you
+	 * can't do any IPMI calls from here, it's called with locks held.
 	 */
 	void (*ipmi_watchdog_pretimeout)(void *handler_data);
 
-- 
cgit v1.2.3


From 6f7f6605c9aeb472b5d4101b263b1fc5dc3cf623 Mon Sep 17 00:00:00 2001
From: Corey Minyard <corey@minyard.net>
Date: Tue, 22 Apr 2025 12:06:02 -0500
Subject: ipmi:msghandler: Export and fix panic messaging capability

Don't have the other users that do things at panic time (the watchdog)
do all this themselves, provide a function to do it.

Also, with the new design where most stuff happens at thread context,
a few things needed to be fixed to avoid doing locking in a panic
context.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 include/linux/ipmi.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 27cd5980bb27..7da6602eab71 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -344,4 +344,14 @@ extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data);
 /* Helper function for computing the IPMB checksum of some data. */
 unsigned char ipmb_checksum(unsigned char *data, int size);
 
+/*
+ * For things that must send messages at panic time, like the IPMI watchdog
+ * driver that extends the reset time on a panic, use this to send messages
+ * from panic context.  Note that this puts the driver into a mode that
+ * only works at panic time, so only use it then.
+ */
+void ipmi_panic_request_and_wait(struct ipmi_user *user,
+				 struct ipmi_addr *addr,
+				 struct kernel_ipmi_msg *msg);
+
 #endif /* __LINUX_IPMI_H */
-- 
cgit v1.2.3


From 59529bbe642de4eb2191a541d9b4bae7eb73862e Mon Sep 17 00:00:00 2001
From: Huang Yiwei <quic_hyiwei@quicinc.com>
Date: Wed, 7 May 2025 12:57:57 +0800
Subject: firmware: SDEI: Allow sdei initialization without ACPI_APEI_GHES

SDEI usually initialize with the ACPI table, but on platforms where
ACPI is not used, the SDEI feature can still be used to handle
specific firmware calls or other customized purposes. Therefore, it
is not necessary for ARM_SDE_INTERFACE to depend on ACPI_APEI_GHES.

In commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES
in acpi_init()"), to make APEI ready earlier, sdei_init was moved
into acpi_ghes_init instead of being a standalone initcall, adding
ACPI_APEI_GHES dependency to ARM_SDE_INTERFACE. This restricts the
flexibility and usability of SDEI.

This patch corrects the dependency in Kconfig and splits sdei_init()
into two separate functions: sdei_init() and acpi_sdei_init().
sdei_init() will be called by arch_initcall and will only initialize
the platform driver, while acpi_sdei_init() will initialize the
device from acpi_ghes_init() when ACPI is ready. This allows the
initialization of SDEI without ACPI_APEI_GHES enabled.

Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()")
Cc: Shuai Xue <xueshuai@linux.alibaba.com>
Signed-off-by: Huang Yiwei <quic_hyiwei@quicinc.com>
Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20250507045757.2658795-1-quic_hyiwei@quicinc.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/arm_sdei.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 255701e1251b..f652a5028b59 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,12 +46,12 @@ int sdei_unregister_ghes(struct ghes *ghes);
 /* For use by arch code when CPU hotplug notifiers are not appropriate. */
 int sdei_mask_local_cpu(void);
 int sdei_unmask_local_cpu(void);
-void __init sdei_init(void);
+void __init acpi_sdei_init(void);
 void sdei_handler_abort(void);
 #else
 static inline int sdei_mask_local_cpu(void) { return 0; }
 static inline int sdei_unmask_local_cpu(void) { return 0; }
-static inline void sdei_init(void) { }
+static inline void acpi_sdei_init(void) { }
 static inline void sdei_handler_abort(void) { }
 #endif /* CONFIG_ARM_SDE_INTERFACE */
 
-- 
cgit v1.2.3


From 812bca7f7e73778a7022f862dc8c7c7d38b9a760 Mon Sep 17 00:00:00 2001
From: Xi Pardee <xi.pardee@linux.intel.com>
Date: Fri, 25 Apr 2025 12:52:29 -0700
Subject: platform/x86:intel/vsec: Change return type of intel_vsec_register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change return type of intel_vsec_register() to int. The current
implementation does not indicate if the register fail or not.
Change to return error code if it fails or if INTEL_VSEC config
is not set. This is a preparation step to introduce a new SSRAM
Telemetry driver that will be using this API.

Signed-off-by: Xi Pardee <xi.pardee@linux.intel.com>
Link: https://lore.kernel.org/r/20250425195237.493129-2-xi.pardee@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/intel_vsec.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h
index b94beab64610..bc95821f1bfb 100644
--- a/include/linux/intel_vsec.h
+++ b/include/linux/intel_vsec.h
@@ -139,12 +139,13 @@ static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device
 }
 
 #if IS_ENABLED(CONFIG_INTEL_VSEC)
-void intel_vsec_register(struct pci_dev *pdev,
+int intel_vsec_register(struct pci_dev *pdev,
 			 struct intel_vsec_platform_info *info);
 #else
-static inline void intel_vsec_register(struct pci_dev *pdev,
+static inline int intel_vsec_register(struct pci_dev *pdev,
 				       struct intel_vsec_platform_info *info)
 {
+	return -ENODEV;
 }
 #endif
 #endif
-- 
cgit v1.2.3


From feea7bd6b02d43a794e3f065650d89cf8d8e8e59 Mon Sep 17 00:00:00 2001
From: "Luke D. Jones" <luke@ljones.dev>
Date: Sun, 23 Mar 2025 15:34:21 +1300
Subject: platform/x86: asus-wmi: Refactor Ally suspend/resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adjust how the CSEE direct call hack is used.

The results of months of testing combined with help from ASUS to
determine the actual cause of suspend issues has resulted in this
refactoring which immensely improves the reliability for devices which
do not have the following minimum MCU FW version:
- ROG Ally X: 313
- ROG Ally 1: 319

For MCU FW versions that match the minimum or above the CSEE hack is
disabled and mcu_powersave set to on by default as there are no
negatives beyond a slightly slower device reinitialization due to the
MCU being powered off.

As this is set only at module load time, it is still possible for
mcu_powersave sysfs attributes to change it at runtime if so desired.

Signed-off-by: Luke D. Jones <luke@ljones.dev>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20250323023421.78012-3-luke@ljones.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/asus-wmi.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 783e2a336861..8a515179113d 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -157,9 +157,28 @@
 #define ASUS_WMI_DSTS_MAX_BRIGTH_MASK	0x0000FF00
 #define ASUS_WMI_DSTS_LIGHTBAR_MASK	0x0000000F
 
+enum asus_ally_mcu_hack {
+	ASUS_WMI_ALLY_MCU_HACK_INIT,
+	ASUS_WMI_ALLY_MCU_HACK_ENABLED,
+	ASUS_WMI_ALLY_MCU_HACK_DISABLED,
+};
+
 #if IS_REACHABLE(CONFIG_ASUS_WMI)
+void set_ally_mcu_hack(enum asus_ally_mcu_hack status);
+void set_ally_mcu_powersave(bool enabled);
+int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval);
 int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval);
 #else
+static inline void set_ally_mcu_hack(enum asus_ally_mcu_hack status)
+{
+}
+static inline void set_ally_mcu_powersave(bool enabled)
+{
+}
+static inline int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval)
+{
+	return -ENODEV;
+}
 static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
 					   u32 *retval)
 {
-- 
cgit v1.2.3


From 9950f94e485908fc42e7bb2533aff13b7e97a36c Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Mon, 5 May 2025 16:25:58 +0100
Subject: platform/x86/sony-laptop: Remove unused sony laptop camera code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit ba47652ba655 ("media: meye: remove this deprecated driver")
removed the meye driver but left behind the code in sony-laptop.c
which that driver used to call.

Remove the sony_pic_camera_command() function, and the set of
defines (SONY_PIC_COMMAND_*) in a header used for the interface
and the static helpers it called.

Cleanup remaining #defines.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Link: https://lore.kernel.org/r/20250505152558.40526-1-linux@treblig.org
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/sony-laptop.h | 39 ---------------------------------------
 1 file changed, 39 deletions(-)
 delete mode 100644 include/linux/sony-laptop.h

(limited to 'include/linux')

diff --git a/include/linux/sony-laptop.h b/include/linux/sony-laptop.h
deleted file mode 100644
index 1e3c92feea6e..000000000000
--- a/include/linux/sony-laptop.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _SONYLAPTOP_H_
-#define _SONYLAPTOP_H_
-
-#include <linux/types.h>
-
-#ifdef __KERNEL__
-
-/* used only for communication between v4l and sony-laptop */
-
-#define SONY_PIC_COMMAND_GETCAMERA		 1	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERA		 2
-#define SONY_PIC_COMMAND_GETCAMERABRIGHTNESS	 3	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERABRIGHTNESS	 4
-#define SONY_PIC_COMMAND_GETCAMERACONTRAST	 5	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERACONTRAST	 6
-#define SONY_PIC_COMMAND_GETCAMERAHUE		 7	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERAHUE		 8
-#define SONY_PIC_COMMAND_GETCAMERACOLOR		 9	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERACOLOR		10
-#define SONY_PIC_COMMAND_GETCAMERASHARPNESS	11	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERASHARPNESS	12
-#define SONY_PIC_COMMAND_GETCAMERAPICTURE	13	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERAPICTURE	14
-#define SONY_PIC_COMMAND_GETCAMERAAGC		15	/* obsolete */
-#define SONY_PIC_COMMAND_SETCAMERAAGC		16
-#define SONY_PIC_COMMAND_GETCAMERADIRECTION	17	/* obsolete */
-#define SONY_PIC_COMMAND_GETCAMERAROMVERSION	18	/* obsolete */
-#define SONY_PIC_COMMAND_GETCAMERAREVISION	19	/* obsolete */
-
-#if IS_ENABLED(CONFIG_SONY_LAPTOP)
-int sony_pic_camera_command(int command, u8 value);
-#else
-static inline int sony_pic_camera_command(int command, u8 value) { return 0; }
-#endif
-
-#endif	/* __KERNEL__ */
-
-#endif /* _SONYLAPTOP_H_ */
-- 
cgit v1.2.3


From 88cefd99ee265b18f851b911a75282146ecabc3d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 10 Apr 2025 15:38:30 -0400
Subject: ftrace: Show subops in enabled_functions

The function graph infrastructure uses subops of the function tracer.
These are not shown in enabled_functions. Add a "subops:" section to the
enabled_functions line to show what functions are attached via subops. If
the subops is from the function_graph infrastructure, then show the entry
and return callbacks that are attached.

Here's an example of the output:

schedule_on_each_cpu (1)                tramp: 0xffffffffc03ef000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60     subops: {ent:trace_graph_entry+0x0/0x20 ret:trace_graph_return+0x0/0x150}

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lore.kernel.org/20250410153830.5d97f108@gandalf.local.home
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fbabc3d848b3..fc939ca2ff66 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -328,6 +328,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
  * DIRECT - Used by the direct ftrace_ops helper for direct functions
  *            (internal ftrace only, should not be used by others)
  * SUBOP  - Is controlled by another op in field managed.
+ * GRAPH  - Is a component of the fgraph_ops structure
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= BIT(0),
@@ -349,6 +350,7 @@ enum {
 	FTRACE_OPS_FL_PERMANENT                 = BIT(16),
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
 	FTRACE_OPS_FL_SUBOP			= BIT(18),
+	FTRACE_OPS_FL_GRAPH			= BIT(19),
 };
 
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
-- 
cgit v1.2.3


From 53eddae9af0c0b46f9c77a02d23c21c1aa824739 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 7 May 2025 20:47:32 +0200
Subject: platform/x86: int3472: Move common.h to public includes, symbols to
 INTEL_INT3472
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the common.h header file to include/linux/platform_data/x86/int3472.h
and add a "INTEL_INT3472" kernel-symbol-namespace to the exported symbols.

This is a preparation patch for exporting some more symbols for re-use in
the atomisp driver.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20250507184737.154747-2-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/int3472.h | 164 ++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 include/linux/platform_data/x86/int3472.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
new file mode 100644
index 000000000000..4cf02df6f753
--- /dev/null
+++ b/include/linux/platform_data/x86/int3472.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel INT3472 ACPI camera sensor power-management support
+ *
+ * Author: Dan Scally <djrscally@gmail.com>
+ */
+
+#ifndef __PLATFORM_DATA_X86_INT3472_H
+#define __PLATFORM_DATA_X86_INT3472_H
+
+#include <linux/clk-provider.h>
+#include <linux/gpio/machine.h>
+#include <linux/leds.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/types.h>
+
+/* FIXME drop this once the I2C_DEV_NAME_FORMAT macro has been added to include/linux/i2c.h */
+#ifndef I2C_DEV_NAME_FORMAT
+#define I2C_DEV_NAME_FORMAT					"i2c-%s"
+#endif
+
+/* PMIC GPIO Types */
+#define INT3472_GPIO_TYPE_RESET					0x00
+#define INT3472_GPIO_TYPE_POWERDOWN				0x01
+#define INT3472_GPIO_TYPE_POWER_ENABLE				0x0b
+#define INT3472_GPIO_TYPE_CLK_ENABLE				0x0c
+#define INT3472_GPIO_TYPE_PRIVACY_LED				0x0d
+#define INT3472_GPIO_TYPE_HANDSHAKE				0x12
+
+#define INT3472_PDEV_MAX_NAME_LEN				23
+#define INT3472_MAX_SENSOR_GPIOS				3
+#define INT3472_MAX_REGULATORS					3
+
+/* E.g. "avdd\0" */
+#define GPIO_SUPPLY_NAME_LENGTH				5
+/* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */
+#define GPIO_REGULATOR_NAME_LENGTH				(12 + GPIO_SUPPLY_NAME_LENGTH)
+/* lower- and upper-case mapping */
+#define GPIO_REGULATOR_SUPPLY_MAP_COUNT				2
+/*
+ * Ensure the GPIO is driven low/high for at least 2 ms before changing.
+ *
+ * 2 ms has been chosen because it is the minimum time ovXXXX sensors need to
+ * have their reset line driven logical high to properly register a reset.
+ */
+#define GPIO_REGULATOR_ENABLE_TIME				(2 * USEC_PER_MSEC)
+#define GPIO_REGULATOR_OFF_ON_DELAY				(2 * USEC_PER_MSEC)
+
+#define INT3472_LED_MAX_NAME_LEN				32
+
+#define CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET			86
+
+#define INT3472_REGULATOR(_name, _ops, _enable_time, _off_on_delay) \
+	(const struct regulator_desc) {				\
+		.name = _name,					\
+		.type = REGULATOR_VOLTAGE,			\
+		.ops = _ops,					\
+		.owner = THIS_MODULE,				\
+		.enable_time = _enable_time,			\
+		.off_on_delay = _off_on_delay,			\
+	}
+
+#define to_int3472_clk(hw)					\
+	container_of(hw, struct int3472_clock, clk_hw)
+
+#define to_int3472_device(clk)					\
+	container_of(clk, struct int3472_discrete_device, clock)
+
+struct acpi_device;
+struct dmi_system_id;
+struct i2c_client;
+struct platform_device;
+
+struct int3472_cldb {
+	u8 version;
+	/*
+	 * control logic type
+	 * 0: UNKNOWN
+	 * 1: DISCRETE(CRD-D)
+	 * 2: PMIC TPS68470
+	 * 3: PMIC uP6641
+	 */
+	u8 control_logic_type;
+	u8 control_logic_id;
+	u8 sensor_card_sku;
+	u8 reserved[10];
+	u8 clock_source;
+	u8 reserved2[17];
+};
+
+struct int3472_discrete_quirks {
+	/* For models where AVDD GPIO is shared between sensors */
+	const char *avdd_second_sensor;
+};
+
+struct int3472_gpio_regulator {
+	/* SUPPLY_MAP_COUNT * 2 to make room for second sensor mappings */
+	struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2];
+	char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH];
+	char regulator_name[GPIO_REGULATOR_NAME_LENGTH];
+	struct regulator_dev *rdev;
+	struct regulator_desc rdesc;
+};
+
+struct int3472_discrete_device {
+	struct acpi_device *adev;
+	struct device *dev;
+	struct acpi_device *sensor;
+	const char *sensor_name;
+
+	const struct int3472_sensor_config *sensor_config;
+
+	struct int3472_gpio_regulator regulators[INT3472_MAX_REGULATORS];
+
+	struct int3472_clock {
+		struct clk *clk;
+		struct clk_hw clk_hw;
+		struct clk_lookup *cl;
+		struct gpio_desc *ena_gpio;
+		u32 frequency;
+		u8 imgclk_index;
+	} clock;
+
+	struct int3472_pled {
+		struct led_classdev classdev;
+		struct led_lookup_data lookup;
+		char name[INT3472_LED_MAX_NAME_LEN];
+		struct gpio_desc *gpio;
+	} pled;
+
+	struct int3472_discrete_quirks quirks;
+
+	unsigned int ngpios; /* how many GPIOs have we seen */
+	unsigned int n_sensor_gpios; /* how many have we mapped to sensor */
+	unsigned int n_regulator_gpios; /* how many have we mapped to a regulator */
+	struct gpiod_lookup_table gpios;
+};
+
+extern const struct dmi_system_id skl_int3472_discrete_quirks[];
+
+union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev,
+					       char *id);
+int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb);
+int skl_int3472_get_sensor_adev_and_name(struct device *dev,
+					 struct acpi_device **sensor_adev_ret,
+					 const char **name_ret);
+
+int skl_int3472_register_gpio_clock(struct int3472_discrete_device *int3472,
+				    struct gpio_desc *gpio);
+int skl_int3472_register_dsm_clock(struct int3472_discrete_device *int3472);
+void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472);
+
+int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
+				   struct gpio_desc *gpio,
+				   unsigned int enable_time,
+				   const char *supply_name,
+				   const char *second_sensor);
+void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472);
+
+int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gpio_desc *gpio);
+void skl_int3472_unregister_pled(struct int3472_discrete_device *int3472);
+
+#endif
-- 
cgit v1.2.3


From 1e5d088a52c207bcef6a43a6f6ffe162c514ed64 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 7 May 2025 20:47:33 +0200
Subject: platform/x86: int3472: Stop using devm_gpiod_get()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The intent is to re-use the INT3472 code for parsing Intel camera sensor
GPIOs and mapping them to the sensor for the atomisp driver, which
currently has duplicate code.

On atomisp devices there is no special INT3472 ACPI device, instead
the Intel _DSM to get the GPIO type is part of the ACPI device for
the sensor itself.

To deal with this the mapping is done from ipu_bridge_init() instead of
from a platform-device probe() function, there is no device to tie
the lifetime of the gpiod_get() calls done by the INT3472 code to.

Switch from devm_gpiod_get() to plain gpiod_get() + explicit gpiod_put()
calls, to prepare for the code being re-used in the atomisp driver.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20250507184737.154747-3-hdegoede@redhat.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/int3472.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index 4cf02df6f753..0a835cc85c67 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -99,6 +99,7 @@ struct int3472_gpio_regulator {
 	struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2];
 	char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH];
 	char regulator_name[GPIO_REGULATOR_NAME_LENGTH];
+	struct gpio_desc *ena_gpio;
 	struct regulator_dev *rdev;
 	struct regulator_desc rdesc;
 };
-- 
cgit v1.2.3


From 1cfa1bb9b4033e51d3c3f5ed5bf55475e57cc686 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 7 May 2025 20:47:34 +0200
Subject: platform/x86: int3472: Export int3472_discrete_parse_crs()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At the moment the atomisp has duplicate code for parsing Intel camera
sensor GPIOS and calling the special 79234640-9e10-4fea-a5c1-b5aa8b19756f
_DSM to get the GPIO type and map it to the sensor.

Export int3472_discrete_parse_crs() so that the atomisp driver can reuse
the INT3472 code for this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20250507184737.154747-4-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/int3472.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index 0a835cc85c67..89410f0cb73a 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -147,6 +147,9 @@ int skl_int3472_get_sensor_adev_and_name(struct device *dev,
 					 struct acpi_device **sensor_adev_ret,
 					 const char **name_ret);
 
+int int3472_discrete_parse_crs(struct int3472_discrete_device *int3472);
+void int3472_discrete_cleanup(struct int3472_discrete_device *int3472);
+
 int skl_int3472_register_gpio_clock(struct int3472_discrete_device *int3472,
 				    struct gpio_desc *gpio);
 int skl_int3472_register_dsm_clock(struct int3472_discrete_device *int3472);
-- 
cgit v1.2.3


From 45adb05473aa4afd6ff19fd0c46c17a6294ae788 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 7 May 2025 20:47:35 +0200
Subject: platform/x86: int3472: Remove unused sensor_config struct member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sensor_config is not used anywhere and its struct int3472_sensor_config
type also is not declared anywhere, drop it.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20250507184737.154747-5-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/int3472.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index 89410f0cb73a..78276a11c48d 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -110,8 +110,6 @@ struct int3472_discrete_device {
 	struct acpi_device *sensor;
 	const char *sensor_name;
 
-	const struct int3472_sensor_config *sensor_config;
-
 	struct int3472_gpio_regulator regulators[INT3472_MAX_REGULATORS];
 
 	struct int3472_clock {
-- 
cgit v1.2.3


From 190faecf64c54bb5f9d8f0ea2a6628078d0f2c83 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 22 Apr 2025 15:05:18 -0600
Subject: overflow: Add STACK_FLEX_ARRAY_SIZE() helper

Add new STACK_FLEX_ARRAY_SIZE() helper to get the size of a
flexible-array member defined using DEFINE_FLEX()/DEFINE_RAW_FLEX()
at compile time.

This is essentially the same as ARRAY_SIZE() but for on-stack
flexible-array members.

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/83d53744e11c80eb3f03765238cbe648855f4168.1745355442.git.gustavoars@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/overflow.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 6ee67c20b575..f33d74dac06f 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -420,6 +420,8 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * flexible array member.
  * Use __struct_size(@name) to get compile-time size of it afterwards.
  * Use __member_size(@name->member) to get compile-time size of @name members.
+ * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of
+ * elements in array @member.
  */
 #define DEFINE_RAW_FLEX(type, name, member, count)	\
 	_DEFINE_FLEX(type, name, member, count, = {})
@@ -438,8 +440,21 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * flexible array member.
  * Use __struct_size(@NAME) to get compile-time size of it afterwards.
  * Use __member_size(@NAME->member) to get compile-time size of @NAME members.
+ * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of
+ * elements in array @member.
  */
 #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT)	\
 	_DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, })
 
+/**
+ * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family.
+ * Returns the number of elements in @array.
+ *
+ * @name: Name for a variable defined in DEFINE_RAW_FLEX()/DEFINE_FLEX().
+ * @array: Name of the array member.
+ */
+#define STACK_FLEX_ARRAY_SIZE(name, array)						\
+	(__member_size((name)->array) / sizeof(*(name)->array) +			\
+						__must_be_array((name)->array))
+
 #endif /* __LINUX_OVERFLOW_H */
-- 
cgit v1.2.3


From 47e36ed7840661a9f7fb53554a1b04a5f8daffea Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 1 May 2025 18:44:43 -0600
Subject: overflow: Fix direct struct member initialization in _DEFINE_FLEX()

Currently, to statically initialize the struct members of the `type`
object created by _DEFINE_FLEX(), the internal `obj` member must be
explicitly referenced at the call site. See:

struct flex {
        int a;
        int b;
        struct foo flex_array[];
};

_DEFINE_FLEX(struct flex, instance, flex_array,
                 FIXED_SIZE, = {
                        .obj = {
                                .a = 0,
                                .b = 1,
                        },
                });

This leaks _DEFINE_FLEX() internal implementation details and make
the helper harder to use and read.

Fix this and allow for a more natural and intuitive C99 init-style:

_DEFINE_FLEX(struct flex, instance, flex_array,
                 FIXED_SIZE, = {
                        .a = 0,
                        .b = 1,
                });

Note that before these changes, the `initializer` argument was optional,
but now it's required.

Also, update "counter" member initialization in DEFINE_FLEX().

Fixes: 26dd68d293fd ("overflow: add DEFINE_FLEX() for on-stack allocs")
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/aBQVeyKfLOkO9Yss@kspp
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/overflow.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index f33d74dac06f..7b7be27ca113 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -396,7 +396,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * @name: Name for a variable to define.
  * @member: Name of the array member.
  * @count: Number of elements in the array; must be compile-time const.
- * @initializer: initializer expression (could be empty for no init).
+ * @initializer: Initializer expression (e.g., pass `= { }` at minimum).
  */
 #define _DEFINE_FLEX(type, name, member, count, initializer...)			\
 	_Static_assert(__builtin_constant_p(count),				\
@@ -404,7 +404,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 	union {									\
 		u8 bytes[struct_size_t(type, member, count)];			\
 		type obj;							\
-	} name##_u initializer;							\
+	} name##_u = { .obj initializer };					\
 	type *name = (type *)&name##_u
 
 /**
@@ -444,7 +444,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * elements in array @member.
  */
 #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT)	\
-	_DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, })
+	_DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, })
 
 /**
  * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family.
-- 
cgit v1.2.3


From 0cecd37daef3d57e6656c0023978d5ec2d7409c1 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Sat, 3 May 2025 11:46:18 -0700
Subject: gcc-plugins: Force full rebuild when plugins change

There was no dependency between the plugins changing and the rest of the
kernel being built. This could cause strange behaviors as instrumentation
could vary between targets depending on when they were built.

Generate a new header file, gcc-plugins.h, any time the GCC plugins
change. Include the header file in compiler-version.h when its associated
feature name, GCC_PLUGINS, is defined. This will be picked up by fixdep
and force rebuilds where needed.

Add a generic "touch" kbuild command, which will be used again in
a following patch. Add a "normalize_path" string helper to make the
"TOUCH" output less ugly.

Link: https://lore.kernel.org/r/20250503184623.2572355-1-kees@kernel.org
Tested-by: Nicolas Schier <n.schier@avm.de>
Reviewed-by: Nicolas Schier <n.schier@avm.de>
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/compiler-version.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h
index 573fa85b6c0c..5dba398a9412 100644
--- a/include/linux/compiler-version.h
+++ b/include/linux/compiler-version.h
@@ -12,3 +12,14 @@
  * and add dependency on include/config/CC_VERSION_TEXT, which is touched
  * by Kconfig when the version string from the compiler changes.
  */
+
+/* Additional tree-wide dependencies start here. */
+
+/*
+ * If any of the GCC plugins change, we need to rebuild everything that
+ * was built with them, as they may have changed their behavior and those
+ * behaviors may need to be synchronized across all translation units.
+ */
+#ifdef GCC_PLUGINS
+#include <generated/gcc-plugins.h>
+#endif
-- 
cgit v1.2.3


From 056000c471ea41db56de58f71b9fe727d6e68b9b Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Sat, 3 May 2025 11:46:19 -0700
Subject: randstruct: Force full rebuild when seed changes

While the randstruct GCC plugin was being rebuilt if the randstruct seed
changed, Clang builds did not notice the change. This could result in
differing struct layouts in a target depending on when it was built.

Include the existing generated header file in compiler-version.h when
its associated feature name, RANDSTRUCT, is defined. This will be picked
up by fixdep and force rebuilds where needed.

Link: https://lore.kernel.org/r/20250503184623.2572355-2-kees@kernel.org
Reviewed-by: Nicolas Schier <n.schier@avm.de>
Tested-by: Nicolas Schier <n.schier@avm.de>
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/compiler-version.h | 9 +++++++++
 include/linux/vermagic.h         | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h
index 5dba398a9412..4b9d39b37650 100644
--- a/include/linux/compiler-version.h
+++ b/include/linux/compiler-version.h
@@ -23,3 +23,12 @@
 #ifdef GCC_PLUGINS
 #include <generated/gcc-plugins.h>
 #endif
+
+/*
+ * If the randstruct seed itself changes (whether for GCC plugins or
+ * Clang), the entire tree needs to be rebuilt since the randomization of
+ * structures may change between compilation units if not.
+ */
+#ifdef RANDSTRUCT
+#include <generated/randstruct_hash.h>
+#endif
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 939ceabcaf06..335c360d4f9b 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -33,7 +33,6 @@
 #define MODULE_VERMAGIC_MODVERSIONS ""
 #endif
 #ifdef RANDSTRUCT
-#include <generated/randstruct_hash.h>
 #define MODULE_RANDSTRUCT "RANDSTRUCT_" RANDSTRUCT_HASHED_SEED
 #else
 #define MODULE_RANDSTRUCT
-- 
cgit v1.2.3


From 11bb1678e249e51cd748e8f91e5241b3ce71da3a Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Sat, 3 May 2025 11:46:20 -0700
Subject: integer-wrap: Force full rebuild when .scl file changes

Since the integer wrapping sanitizer's behavior depends on its associated
.scl file, we must force a full rebuild if the file changes. If not,
instrumentation may differ between targets based on when they were built.

Generate a new header file, integer-wrap.h, any time the Clang .scl
file changes. Include the header file in compiler-version.h when its
associated feature name, INTEGER_WRAP, is defined. This will be picked
up by fixdep and force rebuilds where needed.

Acked-by: Justin Stitt <justinstitt@google.com>
Link: https://lore.kernel.org/r/20250503184623.2572355-3-kees@kernel.org
Reviewed-by: Nicolas Schier <n.schier@avm.de>
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/compiler-version.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h
index 4b9d39b37650..ac1665a98a15 100644
--- a/include/linux/compiler-version.h
+++ b/include/linux/compiler-version.h
@@ -32,3 +32,13 @@
 #ifdef RANDSTRUCT
 #include <generated/randstruct_hash.h>
 #endif
+
+/*
+ * If any external changes affect Clang's integer wrapping sanitizer
+ * behavior, a full rebuild is needed as the coverage for wrapping types
+ * may have changed, which may impact the expected behaviors that should
+ * not differ between compilation units.
+ */
+#ifdef INTEGER_WRAP
+#include <generated/integer-wrap.h>
+#endif
-- 
cgit v1.2.3


From e86e43907f945e7f2e48d1c5c9105801ca2b11b7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:30 +0200
Subject: timers: Rename init_timer_key() as timer_init_key()

Move this API to the canonical timer_*() namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-3-mingo@kernel.org
---
 include/linux/timer.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 10596d7c3a34..0c1c3aa723e5 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -67,7 +67,7 @@
 /*
  * LOCKDEP and DEBUG timer interfaces.
  */
-void init_timer_key(struct timer_list *timer,
+void timer_init_key(struct timer_list *timer,
 		    void (*func)(struct timer_list *), unsigned int flags,
 		    const char *name, struct lock_class_key *key);
 
@@ -83,7 +83,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
 					   const char *name,
 					   struct lock_class_key *key)
 {
-	init_timer_key(timer, func, flags, name, key);
+	timer_init_key(timer, func, flags, name, key);
 }
 #endif
 
@@ -91,7 +91,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
 #define __init_timer(_timer, _fn, _flags)				\
 	do {								\
 		static struct lock_class_key __key;			\
-		init_timer_key((_timer), (_fn), (_flags), #_timer, &__key);\
+		timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\
 	} while (0)
 
 #define __init_timer_on_stack(_timer, _fn, _flags)			\
@@ -102,7 +102,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
 	} while (0)
 #else
 #define __init_timer(_timer, _fn, _flags)				\
-	init_timer_key((_timer), (_fn), (_flags), NULL, NULL)
+	timer_init_key((_timer), (_fn), (_flags), NULL, NULL)
 #define __init_timer_on_stack(_timer, _fn, _flags)			\
 	init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL)
 #endif
-- 
cgit v1.2.3


From 7879d10de3318af0b7893d1efc9d7654cd4ac1a6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:31 +0200
Subject: timers: Rename init_timer_on_stack_key() as timer_init_key_on_stack()

Move this API to the canonical timer_*() namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-4-mingo@kernel.org
---
 include/linux/timer.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 0c1c3aa723e5..31127e8de010 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -72,12 +72,12 @@ void timer_init_key(struct timer_list *timer,
 		    const char *name, struct lock_class_key *key);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-extern void init_timer_on_stack_key(struct timer_list *timer,
+extern void timer_init_key_on_stack(struct timer_list *timer,
 				    void (*func)(struct timer_list *),
 				    unsigned int flags, const char *name,
 				    struct lock_class_key *key);
 #else
-static inline void init_timer_on_stack_key(struct timer_list *timer,
+static inline void timer_init_key_on_stack(struct timer_list *timer,
 					   void (*func)(struct timer_list *),
 					   unsigned int flags,
 					   const char *name,
@@ -97,14 +97,14 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
 #define __init_timer_on_stack(_timer, _fn, _flags)			\
 	do {								\
 		static struct lock_class_key __key;			\
-		init_timer_on_stack_key((_timer), (_fn), (_flags),	\
+		timer_init_key_on_stack((_timer), (_fn), (_flags),	\
 					#_timer, &__key);		 \
 	} while (0)
 #else
 #define __init_timer(_timer, _fn, _flags)				\
 	timer_init_key((_timer), (_fn), (_flags), NULL, NULL)
 #define __init_timer_on_stack(_timer, _fn, _flags)			\
-	init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL)
+	timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL)
 #endif
 
 /**
-- 
cgit v1.2.3


From 9505215b6b3233d38c5ee65cfd47b6a5a36adab9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:32 +0200
Subject: timers: Rename __init_timer() as __timer_init()

Move this API to the canonical __timer_*() namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-5-mingo@kernel.org
---
 include/linux/timer.h     | 6 +++---
 include/linux/workqueue.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 31127e8de010..11e1fac18f0e 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -88,7 +88,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
 #endif
 
 #ifdef CONFIG_LOCKDEP
-#define __init_timer(_timer, _fn, _flags)				\
+#define __timer_init(_timer, _fn, _flags)				\
 	do {								\
 		static struct lock_class_key __key;			\
 		timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\
@@ -101,7 +101,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
 					#_timer, &__key);		 \
 	} while (0)
 #else
-#define __init_timer(_timer, _fn, _flags)				\
+#define __timer_init(_timer, _fn, _flags)				\
 	timer_init_key((_timer), (_fn), (_flags), NULL, NULL)
 #define __init_timer_on_stack(_timer, _fn, _flags)			\
 	timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL)
@@ -118,7 +118,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
  * be used and must be balanced with a call to destroy_timer_on_stack().
  */
 #define timer_setup(timer, callback, flags)			\
-	__init_timer((timer), (callback), (flags))
+	__timer_init((timer), (callback), (flags))
 
 #define timer_setup_on_stack(timer, callback, flags)		\
 	__init_timer_on_stack((timer), (callback), (flags))
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b0dc957c3e56..985f69f8fb99 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -316,7 +316,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define __INIT_DELAYED_WORK(_work, _func, _tflags)			\
 	do {								\
 		INIT_WORK(&(_work)->work, (_func));			\
-		__init_timer(&(_work)->timer,				\
+		__timer_init(&(_work)->timer,				\
 			     delayed_work_timer_fn,			\
 			     (_tflags) | TIMER_IRQSAFE);		\
 	} while (0)
-- 
cgit v1.2.3


From 9a716ac6eaaa73599921fa081c99b67bef589171 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:33 +0200
Subject: timers: Rename __init_timer_on_stack() as __timer_init_on_stack()

Move this API to the canonical __timer_*() namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-6-mingo@kernel.org
---
 include/linux/timer.h     | 6 +++---
 include/linux/workqueue.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 11e1fac18f0e..4e1237ff4b24 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -94,7 +94,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
 		timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\
 	} while (0)
 
-#define __init_timer_on_stack(_timer, _fn, _flags)			\
+#define __timer_init_on_stack(_timer, _fn, _flags)			\
 	do {								\
 		static struct lock_class_key __key;			\
 		timer_init_key_on_stack((_timer), (_fn), (_flags),	\
@@ -103,7 +103,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
 #else
 #define __timer_init(_timer, _fn, _flags)				\
 	timer_init_key((_timer), (_fn), (_flags), NULL, NULL)
-#define __init_timer_on_stack(_timer, _fn, _flags)			\
+#define __timer_init_on_stack(_timer, _fn, _flags)			\
 	timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL)
 #endif
 
@@ -121,7 +121,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
 	__timer_init((timer), (callback), (flags))
 
 #define timer_setup_on_stack(timer, callback, flags)		\
-	__init_timer_on_stack((timer), (callback), (flags))
+	__timer_init_on_stack((timer), (callback), (flags))
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void destroy_timer_on_stack(struct timer_list *timer);
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 985f69f8fb99..c84da78878d9 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -324,7 +324,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags)		\
 	do {								\
 		INIT_WORK_ONSTACK(&(_work)->work, (_func));		\
-		__init_timer_on_stack(&(_work)->timer,			\
+		__timer_init_on_stack(&(_work)->timer,			\
 				      delayed_work_timer_fn,		\
 				      (_tflags) | TIMER_IRQSAFE);	\
 	} while (0)
-- 
cgit v1.2.3


From 220beffd36c26ac68e1c646f91b7ddf4f39039e6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:34 +0200
Subject: timers: Rename NEXT_TIMER_MAX_DELTA as TIMER_NEXT_MAX_DELTA

Move this macro to the canonical TIMER_* namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-7-mingo@kernel.org
---
 include/linux/timer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 4e1237ff4b24..68cf8e28bbe0 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -156,7 +156,7 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires);
  * The jiffies value which is added to now, when there is no timer
  * in the timer wheel:
  */
-#define NEXT_TIMER_MAX_DELTA	((1UL << 30) - 1)
+#define TIMER_NEXT_MAX_DELTA	((1UL << 30) - 1)
 
 extern void add_timer(struct timer_list *timer);
 extern void add_timer_local(struct timer_list *timer);
-- 
cgit v1.2.3


From 751e6a394c2ecd08825d5ba527478e171b87144e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:35 +0200
Subject: timers: Rename init_timers() as timers_init()

Move this API to the canonical timers_*() namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-8-mingo@kernel.org
---
 include/linux/timer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 68cf8e28bbe0..153d07dad3cd 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -168,7 +168,7 @@ extern int timer_delete(struct timer_list *timer);
 extern int timer_shutdown_sync(struct timer_list *timer);
 extern int timer_shutdown(struct timer_list *timer);
 
-extern void init_timers(void);
+extern void timers_init(void);
 struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
-- 
cgit v1.2.3


From 367ed4e35734d6e7bce1dbca426a5bf150d76905 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:36 +0200
Subject: treewide, timers: Rename try_to_del_timer_sync() as
 timer_delete_sync_try()

Move this API to the canonical timer_*() namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-9-mingo@kernel.org
---
 include/linux/timer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 153d07dad3cd..5b6ff90fcf81 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -162,7 +162,7 @@ extern void add_timer(struct timer_list *timer);
 extern void add_timer_local(struct timer_list *timer);
 extern void add_timer_global(struct timer_list *timer);
 
-extern int try_to_del_timer_sync(struct timer_list *timer);
+extern int timer_delete_sync_try(struct timer_list *timer);
 extern int timer_delete_sync(struct timer_list *timer);
 extern int timer_delete(struct timer_list *timer);
 extern int timer_shutdown_sync(struct timer_list *timer);
-- 
cgit v1.2.3


From aad823aa3a7d675a8d0de478a04307f63e3725db Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 7 May 2025 19:53:37 +0200
Subject: treewide, timers: Rename destroy_timer_on_stack() as
 timer_destroy_on_stack()

Move this API to the canonical timer_*() namespace.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250507175338.672442-10-mingo@kernel.org
---
 include/linux/timer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 5b6ff90fcf81..7b53043a2d25 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -115,7 +115,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
  *
  * Regular timer initialization should use either DEFINE_TIMER() above,
  * or timer_setup(). For timers on the stack, timer_setup_on_stack() must
- * be used and must be balanced with a call to destroy_timer_on_stack().
+ * be used and must be balanced with a call to timer_destroy_on_stack().
  */
 #define timer_setup(timer, callback, flags)			\
 	__timer_init((timer), (callback), (flags))
@@ -124,9 +124,9 @@ static inline void timer_init_key_on_stack(struct timer_list *timer,
 	__timer_init_on_stack((timer), (callback), (flags))
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-extern void destroy_timer_on_stack(struct timer_list *timer);
+extern void timer_destroy_on_stack(struct timer_list *timer);
 #else
-static inline void destroy_timer_on_stack(struct timer_list *timer) { }
+static inline void timer_destroy_on_stack(struct timer_list *timer) { }
 #endif
 
 #define from_timer(var, callback_timer, timer_fieldname) \
-- 
cgit v1.2.3


From 01475aedfdfa33a5ee3219079426f5743367c624 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Mon, 28 Apr 2025 21:34:45 +0200
Subject: futex: Fix outdated comment in struct restart_block

Since commit 2070887fdeac ("futex: fix restart in wait_requeue_pi"),
futex_wait_requeue_pi() no longer uses restart_block. Update the comment
accordingly.

Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250428193445.4571-1-namcao@linutronix.de
---
 include/linux/restart_block.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 13f17676c5f4..7e50bbc94e47 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -26,7 +26,7 @@ struct restart_block {
 	unsigned long arch_data;
 	long (*fn)(struct restart_block *);
 	union {
-		/* For futex_wait and futex_wait_requeue_pi */
+		/* For futex_wait() */
 		struct {
 			u32 __user *uaddr;
 			u32 val;
-- 
cgit v1.2.3


From f400565faa50737ac1d550d2c75128c0dad75765 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 24 Apr 2025 18:11:27 +0200
Subject: perf: Remove too early and redundant CPU hotplug handling

The CPU hotplug handlers are called twice: at prepare and online stage.

Their role is to:

1) Enable/disable a CPU context. This is irrelevant and even buggy at
   the prepare stage because the CPU is still offline. On early
   secondary CPU up, creating an event attached to that CPU might
   silently fail because the CPU context is observed as online but the
   context installation's IPI failure is ignored.

2) Update the scope cpumasks and re-migrate the events accordingly in
   the CPU down case. This is irrelevant at the prepare stage.

3) Remove the events attached to the context of the offlining CPU. It
   even uses an (unnecessary) IPI for it. This is also irrelevant at the
   prepare stage.

Also none of the *_PREPARE and *_STARTING architecture perf related CPU
hotplug callbacks rely on CPUHP_PERF_PREPARE.

CPUHP_AP_PERF_ONLINE is enough and the right place to perform the work.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250424161128.29176-4-frederic@kernel.org
---
 include/linux/cpuhotplug.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 1987400000b4..df366ee15456 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -60,7 +60,6 @@ enum cpuhp_state {
 	/* PREPARE section invoked on a control CPU */
 	CPUHP_OFFLINE = 0,
 	CPUHP_CREATE_THREADS,
-	CPUHP_PERF_PREPARE,
 	CPUHP_PERF_X86_PREPARE,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
 	CPUHP_PERF_POWER,
-- 
cgit v1.2.3


From 22c64f37e1d4e757b0073a72f1439c2c3509c5cb Mon Sep 17 00:00:00 2001
From: Mohan Kumar G <quic_mkumarg@quicinc.com>
Date: Mon, 5 May 2025 20:58:36 +0530
Subject: wifi: mac80211: Update MCS15 support in link_conf

As per IEEE 802.11be-2024 - 9.4.2.321, EHT operation element
contains MCS15 Disable subfield as the sixth bit, which is set when
MCS15 support is not enabled.

Get MCS15 support from EHT operation params and add it in link_conf
so that driver can use this value to know if EHT-MCS 15 reception
is enabled.

Co-developed-by: Dhanavandhana Kannan <quic_dhanavan1@quicinc.com>
Signed-off-by: Dhanavandhana Kannan <quic_dhanavan1@quicinc.com>
Signed-off-by: Mohan Kumar G <quic_mkumarg@quicinc.com>
Link: https://patch.msgid.link/20250505152836.3266829-1-quic_mkumarg@quicinc.com
[remove pointless !! for bool assignment]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 17f917cb4540..420c7f9aa6ee 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2325,6 +2325,7 @@ struct ieee80211_eht_cap_elem {
 #define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION	                0x04
 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT         0x08
 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK      0x30
+#define IEEE80211_EHT_OPER_MCS15_DISABLE                        0x40
 
 /**
  * struct ieee80211_eht_operation - eht operation element
-- 
cgit v1.2.3


From 56a7b9f8b05981e929becb45a826558779bca6f6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 13 Mar 2025 15:31:50 -0700
Subject: ratelimit: Create functions to handle ratelimit_state internals

A number of ratelimit use cases do open-coded access to the
ratelimit_state structure's ->missed field.  This works, but is a bit
messy and makes it more annoying to make changes to this field.

Therefore, provide a ratelimit_state_inc_miss() function that increments
the ->missed field, a ratelimit_state_get_miss() function that reads
out the ->missed field, and a ratelimit_state_reset_miss() function
that reads out that field, but that also resets its value to zero.
These functions will replace client-code open-coded uses of ->missed.

In addition, a new ratelimit_state_reset_interval() function encapsulates
what was previously open-coded lock acquisition and direct field updates.

[ paulmck: Apply kernel test robot feedback. ]

Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/
Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 include/linux/ratelimit.h | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index b17e0cd0a30c..8400c5356c18 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -22,16 +22,46 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs)
 					DEFAULT_RATELIMIT_BURST);
 }
 
+static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs)
+{
+	rs->missed++;
+}
+
+static inline int ratelimit_state_get_miss(struct ratelimit_state *rs)
+{
+	return rs->missed;
+}
+
+static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs)
+{
+	int ret = rs->missed;
+
+	rs->missed = 0;
+	return ret;
+}
+
+static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rs->lock, flags);
+	rs->interval = interval_init;
+	rs->begin = 0;
+	rs->printed = 0;
+	ratelimit_state_reset_miss(rs);
+	raw_spin_unlock_irqrestore(&rs->lock, flags);
+}
+
 static inline void ratelimit_state_exit(struct ratelimit_state *rs)
 {
+	int m;
+
 	if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
 		return;
 
-	if (rs->missed) {
-		pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
-			current->comm, rs->missed);
-		rs->missed = 0;
-	}
+	m = ratelimit_state_reset_miss(rs);
+	if (m)
+		pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m);
 }
 
 static inline void
-- 
cgit v1.2.3


From 78bf44de47b3cec72ee8ddc14161d5b3212f25e0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 14 Mar 2025 09:07:13 -0700
Subject: ratelimit: Convert the ->missed field to atomic_t

The ratelimit_state structure's ->missed field is sometimes incremented
locklessly, and it would be good to avoid lost counts.  This is also
needed to count the number of misses due to trylock failure.  Therefore,
convert the ratelimit_state structure's ->missed field to atomic_t.

Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/
Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 include/linux/ratelimit.h       | 9 +++------
 include/linux/ratelimit_types.h | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 8400c5356c18..c78b92b3e5cd 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -24,20 +24,17 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs)
 
 static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs)
 {
-	rs->missed++;
+	atomic_inc(&rs->missed);
 }
 
 static inline int ratelimit_state_get_miss(struct ratelimit_state *rs)
 {
-	return rs->missed;
+	return atomic_read(&rs->missed);
 }
 
 static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs)
 {
-	int ret = rs->missed;
-
-	rs->missed = 0;
-	return ret;
+	return atomic_xchg_relaxed(&rs->missed, 0);
 }
 
 static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init)
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index 765232ce0b5e..d21fe82b67f6 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -18,7 +18,7 @@ struct ratelimit_state {
 	int		interval;
 	int		burst;
 	int		printed;
-	int		missed;
+	atomic_t	missed;
 	unsigned int	flags;
 	unsigned long	begin;
 };
-- 
cgit v1.2.3


From e64a348dc148af41cec266b2143305a2d0228ee7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 25 Mar 2025 15:32:54 -0700
Subject: ratelimit: Avoid jiffies=0 special case

The ___ratelimit() function special-cases the jiffies-counter value of zero
as "uninitialized".  This works well on 64-bit systems, where the jiffies
counter is not going to return to zero for more than half a billion years
on systems with HZ=1000, but similar 32-bit systems take less than 50 days
to wrap the jiffies counter.  And although the consequences of wrapping the
jiffies counter seem to be limited to minor confusion on the duration of
the rate-limiting interval that happens to end at time zero, it is almost
no work to avoid this confusion.

Therefore, introduce a RATELIMIT_INITIALIZED bit to the ratelimit_state
structure's ->flags field so that a ->begin value of zero is no longer
special.

Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/
Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 include/linux/ratelimit.h       | 2 +-
 include/linux/ratelimit_types.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index c78b92b3e5cd..adfec24061d1 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -43,7 +43,7 @@ static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, in
 
 	raw_spin_lock_irqsave(&rs->lock, flags);
 	rs->interval = interval_init;
-	rs->begin = 0;
+	rs->flags &= ~RATELIMIT_INITIALIZED;
 	rs->printed = 0;
 	ratelimit_state_reset_miss(rs);
 	raw_spin_unlock_irqrestore(&rs->lock, flags);
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index d21fe82b67f6..ef6711b6b229 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -11,6 +11,7 @@
 
 /* issue num suppressed message on exit */
 #define RATELIMIT_MSG_ON_RELEASE	BIT(0)
+#define RATELIMIT_INITIALIZED		BIT(1)
 
 struct ratelimit_state {
 	raw_spinlock_t	lock;		/* protect the state */
-- 
cgit v1.2.3


From cf8cfa8a9978bfe77f2648a04824a46d58258e68 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 9 Apr 2025 16:54:33 -0700
Subject: ratelimit: Reduce ___ratelimit() false-positive rate limiting

Retain the locked design, but check rate-limiting even when the lock
could not be acquired.

Link: https://lore.kernel.org/all/Z_VRo63o2UsVoxLG@pathway.suse.cz/
Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/
Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 include/linux/ratelimit.h       | 2 +-
 include/linux/ratelimit_types.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index adfec24061d1..7aaad158ee37 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -44,7 +44,7 @@ static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, in
 	raw_spin_lock_irqsave(&rs->lock, flags);
 	rs->interval = interval_init;
 	rs->flags &= ~RATELIMIT_INITIALIZED;
-	rs->printed = 0;
+	atomic_set(&rs->rs_n_left, rs->burst);
 	ratelimit_state_reset_miss(rs);
 	raw_spin_unlock_irqrestore(&rs->lock, flags);
 }
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index ef6711b6b229..b19c4354540a 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -18,7 +18,7 @@ struct ratelimit_state {
 
 	int		interval;
 	int		burst;
-	int		printed;
+	atomic_t	rs_n_left;
 	atomic_t	missed;
 	unsigned int	flags;
 	unsigned long	begin;
-- 
cgit v1.2.3


From b9e22b35d4598aefed642928ed2856a9900e5b37 Mon Sep 17 00:00:00 2001
From: Cedric Xing <cedric.xing@intel.com>
Date: Thu, 8 May 2025 19:07:39 -0700
Subject: tsm-mr: Add TVM Measurement Register support

Introduce new TSM Measurement helper library (tsm-mr) for TVM guest drivers
to expose MRs (Measurement Registers) as sysfs attributes, with Crypto
Agility support.

Add the following new APIs (see include/linux/tsm-mr.h for details):

- tsm_mr_create_attribute_group(): Take on input a `struct
  tsm_measurements` instance, which includes one `struct
  tsm_measurement_register` per MR with properties like `TSM_MR_F_READABLE`
  and `TSM_MR_F_WRITABLE`, to determine the supported operations and create
  the sysfs attributes accordingly. On success, return a `struct
  attribute_group` instance that will typically be included by the guest
  driver into `miscdevice.groups` before calling misc_register().

- tsm_mr_free_attribute_group(): Free the memory allocated to the attrubute
  group returned by tsm_mr_create_attribute_group().

tsm_mr_create_attribute_group() creates one attribute for each MR, with
names following this pattern:

        MRNAME[:HASH]

- MRNAME - Placeholder for the MR name, as specified by
  `tsm_measurement_register.mr_name`.
- :HASH - Optional suffix indicating the hash algorithm associated with
  this MR, as specified by `tsm_measurement_register.mr_hash`.

Support Crypto Agility by allowing multiple definitions of the same MR
(i.e., with the same `mr_name`) with distinct HASH algorithms.

NOTE: Crypto Agility, introduced in TPM 2.0, allows new hash algorithms to
be introduced without breaking compatibility with applications using older
algorithms. CC architectures may face the same challenge in the future,
needing new hashes for security while retaining compatibility with older
hashes, hence the need for Crypto Agility.

Signed-off-by: Cedric Xing <cedric.xing@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Dionna Amalie Glaze <dionnaglaze@google.com>
[djbw: fixup bin_attr const conflict]
Link: https://patch.msgid.link/20250509020739.882913-1-dan.j.williams@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/tsm-mr.h | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 include/linux/tsm-mr.h

(limited to 'include/linux')

diff --git a/include/linux/tsm-mr.h b/include/linux/tsm-mr.h
new file mode 100644
index 000000000000..50a521f4ac97
--- /dev/null
+++ b/include/linux/tsm-mr.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __TSM_MR_H
+#define __TSM_MR_H
+
+#include <crypto/hash_info.h>
+
+/**
+ * struct tsm_measurement_register - describes an architectural measurement
+ * register (MR)
+ * @mr_name: name of the MR
+ * @mr_value: buffer containing the current value of the MR
+ * @mr_size: size of the MR - typically the digest size of @mr_hash
+ * @mr_flags: bitwise OR of one or more flags, detailed below
+ * @mr_hash: optional hash identifier defined in include/uapi/linux/hash_info.h.
+ *
+ * A CC guest driver encloses an array of this structure in struct
+ * tsm_measurements to detail the measurement facility supported by the
+ * underlying CC hardware.
+ *
+ * @mr_name and @mr_value must stay valid until this structure is no longer in
+ * use.
+ *
+ * @mr_flags is the bitwise-OR of zero or more of the flags below.
+ *
+ * * %TSM_MR_F_READABLE - the sysfs attribute corresponding to this MR is readable.
+ * * %TSM_MR_F_WRITABLE - the sysfs attribute corresponding to this MR is writable.
+ *   The semantics is typically to extend the MR but could vary depending on the
+ *   architecture and the MR.
+ * * %TSM_MR_F_LIVE - this MR's value may differ from the last value written, so
+ *   must be read back from the underlying CC hardware/firmware.
+ * * %TSM_MR_F_RTMR - bitwise-OR of %TSM_MR_F_LIVE and %TSM_MR_F_WRITABLE.
+ * * %TSM_MR_F_NOHASH - this MR does NOT have an associated hash algorithm.
+ *   @mr_hash will be ignored when this flag is set.
+ */
+struct tsm_measurement_register {
+	const char *mr_name;
+	void *mr_value;
+	u32 mr_size;
+	u32 mr_flags;
+	enum hash_algo mr_hash;
+};
+
+#define TSM_MR_F_NOHASH 1
+#define TSM_MR_F_WRITABLE 2
+#define TSM_MR_F_READABLE 4
+#define TSM_MR_F_LIVE 8
+#define TSM_MR_F_RTMR (TSM_MR_F_LIVE | TSM_MR_F_WRITABLE)
+
+#define TSM_MR_(mr, hash)                              \
+	.mr_name = #mr, .mr_size = hash##_DIGEST_SIZE, \
+	.mr_hash = HASH_ALGO_##hash, .mr_flags = TSM_MR_F_READABLE
+
+/**
+ * struct tsm_measurements - defines the CC architecture specific measurement
+ * facility and methods for updating measurement registers (MRs)
+ * @mrs: Array of MR definitions.
+ * @nr_mrs: Number of elements in @mrs.
+ * @refresh: Callback function to load/sync all MRs from TVM hardware/firmware
+ *           into the kernel cache.
+ * @write: Callback function to write to the MR specified by the parameter @mr.
+ *         Typically, writing to an MR extends the input buffer to that MR.
+ *
+ * The @refresh callback is invoked when an MR with %TSM_MR_F_LIVE set is being
+ * read and the cache is stale. It must reload all MRs with %TSM_MR_F_LIVE set.
+ * The function parameter @tm is a pointer pointing back to this structure.
+ *
+ * The @write callback is invoked whenever an MR is being written. It takes two
+ * additional parameters besides @tm:
+ *
+ * * @mr - points to the MR (an element of @tm->mrs) being written.
+ * * @data - contains the bytes to write and whose size is @mr->mr_size.
+ *
+ * Both @refresh and @write should return 0 on success and an appropriate error
+ * code on failure.
+ */
+struct tsm_measurements {
+	const struct tsm_measurement_register *mrs;
+	size_t nr_mrs;
+	int (*refresh)(const struct tsm_measurements *tm);
+	int (*write)(const struct tsm_measurements *tm,
+		     const struct tsm_measurement_register *mr, const u8 *data);
+};
+
+const struct attribute_group *
+tsm_mr_create_attribute_group(const struct tsm_measurements *tm);
+void tsm_mr_free_attribute_group(const struct attribute_group *attr_grp);
+
+#endif
-- 
cgit v1.2.3


From 4701073c3debd16d7f534f3eb808bd9b50601c0c Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Tue, 6 May 2025 16:07:22 +0800
Subject: net: enetc: add initial netc-lib driver to support NTMP

Some NETC functionality is controlled using control messages sent to the
hardware using BD ring interface with 32B descriptor similar to transmit
BD ring used on ENETC. This BD ring interface is referred to as command
BD ring. It is used to configure functionality where the underlying
resources may be shared between different entities or being too large to
configure using direct registers. Therefore, a messaging protocol called
NETC Table Management Protocol (NTMP) is provided for exchanging
configuration and management information between the software and the
hardware using the command BD ring interface.

For the management protocol of LS1028A has been retroactively named NTMP
1.0, and its implementation is in enetc_cbdr.c and enetc_qos.c. However,
NTMP of i.MX95 has been upgraded to version 2.0, which is incompatible
with LS1028A, because the message formats have been changed. Therefore,
add the netc-lib driver to support NTMP 2.0 to operate various tables.
Note that, only MAC address filter table and RSS table are supported at
the moment. More tables will be supported in subsequent patches.

It is worth mentioning that the purpose of the netc-lib driver is to
provide some NTMP-based generic interfaces for ENETC and NETC Switch
drivers. Currently, it only supports the configurations of some tables.
Interfaces such as tc flower and debugfs will be added in the future.

Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20250506080735.3444381-2-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/fsl/ntmp.h | 121 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 include/linux/fsl/ntmp.h

(limited to 'include/linux')

diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h
new file mode 100644
index 000000000000..916dc4fe7de3
--- /dev/null
+++ b/include/linux/fsl/ntmp.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2025 NXP */
+#ifndef __NETC_NTMP_H
+#define __NETC_NTMP_H
+
+#include <linux/bitops.h>
+#include <linux/if_ether.h>
+
+struct maft_keye_data {
+	u8 mac_addr[ETH_ALEN];
+	__le16 resv;
+};
+
+struct maft_cfge_data {
+	__le16 si_bitmap;
+	__le16 resv;
+};
+
+struct netc_cbdr_regs {
+	void __iomem *pir;
+	void __iomem *cir;
+	void __iomem *mr;
+
+	void __iomem *bar0;
+	void __iomem *bar1;
+	void __iomem *lenr;
+};
+
+struct netc_tbl_vers {
+	u8 maft_ver;
+	u8 rsst_ver;
+};
+
+struct netc_cbdr {
+	struct device *dev;
+	struct netc_cbdr_regs regs;
+
+	int bd_num;
+	int next_to_use;
+	int next_to_clean;
+
+	int dma_size;
+	void *addr_base;
+	void *addr_base_align;
+	dma_addr_t dma_base;
+	dma_addr_t dma_base_align;
+
+	/* Serialize the order of command BD ring */
+	spinlock_t ring_lock;
+};
+
+struct ntmp_user {
+	int cbdr_num;	/* number of control BD ring */
+	struct device *dev;
+	struct netc_cbdr *ring;
+	struct netc_tbl_vers tbl;
+};
+
+struct maft_entry_data {
+	struct maft_keye_data keye;
+	struct maft_cfge_data cfge;
+};
+
+#if IS_ENABLED(CONFIG_NXP_NETC_LIB)
+int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
+		   const struct netc_cbdr_regs *regs);
+void ntmp_free_cbdr(struct netc_cbdr *cbdr);
+
+/* NTMP APIs */
+int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id,
+			struct maft_entry_data *maft);
+int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id,
+			  struct maft_entry_data *maft);
+int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id);
+int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table,
+			   int count);
+int ntmp_rsst_query_entry(struct ntmp_user *user,
+			  u32 *table, int count);
+#else
+static inline int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev,
+				 const struct netc_cbdr_regs *regs)
+{
+	return 0;
+}
+
+static inline void ntmp_free_cbdr(struct netc_cbdr *cbdr)
+{
+}
+
+static inline int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id,
+				      struct maft_entry_data *maft)
+{
+	return 0;
+}
+
+static inline int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id,
+					struct maft_entry_data *maft)
+{
+	return 0;
+}
+
+static inline int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id)
+{
+	return 0;
+}
+
+static inline int ntmp_rsst_update_entry(struct ntmp_user *user,
+					 const u32 *table, int count)
+{
+	return 0;
+}
+
+static inline int ntmp_rsst_query_entry(struct ntmp_user *user,
+					u32 *table, int count)
+{
+	return 0;
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From 1af3331764b9356fadc4652af77bbbc97f3d7f78 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Sat, 29 Mar 2025 09:42:19 +0100
Subject: super: add filesystem freezing helpers for suspend and hibernate

Allow the power subsystem to support filesystem freeze for
suspend and hibernate.

For some kernel subsystems it is paramount that they are guaranteed that
they are the owner of the freeze to avoid any risk of deadlocks. This is
the case for the power subsystem. Enable it to recognize whether it did
actually freeze the filesystem.

If userspace has 10 filesystems and suspend/hibernate manges to freeze 5
and then fails on the 6th for whatever odd reason (current or future)
then power needs to undo the freeze of the first 5 filesystems. It can't
just walk the list again because while it's unlikely that a new
filesystem got added in the meantime it still cannot tell which
filesystems the power subsystem actually managed to get a freeze
reference count on that needs to be dropped during thaw.

There's various ways out of this ugliness. For example, record the
filesystems the power subsystem managed to freeze on a temporary list in
the callbacks and then walk that list backwards during thaw to undo the
freezing or make sure that the power subsystem just actually exclusively
freezes things it can freeze and marking such filesystems as being owned
by power for the duration of the suspend or resume cycle. I opted for
the latter as that seemed the clean thing to do even if it means more
code changes.

If hibernation races with filesystem freezing (e.g. DM reconfiguration),
then hibernation need not freeze a filesystem because it's already
frozen but userspace may thaw the filesystem before hibernation actually
happens.

If the race happens the other way around, DM reconfiguration may
unexpectedly fail with EBUSY.

So allow FREEZE_EXCL to nest with other holders. An exclusive freezer
cannot be undone by any of the other concurrent freezers.

Link: https://lore.kernel.org/r/20250329-work-freeze-v2-6-a47af37ecc3d@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 96f8c8a794ea..7a3f821d2723 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1307,6 +1307,7 @@ struct sb_writers {
 	unsigned short			frozen;		/* Is sb frozen? */
 	int				freeze_kcount;	/* How many kernel freeze requests? */
 	int				freeze_ucount;	/* How many userspace freeze requests? */
+	const void			*freeze_owner;	/* Owner of the freeze */
 	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
 };
 
@@ -2269,6 +2270,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
  * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
  * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
  * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
+ * @FREEZE_EXCL: a freeze that can only be undone by the owner
  *
  * Indicate who the owner of the freeze or thaw request is and whether
  * the freeze needs to be exclusive or can nest.
@@ -2282,6 +2284,7 @@ enum freeze_holder {
 	FREEZE_HOLDER_KERNEL	= (1U << 0),
 	FREEZE_HOLDER_USERSPACE	= (1U << 1),
 	FREEZE_MAY_NEST		= (1U << 2),
+	FREEZE_EXCL		= (1U << 3),
 };
 
 struct super_operations {
@@ -2295,9 +2298,9 @@ struct super_operations {
 	void (*evict_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
 	int (*sync_fs)(struct super_block *sb, int wait);
-	int (*freeze_super) (struct super_block *, enum freeze_holder who);
+	int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner);
 	int (*freeze_fs) (struct super_block *);
-	int (*thaw_super) (struct super_block *, enum freeze_holder who);
+	int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner);
 	int (*unfreeze_fs) (struct super_block *);
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
@@ -2705,8 +2708,10 @@ extern int unregister_filesystem(struct file_system_type *);
 extern int vfs_statfs(const struct path *, struct kstatfs *);
 extern int user_statfs(const char __user *, struct kstatfs *);
 extern int fd_statfs(int, struct kstatfs *);
-int freeze_super(struct super_block *super, enum freeze_holder who);
-int thaw_super(struct super_block *super, enum freeze_holder who);
+int freeze_super(struct super_block *super, enum freeze_holder who,
+		 const void *freeze_owner);
+int thaw_super(struct super_block *super, enum freeze_holder who,
+	       const void *freeze_owner);
 extern __printf(2, 3)
 int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
 extern int super_setup_bdi(struct super_block *sb);
@@ -3518,6 +3523,8 @@ extern void drop_super_exclusive(struct super_block *sb);
 extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg);
 extern void iterate_supers_type(struct file_system_type *,
 			        void (*)(struct super_block *, void *), void *);
+void filesystems_freeze(void);
+void filesystems_thaw(void);
 
 extern int dcache_dir_open(struct inode *, struct file *);
 extern int dcache_dir_close(struct inode *, struct file *);
-- 
cgit v1.2.3


From 79fb8d8d93e41be4ebda8c9cc507e20297277231 Mon Sep 17 00:00:00 2001
From: Joel Savitz <jsavitz@redhat.com>
Date: Thu, 8 May 2025 14:49:30 -0400
Subject: include/cgroup: separate {get,put}_cgroup_ns no-op case

When CONFIG_CGROUPS is not selected, {get,put}_cgroup_ns become no-ops
and therefore it is not necessary to compile in the code for changing
the reference count.

When CONFIG_CGROUP is selected, there is no valid case where
either of {get,put}_cgroup_ns() will be called with a NULL argument.

Signed-off-by: Joel Savitz <jsavitz@redhat.com>
Link: https://lore.kernel.org/20250508184930.183040-3-jsavitz@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/cgroup.h | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 28e999f2c642..8da19c3627b0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -785,6 +785,17 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
 		   struct cgroup_namespace *ns);
 
+static inline void get_cgroup_ns(struct cgroup_namespace *ns)
+{
+	refcount_inc(&ns->ns.count);
+}
+
+static inline void put_cgroup_ns(struct cgroup_namespace *ns)
+{
+	if (refcount_dec_and_test(&ns->ns.count))
+		free_cgroup_ns(ns);
+}
+
 #else /* !CONFIG_CGROUPS */
 
 static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
@@ -795,19 +806,10 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
 	return old_ns;
 }
 
-#endif /* !CONFIG_CGROUPS */
+static inline void get_cgroup_ns(struct cgroup_namespace *ns) { }
+static inline void put_cgroup_ns(struct cgroup_namespace *ns) { }
 
-static inline void get_cgroup_ns(struct cgroup_namespace *ns)
-{
-	if (ns)
-		refcount_inc(&ns->ns.count);
-}
-
-static inline void put_cgroup_ns(struct cgroup_namespace *ns)
-{
-	if (ns && refcount_dec_and_test(&ns->ns.count))
-		free_cgroup_ns(ns);
-}
+#endif /* !CONFIG_CGROUPS */
 
 #ifdef CONFIG_CGROUPS
 
-- 
cgit v1.2.3


From 91e40668e70a08cf09c0e22023807b01d78d8b3a Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Tue, 22 Apr 2025 09:18:11 +0100
Subject: mm/page_table_check: Batch-check pmds/puds just like ptes

Convert page_table_check_p[mu]d_set(...) to
page_table_check_p[mu]ds_set(..., nr) to allow checking a contiguous set
of pmds/puds in single batch. We retain page_table_check_p[mu]d_set(...)
as macros that call new batch functions with nr=1 for compatibility.

arm64 is about to reorganise its pte/pmd/pud helpers to reuse more code
and to allow the implementation for huge_pte to more efficiently set
ptes/pmds/puds in batches. We need these batch-helpers to make the
refactoring possible.

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Luiz Capitulino <luizcap@redhat.com>
Link: https://lore.kernel.org/r/20250422081822.1836315-4-ryan.roberts@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/page_table_check.h | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
index 6722941c7cb8..289620d4aad3 100644
--- a/include/linux/page_table_check.h
+++ b/include/linux/page_table_check.h
@@ -19,8 +19,10 @@ void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd);
 void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud);
 void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
 		unsigned int nr);
-void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd);
-void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud);
+void __page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd,
+		unsigned int nr);
+void __page_table_check_puds_set(struct mm_struct *mm, pud_t *pudp, pud_t pud,
+		unsigned int nr);
 void __page_table_check_pte_clear_range(struct mm_struct *mm,
 					unsigned long addr,
 					pmd_t pmd);
@@ -74,22 +76,22 @@ static inline void page_table_check_ptes_set(struct mm_struct *mm,
 	__page_table_check_ptes_set(mm, ptep, pte, nr);
 }
 
-static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp,
-					    pmd_t pmd)
+static inline void page_table_check_pmds_set(struct mm_struct *mm,
+		pmd_t *pmdp, pmd_t pmd, unsigned int nr)
 {
 	if (static_branch_likely(&page_table_check_disabled))
 		return;
 
-	__page_table_check_pmd_set(mm, pmdp, pmd);
+	__page_table_check_pmds_set(mm, pmdp, pmd, nr);
 }
 
-static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp,
-					    pud_t pud)
+static inline void page_table_check_puds_set(struct mm_struct *mm,
+		pud_t *pudp, pud_t pud, unsigned int nr)
 {
 	if (static_branch_likely(&page_table_check_disabled))
 		return;
 
-	__page_table_check_pud_set(mm, pudp, pud);
+	__page_table_check_puds_set(mm, pudp, pud, nr);
 }
 
 static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
@@ -129,13 +131,13 @@ static inline void page_table_check_ptes_set(struct mm_struct *mm,
 {
 }
 
-static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp,
-					    pmd_t pmd)
+static inline void page_table_check_pmds_set(struct mm_struct *mm,
+		pmd_t *pmdp, pmd_t pmd, unsigned int nr)
 {
 }
 
-static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp,
-					    pud_t pud)
+static inline void page_table_check_puds_set(struct mm_struct *mm,
+		pud_t *pudp, pud_t pud, unsigned int nr)
 {
 }
 
@@ -146,4 +148,8 @@ static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
 }
 
 #endif /* CONFIG_PAGE_TABLE_CHECK */
+
+#define page_table_check_pmd_set(mm, pmdp, pmd)	page_table_check_pmds_set(mm, pmdp, pmd, 1)
+#define page_table_check_pud_set(mm, pudp, pud)	page_table_check_puds_set(mm, pudp, pud, 1)
+
 #endif /* __LINUX_PAGE_TABLE_CHECK_H */
-- 
cgit v1.2.3


From 2fba13371fe80b4d0d533a502e460ce0e936d024 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Tue, 22 Apr 2025 09:18:16 +0100
Subject: mm/vmalloc: Gracefully unmap huge ptes

Commit f7ee1f13d606 ("mm/vmalloc: enable mapping of huge pages at pte
level in vmap") added its support by reusing the set_huge_pte_at() API,
which is otherwise only used for user mappings. But when unmapping those
huge ptes, it continued to call ptep_get_and_clear(), which is a
layering violation. To date, the only arch to implement this support is
powerpc and it all happens to work ok for it.

But arm64's implementation of ptep_get_and_clear() can not be safely
used to clear a previous set_huge_pte_at(). So let's introduce a new
arch opt-in function, arch_vmap_pte_range_unmap_size(), which can
provide the size of a (present) pte. Then we can call
huge_ptep_get_and_clear() to tear it down properly.

Note that if vunmap_range() is called with a range that starts in the
middle of a huge pte-mapped page, we must unmap the entire huge page so
the behaviour is consistent with pmd and pud block mappings. In this
case emit a warning just like we do for pmd/pud mappings.

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Luiz Capitulino <luizcap@redhat.com>
Link: https://lore.kernel.org/r/20250422081822.1836315-9-ryan.roberts@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/vmalloc.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e3..16dd4cba64f2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,6 +113,14 @@ static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, uns
 }
 #endif
 
+#ifndef arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+							   pte_t *ptep)
+{
+	return PAGE_SIZE;
+}
+#endif
+
 #ifndef arch_vmap_pte_supported_shift
 static inline int arch_vmap_pte_supported_shift(unsigned long size)
 {
-- 
cgit v1.2.3


From 63de899cb6220357dea9d0f4e5aa459ff5193bb0 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 9 May 2025 12:12:53 +0100
Subject: io_uring: count allocated requests

Keep track of the number requests a ring currently has allocated (and
not freed), it'll be needed in the next patch.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/c8f8308294dc2a1cb8925d984d937d4fc14ab5d4.1746788718.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 7e23e993280e..73b289b48280 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -435,6 +435,7 @@ struct io_ring_ctx {
 
 	/* protected by ->completion_lock */
 	unsigned			evfd_last_cq_tail;
+	unsigned			nr_req_allocated;
 
 	/*
 	 * Protection for resize vs mmap races - both the mmap and resize
-- 
cgit v1.2.3


From bb71e40db1b7a7822f434d211b4a94bab9996cc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
Date: Fri, 9 May 2025 14:22:39 +0100
Subject: mfd: max77759: Add Maxim MAX77759 core driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Maxim MAX77759 is a companion PMIC for USB Type-C applications and
includes Battery Charger, Fuel Gauge, temperature sensors, USB Type-C
Port Controller (TCPC), NVMEM, and a GPIO expander.

Fuel Gauge and TCPC have separate and independent I2C addresses,
register maps, and interrupt lines and are therefore excluded from the
MFD core device driver here.

The GPIO and NVMEM interfaces are accessed via specific commands to the
built-in microprocessor. This driver implements an API that client
drivers can use for accessing those.

Signed-off-by: André Draszik <andre.draszik@linaro.org>
Acked-by: Peter Griffin <peter.griffin@linaro.org>
Link: https://lore.kernel.org/r/20250509-max77759-mfd-v10-1-962ac15ee3ef@linaro.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/max77759.h | 165 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 include/linux/mfd/max77759.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/max77759.h b/include/linux/mfd/max77759.h
new file mode 100644
index 000000000000..c6face34e385
--- /dev/null
+++ b/include/linux/mfd/max77759.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2020 Google Inc.
+ * Copyright 2025 Linaro Ltd.
+ *
+ * Maxim MAX77759 core driver
+ */
+
+#ifndef __LINUX_MFD_MAX77759_H
+#define __LINUX_MFD_MAX77759_H
+
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/regmap.h>
+
+#define MAX77759_PMIC_REG_PMIC_ID               0x00
+#define MAX77759_PMIC_REG_PMIC_REVISION         0x01
+#define MAX77759_PMIC_REG_OTP_REVISION          0x02
+#define MAX77759_PMIC_REG_INTSRC                0x22
+#define MAX77759_PMIC_REG_INTSRCMASK            0x23
+#define   MAX77759_PMIC_REG_INTSRC_MAXQ         BIT(3)
+#define   MAX77759_PMIC_REG_INTSRC_TOPSYS       BIT(1)
+#define   MAX77759_PMIC_REG_INTSRC_CHGR         BIT(0)
+#define MAX77759_PMIC_REG_TOPSYS_INT            0x24
+#define MAX77759_PMIC_REG_TOPSYS_INT_MASK       0x26
+#define   MAX77759_PMIC_REG_TOPSYS_INT_TSHDN    BIT(6)
+#define   MAX77759_PMIC_REG_TOPSYS_INT_SYSOVLO  BIT(5)
+#define   MAX77759_PMIC_REG_TOPSYS_INT_SYSUVLO  BIT(4)
+#define   MAX77759_PMIC_REG_TOPSYS_INT_FSHIP    BIT(0)
+#define MAX77759_PMIC_REG_I2C_CNFG              0x40
+#define MAX77759_PMIC_REG_SWRESET               0x50
+#define MAX77759_PMIC_REG_CONTROL_FG            0x51
+
+#define MAX77759_MAXQ_REG_UIC_INT1              0x64
+#define   MAX77759_MAXQ_REG_UIC_INT1_APCMDRESI  BIT(7)
+#define   MAX77759_MAXQ_REG_UIC_INT1_SYSMSGI    BIT(6)
+#define   MAX77759_MAXQ_REG_UIC_INT1_GPIO6I     BIT(1)
+#define   MAX77759_MAXQ_REG_UIC_INT1_GPIO5I     BIT(0)
+#define   MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, en)  (((en) & 1) << (offs))
+#define   MAX77759_MAXQ_REG_UIC_INT1_GPIOxI_MASK(offs) \
+				MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, ~0)
+#define MAX77759_MAXQ_REG_UIC_INT2              0x65
+#define MAX77759_MAXQ_REG_UIC_INT3              0x66
+#define MAX77759_MAXQ_REG_UIC_INT4              0x67
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS1       0x68
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS2       0x69
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS3       0x6a
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS4       0x6b
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS5       0x6c
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS6       0x6d
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS7       0x6f
+#define MAX77759_MAXQ_REG_UIC_UIC_STATUS8       0x6f
+#define MAX77759_MAXQ_REG_UIC_INT1_M            0x70
+#define MAX77759_MAXQ_REG_UIC_INT2_M            0x71
+#define MAX77759_MAXQ_REG_UIC_INT3_M            0x72
+#define MAX77759_MAXQ_REG_UIC_INT4_M            0x73
+#define MAX77759_MAXQ_REG_AP_DATAOUT0           0x81
+#define MAX77759_MAXQ_REG_AP_DATAOUT32          0xa1
+#define MAX77759_MAXQ_REG_AP_DATAIN0            0xb1
+#define MAX77759_MAXQ_REG_UIC_SWRST             0xe0
+
+#define MAX77759_CHGR_REG_CHG_INT               0xb0
+#define MAX77759_CHGR_REG_CHG_INT2              0xb1
+#define MAX77759_CHGR_REG_CHG_INT_MASK          0xb2
+#define MAX77759_CHGR_REG_CHG_INT2_MASK         0xb3
+#define MAX77759_CHGR_REG_CHG_INT_OK            0xb4
+#define MAX77759_CHGR_REG_CHG_DETAILS_00        0xb5
+#define MAX77759_CHGR_REG_CHG_DETAILS_01        0xb6
+#define MAX77759_CHGR_REG_CHG_DETAILS_02        0xb7
+#define MAX77759_CHGR_REG_CHG_DETAILS_03        0xb8
+#define MAX77759_CHGR_REG_CHG_CNFG_00           0xb9
+#define MAX77759_CHGR_REG_CHG_CNFG_01           0xba
+#define MAX77759_CHGR_REG_CHG_CNFG_02           0xbb
+#define MAX77759_CHGR_REG_CHG_CNFG_03           0xbc
+#define MAX77759_CHGR_REG_CHG_CNFG_04           0xbd
+#define MAX77759_CHGR_REG_CHG_CNFG_05           0xbe
+#define MAX77759_CHGR_REG_CHG_CNFG_06           0xbf
+#define MAX77759_CHGR_REG_CHG_CNFG_07           0xc0
+#define MAX77759_CHGR_REG_CHG_CNFG_08           0xc1
+#define MAX77759_CHGR_REG_CHG_CNFG_09           0xc2
+#define MAX77759_CHGR_REG_CHG_CNFG_10           0xc3
+#define MAX77759_CHGR_REG_CHG_CNFG_11           0xc4
+#define MAX77759_CHGR_REG_CHG_CNFG_12           0xc5
+#define MAX77759_CHGR_REG_CHG_CNFG_13           0xc6
+#define MAX77759_CHGR_REG_CHG_CNFG_14           0xc7
+#define MAX77759_CHGR_REG_CHG_CNFG_15           0xc8
+#define MAX77759_CHGR_REG_CHG_CNFG_16           0xc9
+#define MAX77759_CHGR_REG_CHG_CNFG_17           0xca
+#define MAX77759_CHGR_REG_CHG_CNFG_18           0xcb
+#define MAX77759_CHGR_REG_CHG_CNFG_19           0xcc
+
+/* MaxQ opcodes for max77759_maxq_command() */
+#define MAX77759_MAXQ_OPCODE_MAXLENGTH (MAX77759_MAXQ_REG_AP_DATAOUT32 - \
+					MAX77759_MAXQ_REG_AP_DATAOUT0 + \
+					1)
+
+#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_READ   0x21
+#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_WRITE  0x22
+#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_READ   0x23
+#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_WRITE  0x24
+#define MAX77759_MAXQ_OPCODE_USER_SPACE_READ     0x81
+#define MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE    0x82
+
+/**
+ * struct max77759 - core max77759 internal data structure
+ *
+ * @regmap_top: Regmap for accessing TOP registers
+ * @maxq_lock: Lock for serializing access to MaxQ
+ * @regmap_maxq: Regmap for accessing MaxQ registers
+ * @cmd_done: Used to signal completion of a MaxQ command
+ * @regmap_charger: Regmap for accessing charger registers
+ *
+ * The MAX77759 comprises several sub-blocks, namely TOP, MaxQ, Charger,
+ * Fuel Gauge, and TCPCI.
+ */
+struct max77759 {
+	struct regmap *regmap_top;
+
+	/* This protects MaxQ commands - only one can be active */
+	struct mutex maxq_lock;
+	struct regmap *regmap_maxq;
+	struct completion cmd_done;
+
+	struct regmap *regmap_charger;
+};
+
+/**
+ * struct max77759_maxq_command - structure containing the MaxQ command to
+ * send
+ *
+ * @length: The number of bytes to send.
+ * @cmd: The data to send.
+ */
+struct max77759_maxq_command {
+	u8 length;
+	u8 cmd[] __counted_by(length);
+};
+
+/**
+ * struct max77759_maxq_response - structure containing the MaxQ response
+ *
+ * @length: The number of bytes to receive.
+ * @rsp: The data received. Must have at least @length bytes space.
+ */
+struct max77759_maxq_response {
+	u8 length;
+	u8 rsp[] __counted_by(length);
+};
+
+/**
+ * max77759_maxq_command() - issue a MaxQ command and wait for the response
+ * and associated data
+ *
+ * @max77759: The core max77759 device handle.
+ * @cmd: The command to be sent.
+ * @rsp: Any response data associated with the command will be copied here;
+ *     can be %NULL if the command has no response (other than ACK).
+ *
+ * Return: 0 on success, a negative error number otherwise.
+ */
+int max77759_maxq_command(struct max77759 *max77759,
+			  const struct max77759_maxq_command *cmd,
+			  struct max77759_maxq_response *rsp);
+
+#endif /* __LINUX_MFD_MAX77759_H */
-- 
cgit v1.2.3


From ee971630f20fd421fffcdc4543731ebcb54ed6d0 Mon Sep 17 00:00:00 2001
From: Feng Yang <yangfeng@kylinos.cn>
Date: Tue, 6 May 2025 14:14:33 +0800
Subject: bpf: Allow some trace helpers for all prog types

if it works under NMI and doesn't use any context-dependent things,
should be fine for any program type. The detailed discussion is in [1].

[1] https://lore.kernel.org/all/CAEf4Bza6gK3dsrTosk6k3oZgtHesNDSrDd8sdeQ-GiS6oJixQg@mail.gmail.com/

Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Signed-off-by: Feng Yang <yangfeng@kylinos.cn>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/bpf/20250506061434.94277-2-yangfeng59949@163.com
---
 include/linux/bpf-cgroup.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9de7adb68294..4847dcade917 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -427,8 +427,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 
 const struct bpf_func_proto *
 cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
-const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
 #else
 
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
@@ -465,12 +463,6 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	return NULL;
 }
 
-static inline const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
-	return NULL;
-}
-
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
 					    struct bpf_map *map) { return 0; }
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-- 
cgit v1.2.3


From c24a65b6a27c78d8540409800886b6622ea86ebf Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 15 Apr 2025 21:15:49 -0500
Subject: iidc/ice/irdma: Update IDC to support multiple consumers

In preparation of supporting more than a single core PCI driver
for RDMA, move ice specific structs like qset_params, qos_info
and qos_params from iidc_rdma.h to iidc_rdma_ice.h.

Previously, the ice driver was just exporting its entire PF struct
to the auxiliary driver, but since each core driver will have its own
different PF struct, implement a universal struct that all core drivers
can provide to the auxiliary driver through the probe call.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Co-developed-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Co-developed-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Co-developed-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc_rdma.h     | 67 +++++++++-----------------------
 include/linux/net/intel/iidc_rdma_ice.h | 69 ++++++++++++++++++++++++++++-----
 2 files changed, 78 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h
index 7f1910289534..8baad1082042 100644
--- a/include/linux/net/intel/iidc_rdma.h
+++ b/include/linux/net/intel/iidc_rdma.h
@@ -5,7 +5,6 @@
 #define _IIDC_RDMA_H_
 
 #include <linux/auxiliary_bus.h>
-#include <linux/dcbnl.h>
 #include <linux/device.h>
 #include <linux/if_ether.h>
 #include <linux/kernel.h>
@@ -17,14 +16,19 @@ enum iidc_rdma_event_type {
 	IIDC_RDMA_EVENT_AFTER_MTU_CHANGE,
 	IIDC_RDMA_EVENT_BEFORE_TC_CHANGE,
 	IIDC_RDMA_EVENT_AFTER_TC_CHANGE,
+	IIDC_RDMA_EVENT_WARN_RESET,
 	IIDC_RDMA_EVENT_CRIT_ERR,
 	IIDC_RDMA_EVENT_NBITS		/* must be last */
 };
 
+struct iidc_rdma_event {
+	DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS);
+	u32 reg;
+};
+
 enum iidc_rdma_reset_type {
-	IIDC_PFR,
-	IIDC_CORER,
-	IIDC_GLOBR,
+	IIDC_FUNC_RESET,
+	IIDC_DEV_RESET,
 };
 
 enum iidc_rdma_protocol {
@@ -32,53 +36,22 @@ enum iidc_rdma_protocol {
 	IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1),
 };
 
-#define IIDC_MAX_USER_PRIORITY		8
-#define IIDC_DSCP_PFC_MODE		0x1
-
-/* Struct to hold per RDMA Qset info */
-struct iidc_rdma_qset_params {
-	/* Qset TEID returned to the RDMA driver in
-	 * ice_add_rdma_qset and used by RDMA driver
-	 * for calls to ice_del_rdma_qset
-	 */
-	u32 teid;	/* Qset TEID */
-	u16 qs_handle; /* RDMA driver provides this */
-	u16 vport_id; /* VSI index */
-	u8 tc; /* TC branch the Qset should belong to */
-};
-
-struct iidc_rdma_qos_info {
-	u64 tc_ctx;
-	u8 rel_bw;
-	u8 prio_type;
-	u8 egress_virt_up;
-	u8 ingress_virt_up;
-};
-
-/* Struct to pass QoS info */
-struct iidc_rdma_qos_params {
-	struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
-	u8 up2tc[IIDC_MAX_USER_PRIORITY];
-	u8 vport_relative_bw;
-	u8 vport_priority_type;
-	u8 num_tc;
-	u8 pfc_mode;
-	u8 dscp_map[DSCP_MAX];
-};
-
-struct iidc_rdma_event {
-	DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS);
-	u32 reg;
+/* Structure to be populated by core LAN PCI driver */
+struct iidc_rdma_core_dev_info {
+	struct pci_dev *pdev; /* PCI device of corresponding to main function */
+	struct auxiliary_device *adev;
+	/* Current active RDMA protocol */
+	enum iidc_rdma_protocol rdma_protocol;
+	void *iidc_priv; /* elements unique to each driver */
 };
 
 /* Structure representing auxiliary driver tailored information about the core
  * PCI dev, each auxiliary driver using the IIDC interface will have an
  * instance of this struct dedicated to it.
  */
-
 struct iidc_rdma_core_auxiliary_dev {
 	struct auxiliary_device adev;
-	struct ice_pf *pf;
+	struct iidc_rdma_core_dev_info *cdev_info;
 };
 
 /* structure representing the auxiliary driver. This struct is to be
@@ -88,12 +61,8 @@ struct iidc_rdma_core_auxiliary_dev {
  */
 struct iidc_rdma_core_auxiliary_drv {
 	struct auxiliary_driver adrv;
-	/* This event_handler is meant to be a blocking call.  For instance,
-	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
-	 * return until the auxiliary driver is ready for the MTU change to
-	 * happen.
-	 */
-	void (*event_handler)(struct ice_pf *pf, struct iidc_rdma_event *event);
+	void (*event_handler)(struct iidc_rdma_core_dev_info *cdev,
+			      struct iidc_rdma_event *event);
 };
 
 #endif /* _IIDC_RDMA_H_*/
diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h
index 78d10003d776..b40eed0e13fe 100644
--- a/include/linux/net/intel/iidc_rdma_ice.h
+++ b/include/linux/net/intel/iidc_rdma_ice.h
@@ -4,16 +4,67 @@
 #ifndef _IIDC_RDMA_ICE_H_
 #define _IIDC_RDMA_ICE_H_
 
-struct ice_pf;
+#include <linux/dcbnl.h>
 
-int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
-int ice_rdma_request_reset(struct ice_pf *pf,
+#define IIDC_MAX_USER_PRIORITY         8
+#define IIDC_DSCP_PFC_MODE             0x1
+
+/**
+ * struct iidc_rdma_qset_params - Struct to hold per RDMA Qset info
+ * @teid: TEID of the Qset node
+ * @qs_handle: SW index of the Qset, RDMA provides this
+ * @vport_id: VSI index
+ * @tc: Traffic Class branch the QSet should belong to
+ */
+struct iidc_rdma_qset_params {
+	/* Qset TEID returned to the RDMA driver in
+	 * ice_add_rdma_qset and used by RDMA driver
+	 * for calls to ice_del_rdma_qset
+	 */
+	u32 teid;
+	u16 qs_handle;
+	u16 vport_id;
+	u8 tc;
+};
+
+struct iidc_rdma_qos_info {
+	u64 tc_ctx;
+	u8 rel_bw;
+	u8 prio_type;
+	u8 egress_virt_up;
+	u8 ingress_virt_up;
+};
+
+/* Struct to pass QoS info */
+struct iidc_rdma_qos_params {
+	struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+	u8 up2tc[IIDC_MAX_USER_PRIORITY];
+	u8 vport_relative_bw;
+	u8 vport_priority_type;
+	u8 num_tc;
+	u8 pfc_mode;
+	u8 dscp_map[DSCP_MAX];
+};
+
+struct iidc_rdma_priv_dev_info {
+	u8 pf_id;
+	u16 vport_id;
+	struct net_device *netdev;
+	struct iidc_rdma_qos_params qos_info;
+	u8 __iomem *hw_addr;
+};
+
+int ice_add_rdma_qset(struct iidc_rdma_core_dev_info *cdev,
+		      struct iidc_rdma_qset_params *qset);
+int ice_del_rdma_qset(struct iidc_rdma_core_dev_info *cdev,
+		      struct iidc_rdma_qset_params *qset);
+int ice_rdma_request_reset(struct iidc_rdma_core_dev_info *cdev,
 			   enum iidc_rdma_reset_type reset_type);
-int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
-void ice_get_qos_params(struct ice_pf *pf,
-			struct iidc_rdma_qos_params *qos);
-int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
-void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry);
+int ice_rdma_update_vsi_filter(struct iidc_rdma_core_dev_info *cdev, u16 vsi_id,
+			       bool enable);
+int ice_alloc_rdma_qvector(struct iidc_rdma_core_dev_info *cdev,
+			   struct msix_entry *entry);
+void ice_free_rdma_qvector(struct iidc_rdma_core_dev_info *cdev,
+			   struct msix_entry *entry);
 
 #endif /* _IIDC_RDMA_ICE_H_*/
-- 
cgit v1.2.3


From 092a38565ed87cbda6fe7dd16c742df6f907483e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 5 May 2025 17:21:13 -0400
Subject: ring-buffer: Add ring_buffer_record_is_on_cpu()

Add the function ring_buffer_record_is_on_cpu() that returns true if the
ring buffer for a give CPU is writable and false otherwise.

Also add tracer_tracing_is_on_cpu() to return if the ring buffer for a
given CPU is writeable for a given trace_array.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: https://lore.kernel.org/20250505212236.059853898@goodmis.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 56e27263acf8..cd7f0ae26615 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -192,6 +192,7 @@ void ring_buffer_record_off(struct trace_buffer *buffer);
 void ring_buffer_record_on(struct trace_buffer *buffer);
 bool ring_buffer_record_is_on(struct trace_buffer *buffer);
 bool ring_buffer_record_is_set_on(struct trace_buffer *buffer);
+bool ring_buffer_record_is_on_cpu(struct trace_buffer *buffer, int cpu);
 void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu);
 
-- 
cgit v1.2.3


From f4818881c47fd91fcb6d62373c57c7844e3de1c0 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Fri, 21 Jun 2024 20:23:23 -0700
Subject: x86/its: Enable Indirect Target Selection mitigation

Indirect Target Selection (ITS) is a bug in some pre-ADL Intel CPUs with
eIBRS. It affects prediction of indirect branch and RETs in the
lower half of cacheline. Due to ITS such branches may get wrongly predicted
to a target of (direct or indirect) branch that is located in the upper
half of the cacheline.

Scope of impact
===============

Guest/host isolation
--------------------
When eIBRS is used for guest/host isolation, the indirect branches in the
VMM may still be predicted with targets corresponding to branches in the
guest.

Intra-mode
----------
cBPF or other native gadgets can be used for intra-mode training and
disclosure using ITS.

User/kernel isolation
---------------------
When eIBRS is enabled user/kernel isolation is not impacted.

Indirect Branch Prediction Barrier (IBPB)
-----------------------------------------
After an IBPB, indirect branches may be predicted with targets
corresponding to direct branches which were executed prior to IBPB. This is
mitigated by a microcode update.

Add cmdline parameter indirect_target_selection=off|on|force to control the
mitigation to relocate the affected branches to an ITS-safe thunk i.e.
located in the upper half of cacheline. Also add the sysfs reporting.

When retpoline mitigation is deployed, ITS safe-thunks are not needed,
because retpoline sequence is already ITS-safe. Similarly, when call depth
tracking (CDT) mitigation is deployed (retbleed=stuff), ITS safe return
thunk is not used, as CDT prevents RSB-underflow.

To not overcomplicate things, ITS mitigation is not supported with
spectre-v2 lfence;jmp mitigation. Moreover, it is less practical to deploy
lfence;jmp mitigation on ITS affected parts anyways.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 include/linux/cpu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e3049543008b..3aa955102b34 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev,
 extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
 					       struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
+						  struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
-- 
cgit v1.2.3


From d6d1e3e6580ca35071ad474381f053cbf1fb6414 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2025 16:30:11 +0200
Subject: mm/execmem: Unify early execmem_cache behaviour

Early kernel memory is RWX, only at the end of early boot (before SMP)
do we mark things ROX. Have execmem_cache mirror this behaviour for
early users.

This avoids having to remember what code is execmem and what is not --
we can poke everything with impunity ;-) Also performance for not
having to do endless text_poke_mm switches.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 include/linux/execmem.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 65655a5d1be2..089c71cc8ddf 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -53,7 +53,7 @@ enum execmem_range_flags {
 	EXECMEM_ROX_CACHE	= (1 << 1),
 };
 
-#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
+#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM)
 /**
  * execmem_fill_trapping_insns - set memory to contain instructions that
  *				 will trap
@@ -93,9 +93,15 @@ int execmem_make_temp_rw(void *ptr, size_t size);
  * Return: 0 on success or negative error code on failure.
  */
 int execmem_restore_rox(void *ptr, size_t size);
+
+/*
+ * Called from mark_readonly(), where the system transitions to ROX.
+ */
+void execmem_cache_make_ro(void);
 #else
 static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
 static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
+static inline void execmem_cache_make_ro(void) { }
 #endif
 
 /**
-- 
cgit v1.2.3


From 872df34d7c51a79523820ea6a14860398c639b87 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Oct 2024 10:05:48 -0700
Subject: x86/its: Use dynamic thunks for indirect branches

ITS mitigation moves the unsafe indirect branches to a safe thunk. This
could degrade the prediction accuracy as the source address of indirect
branches becomes same for different execution paths.

To improve the predictions, and hence the performance, assign a separate
thunk for each indirect callsite. This is also a defense-in-depth measure
to avoid indirect branches aliasing with each other.

As an example, 5000 dynamic thunks would utilize around 16 bits of the
address space, thereby gaining entropy. For a BTB that uses
32 bits for indexing, dynamic thunks could provide better prediction
accuracy over fixed thunks.

Have ITS thunks be variable sized and use EXECMEM_MODULE_TEXT such that
they are both more flexible (got to extend them later) and live in 2M TLBs,
just like kernel code, avoiding undue TLB pressure.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 include/linux/execmem.h | 3 +++
 include/linux/module.h  | 5 +++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 089c71cc8ddf..ca42d5e46ccc 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/moduleloader.h>
+#include <linux/cleanup.h>
 
 #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
 		!defined(CONFIG_KASAN_VMALLOC)
@@ -176,6 +177,8 @@ void *execmem_alloc(enum execmem_type type, size_t size);
  */
 void execmem_free(void *ptr);
 
+DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T));
+
 #ifdef CONFIG_MMU
 /**
  * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory
diff --git a/include/linux/module.h b/include/linux/module.h
index b3329110d668..8050f77c3b64 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -586,6 +586,11 @@ struct module {
 	atomic_t refcnt;
 #endif
 
+#ifdef CONFIG_MITIGATION_ITS
+	int its_num_pages;
+	void **its_page_array;
+#endif
+
 #ifdef CONFIG_CONSTRUCTORS
 	/* Constructor functions. */
 	ctor_fn_t *ctors;
-- 
cgit v1.2.3


From f2987c5816bda01a8ffdd4eb5dfaaa2b41b67039 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 6 May 2025 20:48:59 +0800
Subject: block: export API to get the number of bdev inflight IO

- rename part_in_{flight, flight_rw} to bdev_count_{inflight, inflight_rw}
- export bdev_count_inflight, to fix a problem in mdraid that foreground
  IO can be starved by background sync IO in later patches

Link: https://lore.kernel.org/linux-raid/20250506124903.2540268-6-yukuai1@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 include/linux/part_stat.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index c5e9cac0575e..eeeff2a04529 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value)
 #define part_stat_local_read_cpu(part, field, cpu)			\
 	local_read(&(part_stat_get_cpu(part, field, cpu)))
 
+unsigned int bdev_count_inflight(struct block_device *part);
+
 #endif /* _LINUX_PART_STAT_H */
-- 
cgit v1.2.3


From 752d0464b78a5b28682256ed7a057106119e1d1a Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 6 May 2025 20:49:03 +0800
Subject: md: clean up accounting for issued sync IO

It's no longer used and can be removed, also remove the field
'gendisk->sync_io'.

Link: https://lore.kernel.org/linux-raid/20250506124903.2540268-10-yukuai1@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Xiao Ni <xni@redhat.com>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 52b7a27c46e8..ad75dee3b213 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -182,7 +182,6 @@ struct gendisk {
 	struct list_head slave_bdevs;
 #endif
 	struct timer_rand_state *random;
-	atomic_t sync_io;		/* RAID */
 	struct disk_events *ev;
 
 #ifdef CONFIG_BLK_DEV_ZONED
-- 
cgit v1.2.3


From 094ac8cff7858bee5fa4554f6ea66c964f8e160e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 10 May 2025 10:45:28 +0200
Subject: futex: Relax the rcu_assign_pointer() assignment of mm->futex_phash
 in futex_mm_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The following commit added an rcu_assign_pointer() assignment to
futex_mm_init() in <linux/futex.h>:

  bd54df5ea7ca ("futex: Allow to resize the private local hash")

Which breaks the build on older compilers (gcc-9, x86-64 defconfig):

   CC      io_uring/futex.o
   In file included from ./arch/x86/include/generated/asm/rwonce.h:1,
                    from ./include/linux/compiler.h:390,
                    from ./include/linux/array_size.h:5,
                    from ./include/linux/kernel.h:16,
                    from io_uring/futex.c:2:
   ./include/linux/futex.h: In function 'futex_mm_init':
   ./include/linux/rcupdate.h:555:36: error: dereferencing pointer to incomplete type 'struct futex_private_hash'

The problem is that this variant of rcu_assign_pointer() wants to
know the full type of 'struct futex_private_hash', which type
is local to futex.c:

   kernel/futex/core.c:struct futex_private_hash {

There are a couple of mechanical solutions for this bug:

  - we can uninline futex_mm_init() and move it into futex/core.c

  - or we can share the structure definition with kernel/fork.c.

But both of these solutions have disadvantages: the first one adds
runtime overhead, while the second one dis-encapsulates private
futex types.

A third solution, implemented by this patch, is to just initialize
mm->futex_phash with NULL like the patch below, it's not like this
new MM's ->futex_phash can be observed externally until the task
is inserted into the task list, which guarantees full store ordering.

The relaxation of this initialization might also give a tiny speedup
on certain platforms.

Fixes: bd54df5ea7ca ("futex: Allow to resize the private local hash")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: André Almeida <andrealmeid@igalia.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Link: https://lore.kernel.org/r/aB8SI00EHBri23lB@gmail.com
---
 include/linux/futex.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index eccc99751bd9..168ffd5996b4 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -88,7 +88,14 @@ void futex_hash_free(struct mm_struct *mm);
 
 static inline void futex_mm_init(struct mm_struct *mm)
 {
-	rcu_assign_pointer(mm->futex_phash, NULL);
+	/*
+	 * No need for rcu_assign_pointer() here, as we can rely on
+	 * tasklist_lock write-ordering in copy_process(), before
+	 * the task's MM becomes visible and the ->futex_phash
+	 * becomes externally observable:
+	 */
+	mm->futex_phash = NULL;
+
 	mutex_init(&mm->futex_hash_lock);
 }
 
-- 
cgit v1.2.3


From 1f93d877f09d987f08baedd50597aaaa72a37be4 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 8 May 2025 21:12:07 +0300
Subject: ALSA/hda: intel-sdw-acpi: Correct sdw_intel_acpi_scan() function
 parameter

The acpi_handle should be just a handle and not a pointer in
sdw_intel_acpi_scan() parameter list.
It is called with 'acpi_handle handle' as parameter and it is passing it to
acpi_walk_namespace, which also expects acpi_handle and not  acpi_handle*

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://patch.msgid.link/20250508181207.22113-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/soundwire/sdw_intel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 493d9de4e472..dc6ebaee3d18 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -365,7 +365,7 @@ struct sdw_intel_res {
  * on e.g. which machine driver to select (I2S mode, HDaudio or
  * SoundWire).
  */
-int sdw_intel_acpi_scan(acpi_handle *parent_handle,
+int sdw_intel_acpi_scan(acpi_handle parent_handle,
 			struct sdw_intel_acpi_info *info);
 
 void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx);
-- 
cgit v1.2.3


From 18c64378ad85ef00e70f196793ee8901a8aa2fa1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 11 Apr 2025 10:22:14 -0400
Subject: sunrpc: add info about xprt queue times to svc_xprt_dequeue
 tracepoint

I've been looking at a problem where we see increased RPC timeouts in
clients when the nfs_layout_flexfiles dataserver_timeo value is tuned
very low (6s). This is necessary to ensure quick failover to a different
mirror if a server goes down, but it causes a lot more major RPC timeouts.

Ultimately, the problem is server-side however. It's sometimes doesn't
respond to connection attempts. My theory is that the interrupt handler
runs when a connection comes in, the xprt ends up being enqueued, but it
takes a significant amount of time for the nfsd thread to pick it up.

Currently, the svc_xprt_dequeue tracepoint displays "wakeup-us". This is
the time between the wake_up() call, and the thread dequeueing the xprt.
If no thread was woken, or the thread ended up picking up a different
xprt than intended, then this value won't tell us how long the xprt was
waiting.

Add a new xpt_qtime field to struct svc_xprt and set it in
svc_xprt_enqueue(). When the dequeue tracepoint fires, also store the
time that the xprt sat on the queue in total. Display it as "qtime-us".

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 72be60952579..369a89aea186 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -53,6 +53,7 @@ struct svc_xprt {
 	struct svc_xprt_class	*xpt_class;
 	const struct svc_xprt_ops *xpt_ops;
 	struct kref		xpt_ref;
+	ktime_t			xpt_qtime;
 	struct list_head	xpt_list;
 	struct lwq_node		xpt_ready;
 	unsigned long		xpt_flags;
-- 
cgit v1.2.3


From 0ae0227fa31dda5bfc6b5a0145952d46fe57408b Mon Sep 17 00:00:00 2001
From: David Wang <00107082@163.com>
Date: Tue, 6 May 2025 03:30:34 +0800
Subject: mm/codetag: move tag retrieval back upfront in __free_pages()

Commit 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled()
 checks") introduces a possible use-after-free scenario, when page
is non-compound, page[0] could be released by other thread right
after put_page_testzero failed in current thread, pgalloc_tag_sub_pages
afterwards would manipulate an invalid page for accounting remaining
pages:

[timeline]   [thread1]                     [thread2]
  |          alloc_page non-compound
  V
  |                                        get_page, rf counter inc
  V
  |          in ___free_pages
  |          put_page_testzero fails
  V
  |                                        put_page, page released
  V
  |          in ___free_pages,
  |          pgalloc_tag_sub_pages
  |          manipulate an invalid page
  V

Restore __free_pages() to its state before, retrieve alloc tag
beforehand.

Link: https://lkml.kernel.org/r/20250505193034.91682-1-00107082@163.com
Fixes: 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled() checks")
Signed-off-by: David Wang <00107082@163.com>
Acked-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgalloc_tag.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index c74077977830..8a7f4f802c57 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page)
 	return tag;
 }
 
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
+{
+	if (mem_alloc_profiling_enabled())
+		return __pgalloc_tag_get(page);
+	return NULL;
+}
+
 void pgalloc_tag_split(struct folio *folio, int old_order, int new_order);
 void pgalloc_tag_swap(struct folio *new, struct folio *old);
 
@@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {}
 static inline void alloc_tag_sec_init(void) {}
 static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {}
 static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {}
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
 
-- 
cgit v1.2.3


From cb5b13cd6c9237fe5ac978b22453eb3fa098a8d6 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 19:16:56 +0100
Subject: mm: introduce a common definition of mk_pte()

Most architectures simply call pfn_pte().  Centralise that as the normal
definition and remove the definition of mk_pte() from the architectures
which have either that exact definition or something similar.

Link: https://lkml.kernel.org/r/20250402181709.2386022-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> # m68k
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com> # s390
Cc: Zi Yan <ziy@nvidia.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Richard Weinberger <richard@nod.at>
Cc: <x86@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bf55206935c4..aa944eaad0ec 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2004,6 +2004,15 @@ static inline struct folio *pfn_folio(unsigned long pfn)
 	return page_folio(pfn_to_page(pfn));
 }
 
+#ifndef mk_pte
+#ifdef CONFIG_MMU
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+	return pfn_pte(page_to_pfn(page), pgprot);
+}
+#endif
+#endif
+
 static inline bool folio_has_pincount(const struct folio *folio)
 {
 	if (IS_ENABLED(CONFIG_64BIT))
-- 
cgit v1.2.3


From 4ec492a628d897806bb6dc13b1c257c4e06eb1cf Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 19:17:00 +0100
Subject: mm: make mk_pte() definition unconditional

All architectures now use the common mk_pte() definition, so we can remove
the condition.

Link: https://lkml.kernel.org/r/20250402181709.2386022-7-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Richard Weinberger <richard@nod.at>
Cc: <x86@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index aa944eaad0ec..3a55903d68e2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2004,14 +2004,12 @@ static inline struct folio *pfn_folio(unsigned long pfn)
 	return page_folio(pfn_to_page(pfn));
 }
 
-#ifndef mk_pte
 #ifdef CONFIG_MMU
 static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 {
 	return pfn_pte(page_to_pfn(page), pgprot);
 }
 #endif
-#endif
 
 static inline bool folio_has_pincount(const struct folio *folio)
 {
-- 
cgit v1.2.3


From deb8d4d28e4d05c4ecfc6e242c0a53d49e119224 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 19:17:01 +0100
Subject: mm: add folio_mk_pte()

Remove a cast from folio to page in four callers of mk_pte().

Link: https://lkml.kernel.org/r/20250402181709.2386022-8-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Richard Weinberger <richard@nod.at>
Cc: <x86@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3a55903d68e2..cbad8c663c4d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2009,6 +2009,21 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 {
 	return pfn_pte(page_to_pfn(page), pgprot);
 }
+
+/**
+ * folio_mk_pte - Create a PTE for this folio
+ * @folio: The folio to create a PTE for
+ * @pgprot: The page protection bits to use
+ *
+ * Create a page table entry for the first page of this folio.
+ * This is suitable for passing to set_ptes().
+ *
+ * Return: A page table entry suitable for mapping this folio.
+ */
+static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot)
+{
+	return pfn_pte(folio_pfn(folio), pgprot);
+}
 #endif
 
 static inline bool folio_has_pincount(const struct folio *folio)
-- 
cgit v1.2.3


From e3981db444a0a18d350d9f92e3f2e8d489b54211 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 19:17:04 +0100
Subject: mm: add folio_mk_pmd()

Removes five conversions from folio to page.  Also removes both callers of
mk_pmd() that aren't part of mk_huge_pmd(), getting us a step closer to
removing the confusion between mk_pmd(), mk_huge_pmd() and pmd_mkhuge().

Link: https://lkml.kernel.org/r/20250402181709.2386022-11-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Richard Weinberger <richard@nod.at>
Cc: <x86@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cbad8c663c4d..733dd7100ca1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2024,7 +2024,24 @@ static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot)
 {
 	return pfn_pte(folio_pfn(folio), pgprot);
 }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/**
+ * folio_mk_pmd - Create a PMD for this folio
+ * @folio: The folio to create a PMD for
+ * @pgprot: The page protection bits to use
+ *
+ * Create a page table entry for the first page of this folio.
+ * This is suitable for passing to set_pmd_at().
+ *
+ * Return: A page table entry suitable for mapping this folio.
+ */
+static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot)
+{
+	return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot));
+}
 #endif
+#endif /* CONFIG_MMU */
 
 static inline bool folio_has_pincount(const struct folio *folio)
 {
-- 
cgit v1.2.3


From 5071ea3d7b3d1e9660524374083a929a6885d78a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 19:17:05 +0100
Subject: arch: remove mk_pmd()

There are now no callers of mk_huge_pmd() and mk_pmd().  Remove them.

Link: https://lkml.kernel.org/r/20250402181709.2386022-12-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Richard Weinberger <richard@nod.at>
Cc: <x86@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e893d546a49f..f190998b2ebd 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -495,8 +495,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
 void mm_put_huge_zero_folio(struct mm_struct *mm);
 
-#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
-
 static inline bool thp_migration_supported(void)
 {
 	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
-- 
cgit v1.2.3


From c09b997342bcd1b3c2b63c6ff6fecf037a68beff Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 22:06:03 +0100
Subject: filemap: remove readahead_page()

Patch series "Misc folio patches for 6.16".

Remove a few APIs that we've converted everybody from using.  I also found
a few places that extract a page pointer from i_pages, which will be an
invalid thing to do when we separate pages from folios.


This patch (of 8):

All filesystems have now been converted to call readahead_folio() so we
can delete this wrapper.

Link: https://lkml.kernel.org/r/20250402210612.2444135-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20250402210612.2444135-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 26baa78f1ca7..cd4bd0f8e5f6 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1308,9 +1308,9 @@ static inline bool filemap_range_needs_writeback(struct address_space *mapping,
  * struct readahead_control - Describes a readahead request.
  *
  * A readahead request is for consecutive pages.  Filesystems which
- * implement the ->readahead method should call readahead_page() or
- * readahead_page_batch() in a loop and attempt to start I/O against
- * each page in the request.
+ * implement the ->readahead method should call readahead_folio() or
+ * __readahead_batch() in a loop and attempt to start reads into each
+ * folio in the request.
  *
  * Most of the fields in this struct are private and should be accessed
  * by the functions below.
@@ -1415,22 +1415,6 @@ static inline struct folio *__readahead_folio(struct readahead_control *ractl)
 	return folio;
 }
 
-/**
- * readahead_page - Get the next page to read.
- * @ractl: The current readahead request.
- *
- * Context: The page is locked and has an elevated refcount.  The caller
- * should decreases the refcount once the page has been submitted for I/O
- * and unlock the page once all I/O to that page has completed.
- * Return: A pointer to the next page, or %NULL if we are done.
- */
-static inline struct page *readahead_page(struct readahead_control *ractl)
-{
-	struct folio *folio = __readahead_folio(ractl);
-
-	return &folio->page;
-}
-
 /**
  * readahead_folio - Get the next folio to read.
  * @ractl: The current readahead request.
-- 
cgit v1.2.3


From a55139579082e4bc9ea9b04003cb9f78c781e08b Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 22:06:04 +0100
Subject: mm: remove offset_in_thp()

All callers have been converted to call offset_in_folio().

Link: https://lkml.kernel.org/r/20250402210612.2444135-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 733dd7100ca1..ae67d3a33792 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2445,7 +2445,6 @@ static inline void clear_page_pfmemalloc(struct page *page)
 extern void pagefault_out_of_memory(void);
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
-#define offset_in_thp(page, p)	((unsigned long)(p) & (thp_size(page) - 1))
 #define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
 
 /*
-- 
cgit v1.2.3


From 9c532d79082f9f2e58a45f2e92020dda65b7dedd Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 22:06:07 +0100
Subject: filemap: remove find_subpage()

All users of this function now call folio_file_page() instead.  Delete it.

Link: https://lkml.kernel.org/r/20250402210612.2444135-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cd4bd0f8e5f6..0ddd4bd8cdf8 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -945,19 +945,6 @@ static inline bool folio_contains(struct folio *folio, pgoff_t index)
 	return index - folio_index(folio) < folio_nr_pages(folio);
 }
 
-/*
- * Given the page we found in the page cache, return the page corresponding
- * to this index in the file
- */
-static inline struct page *find_subpage(struct page *head, pgoff_t index)
-{
-	/* HugeTLBfs wants the head page regardless */
-	if (PageHuge(head))
-		return head;
-
-	return head + (index & (thp_nr_pages(head) - 1));
-}
-
 unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, struct folio_batch *fbatch);
 unsigned filemap_get_folios_contig(struct address_space *mapping,
-- 
cgit v1.2.3


From 8dfc8cbf7b07da172b3a4c8bea064e84fbfa5d56 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 22:06:08 +0100
Subject: filemap: convert __readahead_batch() to use a folio

Extract folios from i_mapping, not pages.  Removes a hidden call to
compound_head(), a use of thp_nr_pages() and an unnecessary assertion that
we didn't find a tail page in the page cache.

Link: https://lkml.kernel.org/r/20250402210612.2444135-7-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0ddd4bd8cdf8..c5c9b3770d75 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1424,7 +1424,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
 {
 	unsigned int i = 0;
 	XA_STATE(xas, &rac->mapping->i_pages, 0);
-	struct page *page;
+	struct folio *folio;
 
 	BUG_ON(rac->_batch_count > rac->_nr_pages);
 	rac->_nr_pages -= rac->_batch_count;
@@ -1433,13 +1433,12 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
 
 	xas_set(&xas, rac->_index);
 	rcu_read_lock();
-	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
-		if (xas_retry(&xas, page))
+	xas_for_each(&xas, folio, rac->_index + rac->_nr_pages - 1) {
+		if (xas_retry(&xas, folio))
 			continue;
-		VM_BUG_ON_PAGE(!PageLocked(page), page);
-		VM_BUG_ON_PAGE(PageTail(page), page);
-		array[i++] = page;
-		rac->_batch_count += thp_nr_pages(page);
+		VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+		array[i++] = folio_page(folio, 0);
+		rac->_batch_count += folio_nr_pages(folio);
 		if (i == array_sz)
 			break;
 	}
-- 
cgit v1.2.3


From 41e422a898da69e903a846dfb2b0c0ff62abc3cd Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 22:06:09 +0100
Subject: filemap: remove readahead_page_batch()

This function has no more callers; delete it.

Link: https://lkml.kernel.org/r/20250402210612.2444135-8-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c5c9b3770d75..af25fb640463 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1447,20 +1447,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
 	return i;
 }
 
-/**
- * readahead_page_batch - Get a batch of pages to read.
- * @rac: The current readahead request.
- * @array: An array of pointers to struct page.
- *
- * Context: The pages are locked and have an elevated refcount.  The caller
- * should decreases the refcount once the page has been submitted for I/O
- * and unlock the page once all I/O to that page has completed.
- * Return: The number of pages placed in the array.  0 indicates the request
- * is complete.
- */
-#define readahead_page_batch(rac, array)				\
-	__readahead_batch(rac, array, ARRAY_SIZE(array))
-
 /**
  * readahead_pos - The byte offset into the file of this readahead request.
  * @rac: The readahead request.
-- 
cgit v1.2.3


From 2355153ea8185e7deaf9ce728716dadb707ef59f Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 2 Apr 2025 22:06:10 +0100
Subject: mm: delete thp_nr_pages()

All callers now use folio_nr_pages().  Delete this wrapper.

Link: https://lkml.kernel.org/r/20250402210612.2444135-9-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ae67d3a33792..dcdb798184ef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2223,15 +2223,6 @@ static inline long compound_nr(struct page *page)
 	return folio_large_nr_pages(folio);
 }
 
-/**
- * thp_nr_pages - The number of regular pages in this huge page.
- * @page: The head page of a huge page.
- */
-static inline long thp_nr_pages(struct page *page)
-{
-	return folio_nr_pages((struct folio *)page);
-}
-
 /**
  * folio_next - Move to the next physical folio.
  * @folio: The folio we're currently operating on.
-- 
cgit v1.2.3


From 56e5a103a721d0ef139bba7ff3d3ada6c8217d5b Mon Sep 17 00:00:00 2001
From: Nhat Pham <nphamcs@gmail.com>
Date: Wed, 2 Apr 2025 13:44:16 -0700
Subject: zsmalloc: prefer the the original page's node for compressed data

Currently, zsmalloc, zswap's and zram's backend memory allocator, does not
enforce any policy for the allocation of memory for the compressed data,
instead just adopting the memory policy of the task entering reclaim, or
the default policy (prefer local node) if no such policy is specified.
This can lead to several pathological behaviors in multi-node NUMA
systems:

1. Systems with CXL-based memory tiering can encounter the following
   inversion with zswap/zram: the coldest pages demoted to the CXL tier
   can return to the high tier when they are reclaimed to compressed swap,
   creating memory pressure on the high tier.

2. Consider a direct reclaimer scanning nodes in order of allocation
   preference.  If it ventures into remote nodes, the memory it compresses
   there should stay there.  Trying to shift those contents over to the
   reclaiming thread's preferred node further *increases* its local
   pressure, and provoking more spills.  The remote node is also the most
   likely to refault this data again.  This undesirable behavior was
   pointed out by Johannes Weiner in [1].

3. For zswap writeback, the zswap entries are organized in
   node-specific LRUs, based on the node placement of the original pages,
   allowing for targeted zswap writeback for specific nodes.

   However, the compressed data of a zswap entry can be placed on a
   different node from the LRU it is placed on.  This means that reclaim
   targeted at one node might not free up memory used for zswap entries in
   that node, but instead reclaiming memory in a different node.

All of these issues will be resolved if the compressed data go to the same
node as the original page.  This patch encourages this behavior by having
zswap and zram pass the node of the original page to zsmalloc, and have
zsmalloc prefer the specified node if we need to allocate new (zs)pages
for the compressed data.

Note that we are not strictly binding the allocation to the preferred
node.  We still allow the allocation to fall back to other nodes when the
preferred node is full, or if we have zspages with slots available on a
different node.  This is OK, and still a strict improvement over the
status quo:

1. On a system with demotion enabled, we will generally prefer
   demotions over compressed swapping, and only swap when pages have
   already gone to the lowest tier.  This patch should achieve the desired
   effect for the most part.

2. If the preferred node is out of memory, letting the compressed data
   going to other nodes can be better than the alternative (OOMs, keeping
   cold memory unreclaimed, disk swapping, etc.).

3. If the allocation go to a separate node because we have a zspage
   with slots available, at least we're not creating extra immediate
   memory pressure (since the space is already allocated).

3. While there can be mixings, we generally reclaim pages in same-node
   batches, which encourage zspage grouping that is more likely to go to
   the right node.

4. A strict binding would require partitioning zsmalloc by node, which
   is more complicated, and more prone to regression, since it reduces the
   storage density of zsmalloc.  We need to evaluate the tradeoff and
   benchmark carefully before adopting such an involved solution.

[1]: https://lore.kernel.org/linux-mm/20250331165306.GC2110528@cmpxchg.org/

[senozhatsky@chromium.org: coding-style fixes]
  Link: https://lkml.kernel.org/r/mnvexa7kseswglcqbhlot4zg3b3la2ypv2rimdl5mh5glbmhvz@wi6bgqn47hge
Link: https://lkml.kernel.org/r/20250402204416.3435994-1-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Suggested-by: Gregory Price <gourry@gourry.net>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: Sergey Senozhatsky <senozhatsky@chromium.org>	[zram, zsmalloc]
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Yosry Ahmed <yosry.ahmed@linux.dev>	[zswap/zsmalloc]
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/zpool.h    | 4 ++--
 include/linux/zsmalloc.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 52f30e526607..369ef068fad8 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -22,7 +22,7 @@ const char *zpool_get_type(struct zpool *pool);
 void zpool_destroy_pool(struct zpool *pool);
 
 int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
-			unsigned long *handle);
+		 unsigned long *handle, const int nid);
 
 void zpool_free(struct zpool *pool, unsigned long handle);
 
@@ -64,7 +64,7 @@ struct zpool_driver {
 	void (*destroy)(void *pool);
 
 	int (*malloc)(void *pool, size_t size, gfp_t gfp,
-				unsigned long *handle);
+		      unsigned long *handle, const int nid);
 	void (*free)(void *pool, unsigned long handle);
 
 	void *(*obj_read_begin)(void *pool, unsigned long handle,
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index c26baf9fb331..13e9cc5490f7 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -26,7 +26,8 @@ struct zs_pool;
 struct zs_pool *zs_create_pool(const char *name);
 void zs_destroy_pool(struct zs_pool *pool);
 
-unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags);
+unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags,
+			const int nid);
 void zs_free(struct zs_pool *pool, unsigned long obj);
 
 size_t zs_huge_class_size(struct zs_pool *pool);
-- 
cgit v1.2.3


From 737e9d021993c99377bb61e762c0d6945a37615b Mon Sep 17 00:00:00 2001
From: Gregory Price <gourry@gourry.net>
Date: Mon, 27 Jan 2025 10:34:03 -0500
Subject: memory: implement memory_block_advise/probe_max_size

Patch series "memory,x86,acpi: hotplug memory alignment advisement", v8.

When physical address regions are not aligned to memory block size, the
misaligned portion is lost (stranded capacity).

Block size (min/max/selected) is architecture defined.  Most architectures
tend to use the minimum block size or some simplistic heurist.  On x86,
memory block size increases up to 2GB, and is otherwise fitted to the
alignment of non-hotplug (i.e.  not special purpose memory).

CXL exposes its memory for management through the ACPI CEDT (CXL Early
Detection Table) in a field called the CXL Fixed Memory Window.  Per the
CXL specification, this memory must be aligned to at least 256MB.

When a CFMW aligns on a size less than the block size, this causes a loss
of up to 2GB per CFMW on x86.  It is not uncommon for CFMW to be allocated
per-device - though this behavior is BIOS defined.

This patch set provides 3 things:
 1) implement advise/query functions in driverse/base/memory.c to
    report/query architecture agnostic hotplug block alignment advice.
 2) update x86 memblock size logic to consider the hotplug advice
 3) add code in acpi/numa/srat.c to report CFMW alignment advice

The advisement interfaces are design to be called during arch_init code
prior to allocator and smp_init.  start_kernel will call these through
setup_arch() (via acpi and mm/init_64.c on x86), which occurs prior to
mm_core_init and smp_init - so no need for atomics.

There's an attempt to signal callers to advise() that query has already
occurred, but this is predicated on the notion that query actually occurs
(which presently only happens on the x86 arch).  This is to assist
debugging future users.  Otherwise, the advise() call has been marked
__init to help static discovery of bad call times.

Once query is called the first time, it will always return the same value.

Interfaces return -EBUSY and 0 respectively on systems without hotplug.


This patch (of 3):

Hotplug memory sources may have opinions on what the memblock size should
be - usually for alignment purposes.  For example, CXL memory extents can
be 256MB with a matching alignment.  If this size/alignment is smaller
than the block size, it can result in stranded capacity.

Implement memory_block_advise_max_size for use prior to allocator init,
for software to advise the system on the max block size.

Implement memory_block_probe_max_size for use by arch init code to
calculate the best block size.  Use of advice is architecture defined.

The probe value can never change after first probe.  Calls to advise after
probe will return -EBUSY to aid debugging.

On systems without hotplug, always return -ENODEV and 0 respectively.

Link: https://lkml.kernel.org/r/20250127153405.3379117-1-gourry@gourry.net
Link: https://lkml.kernel.org/r/20250127153405.3379117-2-gourry@gourry.net
Signed-off-by: Gregory Price <gourry@gourry.net>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Fan Ni <fan.ni@samsung.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Bruno Faccini <bfaccini@nvidia.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haibo Xu <haibo1.xu@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Robert Richter <rrichter@amd.com>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memory.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index 12daa6ec7d09..5ec4e6d209b9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -149,6 +149,14 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
 {
 	return 0;
 }
+static inline int memory_block_advise_max_size(unsigned long size)
+{
+	return -ENODEV;
+}
+static inline unsigned long memory_block_advised_max_size(void)
+{
+	return 0;
+}
 #else /* CONFIG_MEMORY_HOTPLUG */
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
@@ -181,6 +189,8 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 void memory_block_add_nid(struct memory_block *mem, int nid,
 			  enum meminit_context context);
 #endif /* CONFIG_NUMA */
+int memory_block_advise_max_size(unsigned long size);
+unsigned long memory_block_advised_max_size(void);
 #endif	/* CONFIG_MEMORY_HOTPLUG */
 
 /*
-- 
cgit v1.2.3


From b4c829fa4d56f3b566bbbb41c9a8ff0c83ae84c5 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Mon, 31 Mar 2025 19:10:25 -0700
Subject: mm/compaction: use folio in hugetlb pathway

Use a folio in the hugetlb pathway during the compaction migrate-able
pageblock scan.

This removes a call to compound_head().

Link: https://lkml.kernel.org/r/20250401021025.637333-2-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..a57bed83c657 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -695,7 +695,7 @@ struct huge_bootmem_page {
 
 bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
 
-int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
+int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list);
 int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
 void wait_for_freed_hugetlb_folios(void);
 struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
@@ -1083,7 +1083,7 @@ static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
 	return NULL;
 }
 
-static inline int isolate_or_dissolve_huge_page(struct page *page,
+static inline int isolate_or_dissolve_huge_folio(struct folio *folio,
 						struct list_head *list)
 {
 	return -ENOMEM;
-- 
cgit v1.2.3


From 2e976567233228ff928c2405f7e03ebb7fb7aa50 Mon Sep 17 00:00:00 2001
From: Ignacio Encinas <ignacio@iencinas.com>
Date: Mon, 31 Mar 2025 21:57:05 +0200
Subject: mm: annotate data race in update_hiwater_rss

mm_struct.hiwater_rss can be accessed concurrently without proper
synchronization as reported by KCSAN.

This data race is benign as it only affects accounting information.
Annotate it with data_race() to make KCSAN happy.

Link: https://lkml.kernel.org/r/20250331-mm-maxrss-data-race-v2-1-cf958e6205bf@iencinas.com
Signed-off-by: Ignacio Encinas <ignacio@iencinas.com>
Reported-by: syzbot+419c4b42acc36c420ad3@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/67e3390c.050a0220.1ec46.0001.GAE@google.com/
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Pedro Falcato <pfalcato@suse.de>
Cc: Liam Howlett <liam.howlett@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dcdb798184ef..1690f21e7808 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -12,6 +12,7 @@
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
 #include <linux/debug_locks.h>
+#include <linux/compiler.h>
 #include <linux/mm_types.h>
 #include <linux/mmap_lock.h>
 #include <linux/range.h>
@@ -2796,7 +2797,7 @@ static inline void update_hiwater_rss(struct mm_struct *mm)
 {
 	unsigned long _rss = get_mm_rss(mm);
 
-	if ((mm)->hiwater_rss < _rss)
+	if (data_race(mm->hiwater_rss) < _rss)
 		(mm)->hiwater_rss = _rss;
 }
 
-- 
cgit v1.2.3


From 979f3ef0f798d9b4fda4806d37fb1a264fc38566 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Fri, 21 Mar 2025 22:02:21 +1000
Subject: mm: fix parameter passed to page_mapcount_is_type()

Patch series "Fix parameter passed to page_mapcount_is_type()", v2.

Found by code inspection.  There are two places where the parameter passed
to page_mapcount_is_type() is (page->_mapcount), which is incorrect since
it should be one more than the value, as explained in the comments to
page_mapcount_is_type(): (a) page_has_type() in page-flags.h (b)
__dump_folio() in mm/debug.c

PATCH[1] fixes the parameter for (a)
PATCH[2] fixes the parameter for (b)

Note that the issue doesn't cause any visible impacts due to the safety
gap introduced by PGTY_mapcount_underflow limit.  So the tag 'Cc:
stable@vger.kernel.org' isn't needed.


This patch (of 2):

As the comments of page_mapcount_is_type() indicate, the parameter passed
to the function should be one more than page->_mapcount.  However,
page->_mapcount (equivalent to page->page_type) is passed to the function
by commit 4ffca5a96678 ("mm: support only one page_type per page")
page_type_has_type() is replaced by page_mapcount_is_type(), but the
parameter isn't adjusted.

Fix it by replacing page_mapcount_is_type() with page_type_has_type() in
page_has_type().  Note that the issue doesn't cause any visible impacts
due to the safety gap introduced by PGTY_mapcount_underflow limit.

Link: https://lkml.kernel.org/r/20250321120222.1456770-1-gshan@redhat.com
Link: https://lkml.kernel.org/r/20250321120222.1456770-2-gshan@redhat.com
Fixes: 4ffca5a96678 ("mm: support only one page_type per page")
Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: gehao <gehao@kylinos.cn>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e6a21b62dcce..d3909cb1e576 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -982,7 +982,7 @@ static inline bool page_mapcount_is_type(unsigned int mapcount)
 
 static inline bool page_has_type(const struct page *page)
 {
-	return page_mapcount_is_type(data_race(page->page_type));
+	return page_type_has_type(data_race(page->page_type));
 }
 
 #define FOLIO_TYPE_OPS(lname, fname)					\
-- 
cgit v1.2.3


From 8a5577428e8e586a107ee5f39eb4c86d32c971cd Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Mar 2025 12:37:12 +0100
Subject: kernel/events/uprobes: pass VMA to set_swbp(), set_orig_insn() and
 uprobe_write_opcode()

We already have the VMA, no need to look it up using
get_user_page_vma_remote().  We can now switch to get_user_pages_remote().

Link: https://lkml.kernel.org/r/20250321113713.204682-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Namhyung kim <namhyung@kernel.org>
Cc: Russel King <linux@armlinux.org.uk>
Cc: tongtiangen <tongtiangen@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/uprobes.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 2e46b69ff0a6..516217c39094 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -188,13 +188,13 @@ struct uprobes_state {
 };
 
 extern void __init uprobes_init(void);
-extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr);
+extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr);
 extern bool is_swbp_insn(uprobe_opcode_t *insn);
 extern bool is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
-extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
+extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t);
 extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc);
-- 
cgit v1.2.3


From e064e7384f991c7df81999cad4ce30fed7ef7d88 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 7 Apr 2025 11:01:11 +0530
Subject: mm/ptdump: split note_page() into level specific callbacks

Patch series "mm/ptdump: Drop assumption that pxd_val() is u64", v2.

Last argument passed down in note_page() is u64 assuming pxd_val()
returned value (all page table levels) is 64 bit - which might not be the
case going ahead when D128 page tables is enabled on arm64 platform.
Besides pxd_val() is very platform specific and its type should not be
assumed in generic MM.  A similar problem exists for effective_prot(),
although it is restricted to x86 platform.

This series splits note_page() and effective_prot() into individual page
table level specific callbacks which accepts corresponding pxd_t page
table entry as an argument instead and later on all subscribing platforms
could derive pxd_val() from the table entries as required and proceed as
before.

Define ptdesc_t type which describes the basic page table descriptor
layout on arm64 platform.  Subsequently all level specific pxxval_t
descriptors are derived from ptdesc_t thus establishing a common original
format, which can also be appropriate for page table entries, masks and
protection values etc which are used at all page table levels.


This patch (of 3):

Last argument passed down in note_page() is u64 assuming pxd_val()
returned value (all page table levels) is 64 bit - which might not be the
case going ahead when D128 page tables is enabled on arm64 platform.
Besides pxd_val() is very platform specific and its type should not be
assumed in generic MM.

Split note_page() into individual page table level specific callbacks
which accepts corresponding pxd_t argument instead and then subscribing
platforms just derive pxd_val() from the entries as required and proceed
as earlier.

Also add a note_page_flush() callback for flushing the last page table
page that was being handled earlier via level = -1.

Link: https://lkml.kernel.org/r/20250407053113.746295-1-anshuman.khandual@arm.com
Link: https://lkml.kernel.org/r/20250407053113.746295-2-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/ptdump.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index 8dbd51ea8626..1c1eb1fae199 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -11,9 +11,12 @@ struct ptdump_range {
 };
 
 struct ptdump_state {
-	/* level is 0:PGD to 4:PTE, or -1 if unknown */
-	void (*note_page)(struct ptdump_state *st, unsigned long addr,
-			  int level, u64 val);
+	void (*note_page_pte)(struct ptdump_state *st, unsigned long addr, pte_t pte);
+	void (*note_page_pmd)(struct ptdump_state *st, unsigned long addr, pmd_t pmd);
+	void (*note_page_pud)(struct ptdump_state *st, unsigned long addr, pud_t pud);
+	void (*note_page_p4d)(struct ptdump_state *st, unsigned long addr, p4d_t p4d);
+	void (*note_page_pgd)(struct ptdump_state *st, unsigned long addr, pgd_t pgd);
+	void (*note_page_flush)(struct ptdump_state *st);
 	void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
 	const struct ptdump_range *range;
 };
-- 
cgit v1.2.3


From 08978fc3b0d5709583f6dc2072a8607d0b527860 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 7 Apr 2025 11:01:12 +0530
Subject: mm/ptdump: split effective_prot() into level specific callbacks

Last argument in effective_prot() is u64 assuming pxd_val() returned value
(all page table levels) is 64 bit.  pxd_val() is very platform specific
and its type should not be assumed in generic MM.

Split effective_prot() into individual page table level specific callbacks
which accepts corresponding pxd_t argument instead and then the
subscribing platform (only x86) just derive pxd_val() from the entries as
required and proceed as earlier.

Link: https://lkml.kernel.org/r/20250407053113.746295-3-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/ptdump.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index 1c1eb1fae199..240bd3bff18d 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -17,7 +17,11 @@ struct ptdump_state {
 	void (*note_page_p4d)(struct ptdump_state *st, unsigned long addr, p4d_t p4d);
 	void (*note_page_pgd)(struct ptdump_state *st, unsigned long addr, pgd_t pgd);
 	void (*note_page_flush)(struct ptdump_state *st);
-	void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
+	void (*effective_prot_pte)(struct ptdump_state *st, pte_t pte);
+	void (*effective_prot_pmd)(struct ptdump_state *st, pmd_t pmd);
+	void (*effective_prot_pud)(struct ptdump_state *st, pud_t pud);
+	void (*effective_prot_p4d)(struct ptdump_state *st, p4d_t p4d);
+	void (*effective_prot_pgd)(struct ptdump_state *st, pgd_t pgd);
 	const struct ptdump_range *range;
 };
 
-- 
cgit v1.2.3


From 4c97a17a252bf8396f7bd65efced00bf401a8c25 Mon Sep 17 00:00:00 2001
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Thu, 20 Mar 2025 11:22:19 +0100
Subject: xarray: make xa_alloc_cyclic() return 0 on all success cases

Change xa_alloc_cyclic() to return 0 even on wrap-around.  Do the same for
xa_alloc_cyclic_irq() and xa_alloc_cyclic_bh().

This will prevent any future bug of treating return of 1 as an error:
	int ret = xa_alloc_cyclic(...)
	if (ret) // currently mishandles ret==1
		goto failure;

If there will be someone interested in when wrap-around occurs, there is
still __xa_alloc_cyclic() that behaves as before.  For now there is no
such user.

Link: https://lkml.kernel.org/r/20250320102219.8101-1-przemyslaw.kitszel@intel.com
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Link: https://lore.kernel.org/netdev/Z9gUd-5t8b5NX2wE@casper.infradead.org
Cc: Andriy Shevchenko <andriy.shevchenko@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Cc: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/xarray.h | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 78eede109b1a..be850174e802 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -965,10 +965,12 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id,
  * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
  * in xa_init_flags().
  *
+ * Note that callers interested in whether wrapping has occurred should
+ * use __xa_alloc_cyclic() instead.
+ *
  * Context: Any context.  Takes and releases the xa_lock.  May sleep if
  * the @gfp flags permit.
- * Return: 0 if the allocation succeeded without wrapping.  1 if the
- * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be
  * allocated or -EBUSY if there are no free entries in @limit.
  */
 static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
@@ -981,7 +983,7 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
 	err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
 	xa_unlock(xa);
 
-	return err;
+	return err < 0 ? err : 0;
 }
 
 /**
@@ -1002,10 +1004,12 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
  * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
  * in xa_init_flags().
  *
+ * Note that callers interested in whether wrapping has occurred should
+ * use __xa_alloc_cyclic() instead.
+ *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
- * Return: 0 if the allocation succeeded without wrapping.  1 if the
- * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be
  * allocated or -EBUSY if there are no free entries in @limit.
  */
 static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
@@ -1018,7 +1022,7 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
 	err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
 	xa_unlock_bh(xa);
 
-	return err;
+	return err < 0 ? err : 0;
 }
 
 /**
@@ -1039,10 +1043,12 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
  * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
  * in xa_init_flags().
  *
+ * Note that callers interested in whether wrapping has occurred should
+ * use __xa_alloc_cyclic() instead.
+ *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.  May sleep if the @gfp flags permit.
- * Return: 0 if the allocation succeeded without wrapping.  1 if the
- * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be
  * allocated or -EBUSY if there are no free entries in @limit.
  */
 static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry,
@@ -1055,7 +1061,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry,
 	err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
 	xa_unlock_irq(xa);
 
-	return err;
+	return err < 0 ? err : 0;
 }
 
 /**
-- 
cgit v1.2.3


From a40b3fa844b4c29276a228ce878bd427a3149d37 Mon Sep 17 00:00:00 2001
From: Liu Ye <liuye@kylinos.cn>
Date: Tue, 18 Mar 2025 14:32:26 +0800
Subject: fs/proc/page: refactor to reduce code duplication

kpageflags_read() and kpagecgroup_read() are quite similar to
kpagecount_read().  Refactor common code into a helper function to reduce
code duplication.

Link: https://lkml.kernel.org/r/20250318063226.223284-1-liuyerd@163.com
Signed-off-by: Liu Ye <liuye@kylinos.cn>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Svetly Todorov <svetly.todorov@memverge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 53364526d877..5264d148bdd9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1793,6 +1793,10 @@ static inline void count_objcg_events(struct obj_cgroup *objcg,
 {
 }
 
+static inline ino_t page_cgroup_ino(struct page *page)
+{
+	return 0;
+}
 #endif /* CONFIG_MEMCG */
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
-- 
cgit v1.2.3


From d82d3bf4115217bb3f43f9320bad5d68a35c278f Mon Sep 17 00:00:00 2001
From: Kevin Brodsky <kevin.brodsky@arm.com>
Date: Tue, 8 Apr 2025 10:52:11 +0100
Subject: mm: pass mm down to pagetable_{pte,pmd}_ctor

Patch series "Always call constructor for kernel page tables", v2.

There has been much confusion around exactly when page table
constructors/destructors (pagetable_*_[cd]tor) are supposed to be called.
They were initially introduced for user PTEs only (to support split page
table locks), then at the PMD level for the same purpose.  Accounting was
added later on, starting at the PTE level and then moving to higher levels
(PMD, PUD).  Finally, with my earlier series "Account page tables at all
levels" [1], the ctor/dtor is run for all levels, all the way to PGD.

I thought this was the end of the story, and it hopefully is for user
pgtables, but I was wrong for what concerns kernel pgtables.  The current
situation there makes very little sense:

* At the PTE level, the ctor/dtor is not called (at least in the generic
  implementation).  Specific helpers are used for kernel pgtables at this
  level (pte_{alloc,free}_kernel()) and those have never called the
  ctor/dtor, most likely because they were initially irrelevant in the
  kernel case.

* At all other levels, the ctor/dtor is normally called.  This is
  potentially wasteful at the PMD level (more on that later).

This series aims to ensure that the ctor/dtor is always called for kernel
pgtables, as it already is for user pgtables.  Besides consistency, the
main motivation is to guarantee that ctor/dtor hooks are systematically
called; this makes it possible to insert hooks to protect page tables [2],
for instance.  There is however an extra challenge: split locks are not
used for kernel pgtables, and it would therefore be wasteful to initialise
them (ptlock_init()).

It is worth clarifying exactly when split locks are used.  They clearly
are for user pgtables, but as illustrated in commit 61444cde9170 ("ARM:
8591/1: mm: use fully constructed struct pages for EFI pgd allocations"),
they also are for special page tables like efi_mm.  The one case where
split locks are definitely unused is pgtables owned by init_mm; this is
consistent with the behaviour of apply_to_pte_range().

The approach chosen in this series is therefore to pass the mm associated
to the pgtables being constructed to pagetable_{pte,pmd}_ctor() (patch 1),
and skip ptlock_init() if mm == &init_mm (patch 3 and 7).  This makes it
possible to call the PTE ctor/dtor from pte_{alloc,free}_kernel() without
unintended consequences (patch 3).  As a result the accounting functions
are now called at all levels for kernel pgtables, and split locks are
never initialised.

In configurations where ptlocks are dynamically allocated (32-bit,
PREEMPT_RT, etc.) and ARCH_ENABLE_SPLIT_PMD_PTLOCK is selected, this
series results in the removal of a kmem_cache allocation for every kernel
PMD.  Additionally, for certain architectures that do not use
<asm-generic/pgalloc.h> such as s390, the same optimisation occurs at the
PTE level.

===

Things get more complicated when it comes to special pgtable allocators
(patch 8-12).  All architectures need such allocators to create initial
kernel pgtables; we are not concerned with those as the ctor cannot be
called so early in the boot sequence.  However, those allocators may also
be used later in the boot sequence or during normal operations.  There are
two main use-cases:

1. Mapping EFI memory: efi_mm (arm, arm64, riscv)
2. arch_add_memory(): init_mm

The ctor is already explicitly run (at the PTE/PMD level) in the first
case, as required for pgtables that are not associated with init_mm.
However the same allocators may also be used for the second use-case (or
others), and this is where it gets messy.  Patch 1 calls the ctor with
NULL as mm in those situations, as the actual mm isn't available.
Practically this means that ptlocks will be unconditionally initialised.
This is fine on arm - create_mapping_late() is only used for the EFI
mapping.  On arm64, __create_pgd_mapping() is also used by
arch_add_memory(); patch 8/9/11 ensure that ctors are called at all levels
with the appropriate mm.  The situation is similar on riscv, but
propagating the mm down to the ctor would require significant refactoring.
Since they are already called unconditionally, this series leaves riscv
no worse off - patch 10 adds comments to clarify the situation.

From a cursory look at other architectures implementing arch_add_memory(),
s390 and x86 may also need a similar treatment to add constructor calls.
This is to be taken care of in a future version or as a follow-up.

===

The complications in those special pgtable allocators beg the question:
does it really make sense to treat efi_mm and init_mm differently in e.g.
apply_to_pte_range()?  Maybe what we really need is a way to tell if an mm
corresponds to user memory or not, and never use split locks for non-user
mm's.  Feedback and suggestions welcome!


This patch (of 12):

In preparation for calling constructors for all kernel page tables while
eliding unnecessary ptlock initialisation, let's pass down the associated
mm to the PTE/PMD level ctors.  (These are the two levels where ptlocks
are used.)

In most cases the mm is already around at the point of calling the ctor so
we simply pass it down.  This is however not the case for special page
table allocators:

* arch/arm/mm/mmu.c
* arch/arm64/mm/mmu.c
* arch/riscv/mm/init.c

In those cases, the page tables being allocated are either for standard
kernel memory (init_mm) or special page directories, which may not be
associated to any mm.  For now let's pass NULL as mm; this will be refined
where possible in future patches.

No functional change in this patch.

Link: https://lore.kernel.org/linux-mm/20250103184415.2744423-1-kevin.brodsky@arm.com/ [1]
Link: https://lore.kernel.org/linux-hardening/20250203101839.1223008-1-kevin.brodsky@arm.com/ [2]
Link: https://lkml.kernel.org/r/20250408095222.860601-1-kevin.brodsky@arm.com
Link: https://lkml.kernel.org/r/20250408095222.860601-2-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>	[s390]
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Linus Waleij <linus.walleij@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: <x86@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1690f21e7808..fa22b17e337e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3147,7 +3147,8 @@ static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
 	pagetable_free(ptdesc);
 }
 
-static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
+static inline bool pagetable_pte_ctor(struct mm_struct *mm,
+				      struct ptdesc *ptdesc)
 {
 	if (!ptlock_init(ptdesc))
 		return false;
@@ -3253,7 +3254,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
-static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc)
+static inline bool pagetable_pmd_ctor(struct mm_struct *mm,
+				      struct ptdesc *ptdesc)
 {
 	if (!pmd_ptlock_init(ptdesc))
 		return false;
-- 
cgit v1.2.3


From 49f5996664201a3581ee5ea5949ee7d5fafb9d86 Mon Sep 17 00:00:00 2001
From: Kevin Brodsky <kevin.brodsky@arm.com>
Date: Tue, 8 Apr 2025 10:52:13 +0100
Subject: mm: call ctor/dtor for kernel PTEs

Since [1], constructors/destructors are expected to be called for all page
table pages, at all levels and for both user and kernel pgtables.  There
is however one glaring exception: kernel PTEs are managed via separate
helpers (pte_alloc_kernel/pte_free_kernel), which do not call the [cd]tor,
at least not in the generic implementation.

The most obvious reason for this anomaly is that init_mm is special-cased
not to use split page table locks.  As a result calling ptlock_init() for
PTEs associated with init_mm would be wasteful, potentially resulting in
dynamic memory allocation.  However, pgtable [cd]tors perform other
actions - currently related to accounting/statistics, and potentially more
functionally significant in the future.

Now that pagetable_pte_ctor() is passed the associated mm, we can make it
skip the call to ptlock_init() for init_mm; this allows us to call the
ctor from pte_alloc_one_kernel() too.  This is matched by a call to the
pgtable destructor in pte_free_kernel(); no special-casing is needed on
that path, as ptlock_free() is already called unconditionally.
(ptlock_free() is a no-op unless a ptlock was allocated for the given
PTP.)

This patch ensures that all architectures that rely on
<asm-generic/pgalloc.h> call the [cd]tor for kernel PTEs.
pte_free_kernel() cannot be overridden so changing the generic
implementation is sufficient.  pte_alloc_one_kernel() can be overridden
using __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, and a few architectures implement
it by calling the page allocator directly.  We amend those so that they
call the generic __pte_alloc_one_kernel() instead, if possible, ensuring
that the ctor is called.

A few architectures do not use <asm-generic/pgalloc.h>; those will be
taken care of separately.

[1] https://lore.kernel.org/linux-mm/20250103184415.2744423-1-kevin.brodsky@arm.com/

Link: https://lkml.kernel.org/r/20250408095222.860601-4-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com> # s390
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Linus Waleij <linus.walleij@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: <x86@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fa22b17e337e..ce6832787f6d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3150,7 +3150,7 @@ static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
 static inline bool pagetable_pte_ctor(struct mm_struct *mm,
 				      struct ptdesc *ptdesc)
 {
-	if (!ptlock_init(ptdesc))
+	if (mm != &init_mm && !ptlock_init(ptdesc))
 		return false;
 	__pagetable_ctor(ptdesc);
 	return true;
-- 
cgit v1.2.3


From 8240d8d3c5fb65fbdace7ae4db909b51f2253da7 Mon Sep 17 00:00:00 2001
From: Kevin Brodsky <kevin.brodsky@arm.com>
Date: Tue, 8 Apr 2025 10:52:17 +0100
Subject: mm: skip ptlock_init() for kernel PMDs

Split page table locks are not used for pgtables associated to init_mm, at
any level.  pte_alloc_kernel() does not call ptlock_init() as a result.
There is however no separate alloc/free functions for kernel PMDs, and
pmd_ptlock_init() is called unconditionally.  When ALLOC_SPLIT_PTLOCKS is
true (e.g.  32-bit architectures or if CONFIG_PREEMPT_RT is selected),
this results in unnecessary dynamic memory allocation every time a kernel
PMD is allocated.

Now that pagetable_pmd_ctor() is passed the associated mm, we can easily
remove this overhead by skipping pmd_ptlock_init() if the pgtable is
associated to init_mm.  No special-casing is needed on the dtor path, as
ptlock_free() is already called unconditionally for all levels.
(ptlock_free() is a no-op unless a ptlock was allocated for the given
PTP.)

Link: https://lkml.kernel.org/r/20250408095222.860601-8-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Linus Waleij <linus.walleij@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: <x86@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce6832787f6d..5eb0d77c4438 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3257,7 +3257,7 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 static inline bool pagetable_pmd_ctor(struct mm_struct *mm,
 				      struct ptdesc *ptdesc)
 {
-	if (!pmd_ptlock_init(ptdesc))
+	if (mm != &init_mm && !pmd_ptlock_init(ptdesc))
 		return false;
 	ptdesc_pmd_pts_init(ptdesc);
 	__pagetable_ctor(ptdesc);
-- 
cgit v1.2.3


From ad88fc17d2dafe45e40de2af80207f4b2e3b1f71 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 10 Apr 2025 19:14:43 +0000
Subject: maple_tree: use vacant nodes to reduce worst case allocations

In order to determine the store type for a maple tree operation, a walk of
the tree is done through mas_wr_walk().  This function descends the tree
until a spanning write is detected or we reach a leaf node.  While
descending, keep track of the height at which we encounter a node with
available space.  This is done by checking if mas->end is less than the
number of slots a given node type can fit.

Now that the height of the vacant node is tracked, we can use the
difference between the height of the tree and the height of the vacant
node to know how many levels we will have to propagate creating new nodes.
Update mas_prealloc_calc() to consider the vacant height and reduce the
number of worst-case allocations.

Rebalancing and spanning stores are not supported and fall back to using
the full height of the tree for allocations.

Update preallocation testing assertions to take into account vacant
height.

Link: https://lkml.kernel.org/r/20250410191446.2474640-4-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index cbbcd18d4186..657adb33e61e 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -463,6 +463,7 @@ struct ma_wr_state {
 	void __rcu **slots;		/* mas->node->slots pointer */
 	void *entry;			/* The entry to write */
 	void *content;			/* The existing entry that is being overwritten */
+	unsigned char vacant_height;	/* Height of lowest node with free space */
 };
 
 #define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
@@ -498,6 +499,7 @@ struct ma_wr_state {
 		.mas = ma_state,					\
 		.content = NULL,					\
 		.entry = wr_entry,					\
+		.vacant_height = 0					\
 	}
 
 #define MA_TOPIARY(name, tree)						\
-- 
cgit v1.2.3


From 271152a973cb01c135d29e91d1a05f51fbd88a9c Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 10 Apr 2025 19:14:45 +0000
Subject: maple_tree: add sufficient height

In order to support rebalancing and spanning stores using less than the
worst case number of nodes, we need to track more than just the vacant
height.  Using only vacant height to reduce the worst case maple node
allocation count can lead to a shortcoming of nodes in the following
scenarios.

For rebalancing writes, when a leaf node becomes insufficient, it may be
combined with a sibling into a single node.  This means that the parent
node which has entries for this children will lose one entry.  If this
parent node was just meeting the minimum entries, losing one entry will
now cause this parent node to be insufficient.  This leads to a cascading
operation of rebalancing at different levels and can lead to more node
allocations than simply using vacant height can return.

For spanning writes, a similar situation occurs.  At the location at which
a spanning write is detected, the number of ancestor nodes may similarly
need to rebalanced into a smaller number of nodes and the same cascading
situation could occur.

To use less than the full height of the tree for the number of
allocations, we also need to track the height at which a non-leaf node
cannot become insufficient.  This means even if a rebalance occurs to a
child of this node, it currently has enough entries that it can lose one
without any further action.  This field is stored in the maple write state
as sufficient height.  In mas_prealloc_calc() when figuring out how many
nodes to allocate, we check if the vacant node is lower in the tree than a
sufficient node (has a larger value).  If it is, we cannot use the vacant
height and must use the difference in the height and sufficient height as
the basis for the number of nodes needed.

An off by one bug was also discovered in mast_overflow() where it is using
>= rather than >.  This caused extra iterations of the
mas_spanning_rebalance() loop and lead to unneeded allocations.  A test is
also added to check the number of allocations is correct.

Link: https://lkml.kernel.org/r/20250410191446.2474640-6-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/maple_tree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 657adb33e61e..9ef129038224 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -464,6 +464,7 @@ struct ma_wr_state {
 	void *entry;			/* The entry to write */
 	void *content;			/* The existing entry that is being overwritten */
 	unsigned char vacant_height;	/* Height of lowest node with free space */
+	unsigned char sufficient_height;/* Height of lowest node with min sufficiency + 1 nodes */
 };
 
 #define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
@@ -499,7 +500,8 @@ struct ma_wr_state {
 		.mas = ma_state,					\
 		.content = NULL,					\
 		.entry = wr_entry,					\
-		.vacant_height = 0					\
+		.vacant_height = 0,					\
+		.sufficient_height = 0					\
 	}
 
 #define MA_TOPIARY(name, tree)						\
-- 
cgit v1.2.3


From 06340b927051bf71b59a9cd4cff3417247318251 Mon Sep 17 00:00:00 2001
From: Fan Ni <fan.ni@samsung.com>
Date: Wed, 16 Apr 2025 13:12:15 -0700
Subject: mm: convert free_page_and_swap_cache() to free_folio_and_swap_cache()

free_page_and_swap_cache() takes a struct page pointer as input parameter,
but it will immediately convert it to folio and all operations following
within use folio instead of page.  It makes more sense to pass in folio
directly.

Convert free_page_and_swap_cache() to free_folio_and_swap_cache() to
consume folio directly.

Link: https://lkml.kernel.org/r/20250416201720.41678-1-nifan.cxl@gmail.com
Signed-off-by: Fan Ni <fan.ni@samsung.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Adam Manzanares <a.manzanares@samsung.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Luis Chamberalin <mcgrof@kernel.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index db46b25a65ae..4e4e27d3ce3d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -450,7 +450,7 @@ static inline unsigned long total_swapcache_pages(void)
 }
 
 void free_swap_cache(struct folio *folio);
-void free_page_and_swap_cache(struct page *);
+void free_folio_and_swap_cache(struct folio *folio);
 void free_pages_and_swap_cache(struct encoded_page **, int);
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
@@ -520,10 +520,8 @@ static inline void put_swap_device(struct swap_info_struct *si)
 
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
-/* only sparc can not include linux/pagemap.h in this file
- * so leave put_page and release_pages undeclared... */
-#define free_page_and_swap_cache(page) \
-	put_page(page)
+#define free_folio_and_swap_cache(folio) \
+	folio_put(folio)
 #define free_pages_and_swap_cache(pages, nr) \
 	release_pages((pages), (nr));
 
-- 
cgit v1.2.3


From 75404e07663b1622948944cf31531fa87cb1785d Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Wed, 16 Apr 2025 11:38:36 +0100
Subject: mm: move mmap/vma locking logic into specific files

Currently the VMA and mmap locking logic is entangled in two of the most
overwrought files in mm - include/linux/mm.h and mm/memory.c.  Separate
this logic out so we can more easily make changes and create an
appropriate MAINTAINERS entry that spans only the logic relating to
locking.

This should have no functional change.  Care is taken to avoid dependency
loops, we must regrettably keep release_fault_lock() and
assert_fault_locked() in mm.h as a result due to the dependence on the
vm_fault type.

Additionally we must declare rcuwait_wake_up() manually to avoid a
dependency cycle on linux/rcuwait.h.

Additionally move the nommu implementatino of lock_mm_and_find_vma() to
mmap_lock.c so everything lock-related is in one place.

Link: https://lkml.kernel.org/r/bec6c8e29fa8de9267a811a10b1bdae355d67ed4.1744799282.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h        | 231 +---------------------------------------------
 include/linux/mmap_lock.h | 227 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+), 227 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5eb0d77c4438..9b701cfbef22 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -671,204 +671,11 @@ static inline void vma_numab_state_init(struct vm_area_struct *vma) {}
 static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
 #endif /* CONFIG_NUMA_BALANCING */
 
-#ifdef CONFIG_PER_VMA_LOCK
-static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	static struct lock_class_key lockdep_key;
-
-	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
-#endif
-	if (reset_refcnt)
-		refcount_set(&vma->vm_refcnt, 0);
-	vma->vm_lock_seq = UINT_MAX;
-}
-
-static inline bool is_vma_writer_only(int refcnt)
-{
-	/*
-	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
-	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
-	 * a detached vma happens only in vma_mark_detached() and is a rare
-	 * case, therefore most of the time there will be no unnecessary wakeup.
-	 */
-	return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
-}
-
-static inline void vma_refcount_put(struct vm_area_struct *vma)
-{
-	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
-	struct mm_struct *mm = vma->vm_mm;
-	int oldcnt;
-
-	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
-	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
-
-		if (is_vma_writer_only(oldcnt - 1))
-			rcuwait_wake_up(&mm->vma_writer_wait);
-	}
-}
-
-/*
- * Try to read-lock a vma. The function is allowed to occasionally yield false
- * locked result to avoid performance overhead, in which case we fall back to
- * using mmap_lock. The function should never yield false unlocked result.
- * False locked result is possible if mm_lock_seq overflows or if vma gets
- * reused and attached to a different mm before we lock it.
- * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
- * detached.
- */
-static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
-						    struct vm_area_struct *vma)
-{
-	int oldcnt;
-
-	/*
-	 * Check before locking. A race might cause false locked result.
-	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
-	 * ACQUIRE semantics, because this is just a lockless check whose result
-	 * we don't rely on for anything - the mm_lock_seq read against which we
-	 * need ordering is below.
-	 */
-	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
-		return NULL;
-
-	/*
-	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
-	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
-	 * Acquire fence is required here to avoid reordering against later
-	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
-	 */
-	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
-							      VMA_REF_LIMIT))) {
-		/* return EAGAIN if vma got detached from under us */
-		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
-	}
-
-	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
-	/*
-	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
-	 * False unlocked result is impossible because we modify and check
-	 * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
-	 * modification invalidates all existing locks.
-	 *
-	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
-	 * racing with vma_end_write_all(), we only start reading from the VMA
-	 * after it has been unlocked.
-	 * This pairs with RELEASE semantics in vma_end_write_all().
-	 */
-	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
-		vma_refcount_put(vma);
-		return NULL;
-	}
-
-	return vma;
-}
-
-/*
- * Use only while holding mmap read lock which guarantees that locking will not
- * fail (nobody can concurrently write-lock the vma). vma_start_read() should
- * not be used in such cases because it might fail due to mm_lock_seq overflow.
- * This functionality is used to obtain vma read lock and drop the mmap read lock.
- */
-static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
-{
-	int oldcnt;
-
-	mmap_assert_locked(vma->vm_mm);
-	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
-							      VMA_REF_LIMIT)))
-		return false;
-
-	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
-	return true;
-}
-
-/*
- * Use only while holding mmap read lock which guarantees that locking will not
- * fail (nobody can concurrently write-lock the vma). vma_start_read() should
- * not be used in such cases because it might fail due to mm_lock_seq overflow.
- * This functionality is used to obtain vma read lock and drop the mmap read lock.
- */
-static inline bool vma_start_read_locked(struct vm_area_struct *vma)
-{
-	return vma_start_read_locked_nested(vma, 0);
-}
-
-static inline void vma_end_read(struct vm_area_struct *vma)
-{
-	vma_refcount_put(vma);
-}
-
-/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
-static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
-{
-	mmap_assert_write_locked(vma->vm_mm);
-
-	/*
-	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
-	 * mm->mm_lock_seq can't be concurrently modified.
-	 */
-	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
-	return (vma->vm_lock_seq == *mm_lock_seq);
-}
-
-void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);
-
-/*
- * Begin writing to a VMA.
- * Exclude concurrent readers under the per-VMA lock until the currently
- * write-locked mmap_lock is dropped or downgraded.
- */
-static inline void vma_start_write(struct vm_area_struct *vma)
-{
-	unsigned int mm_lock_seq;
-
-	if (__is_vma_write_locked(vma, &mm_lock_seq))
-		return;
-
-	__vma_start_write(vma, mm_lock_seq);
-}
-
-static inline void vma_assert_write_locked(struct vm_area_struct *vma)
-{
-	unsigned int mm_lock_seq;
-
-	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
-}
-
-static inline void vma_assert_locked(struct vm_area_struct *vma)
-{
-	unsigned int mm_lock_seq;
-
-	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
-		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
-}
-
 /*
- * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
- * assertions should be made either under mmap_write_lock or when the object
- * has been isolated under mmap_write_lock, ensuring no competing writers.
+ * These must be here rather than mmap_lock.h as dependent on vm_fault type,
+ * declared in this header.
  */
-static inline void vma_assert_attached(struct vm_area_struct *vma)
-{
-	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
-}
-
-static inline void vma_assert_detached(struct vm_area_struct *vma)
-{
-	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
-}
-
-static inline void vma_mark_attached(struct vm_area_struct *vma)
-{
-	vma_assert_write_locked(vma);
-	vma_assert_detached(vma);
-	refcount_set_release(&vma->vm_refcnt, 1);
-}
-
-void vma_mark_detached(struct vm_area_struct *vma);
-
+#ifdef CONFIG_PER_VMA_LOCK
 static inline void release_fault_lock(struct vm_fault *vmf)
 {
 	if (vmf->flags & FAULT_FLAG_VMA_LOCK)
@@ -884,36 +691,7 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
 	else
 		mmap_assert_locked(vmf->vma->vm_mm);
 }
-
-struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
-					  unsigned long address);
-
-#else /* CONFIG_PER_VMA_LOCK */
-
-static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
-static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
-						    struct vm_area_struct *vma)
-		{ return NULL; }
-static inline void vma_end_read(struct vm_area_struct *vma) {}
-static inline void vma_start_write(struct vm_area_struct *vma) {}
-static inline void vma_assert_write_locked(struct vm_area_struct *vma)
-		{ mmap_assert_write_locked(vma->vm_mm); }
-static inline void vma_assert_attached(struct vm_area_struct *vma) {}
-static inline void vma_assert_detached(struct vm_area_struct *vma) {}
-static inline void vma_mark_attached(struct vm_area_struct *vma) {}
-static inline void vma_mark_detached(struct vm_area_struct *vma) {}
-
-static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
-		unsigned long address)
-{
-	return NULL;
-}
-
-static inline void vma_assert_locked(struct vm_area_struct *vma)
-{
-	mmap_assert_locked(vma->vm_mm);
-}
-
+#else
 static inline void release_fault_lock(struct vm_fault *vmf)
 {
 	mmap_read_unlock(vmf->vma->vm_mm);
@@ -923,7 +701,6 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
 {
 	mmap_assert_locked(vmf->vma->vm_mm);
 }
-
 #endif /* CONFIG_PER_VMA_LOCK */
 
 extern const struct vm_operations_struct vma_dummy_vm_ops;
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 4706c6769902..7983b2efe9bf 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,6 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MMAP_LOCK_H
 #define _LINUX_MMAP_LOCK_H
 
+/* Avoid a dependency loop by declaring here. */
+extern int rcuwait_wake_up(struct rcuwait *w);
+
 #include <linux/lockdep.h>
 #include <linux/mm_types.h>
 #include <linux/mmdebug.h>
@@ -104,6 +108,206 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int
 	return read_seqcount_retry(&mm->mm_lock_seq, seq);
 }
 
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	static struct lock_class_key lockdep_key;
+
+	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
+#endif
+	if (reset_refcnt)
+		refcount_set(&vma->vm_refcnt, 0);
+	vma->vm_lock_seq = UINT_MAX;
+}
+
+static inline bool is_vma_writer_only(int refcnt)
+{
+	/*
+	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
+	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
+	 * a detached vma happens only in vma_mark_detached() and is a rare
+	 * case, therefore most of the time there will be no unnecessary wakeup.
+	 */
+	return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
+}
+
+static inline void vma_refcount_put(struct vm_area_struct *vma)
+{
+	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
+	struct mm_struct *mm = vma->vm_mm;
+	int oldcnt;
+
+	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
+	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
+
+		if (is_vma_writer_only(oldcnt - 1))
+			rcuwait_wake_up(&mm->vma_writer_wait);
+	}
+}
+
+/*
+ * Try to read-lock a vma. The function is allowed to occasionally yield false
+ * locked result to avoid performance overhead, in which case we fall back to
+ * using mmap_lock. The function should never yield false unlocked result.
+ * False locked result is possible if mm_lock_seq overflows or if vma gets
+ * reused and attached to a different mm before we lock it.
+ * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
+ * detached.
+ */
+static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
+						    struct vm_area_struct *vma)
+{
+	int oldcnt;
+
+	/*
+	 * Check before locking. A race might cause false locked result.
+	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
+	 * ACQUIRE semantics, because this is just a lockless check whose result
+	 * we don't rely on for anything - the mm_lock_seq read against which we
+	 * need ordering is below.
+	 */
+	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
+		return NULL;
+
+	/*
+	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
+	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
+	 * Acquire fence is required here to avoid reordering against later
+	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
+	 */
+	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
+							      VMA_REF_LIMIT))) {
+		/* return EAGAIN if vma got detached from under us */
+		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
+	}
+
+	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+	/*
+	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
+	 * False unlocked result is impossible because we modify and check
+	 * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
+	 * modification invalidates all existing locks.
+	 *
+	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
+	 * racing with vma_end_write_all(), we only start reading from the VMA
+	 * after it has been unlocked.
+	 * This pairs with RELEASE semantics in vma_end_write_all().
+	 */
+	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
+		vma_refcount_put(vma);
+		return NULL;
+	}
+
+	return vma;
+}
+
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
+{
+	int oldcnt;
+
+	mmap_assert_locked(vma->vm_mm);
+	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
+							      VMA_REF_LIMIT)))
+		return false;
+
+	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+	return true;
+}
+
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline bool vma_start_read_locked(struct vm_area_struct *vma)
+{
+	return vma_start_read_locked_nested(vma, 0);
+}
+
+static inline void vma_end_read(struct vm_area_struct *vma)
+{
+	vma_refcount_put(vma);
+}
+
+/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
+static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
+{
+	mmap_assert_write_locked(vma->vm_mm);
+
+	/*
+	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
+	 * mm->mm_lock_seq can't be concurrently modified.
+	 */
+	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
+	return (vma->vm_lock_seq == *mm_lock_seq);
+}
+
+void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);
+
+/*
+ * Begin writing to a VMA.
+ * Exclude concurrent readers under the per-VMA lock until the currently
+ * write-locked mmap_lock is dropped or downgraded.
+ */
+static inline void vma_start_write(struct vm_area_struct *vma)
+{
+	unsigned int mm_lock_seq;
+
+	if (__is_vma_write_locked(vma, &mm_lock_seq))
+		return;
+
+	__vma_start_write(vma, mm_lock_seq);
+}
+
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+{
+	unsigned int mm_lock_seq;
+
+	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
+}
+
+static inline void vma_assert_locked(struct vm_area_struct *vma)
+{
+	unsigned int mm_lock_seq;
+
+	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
+		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
+}
+
+/*
+ * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
+ * assertions should be made either under mmap_write_lock or when the object
+ * has been isolated under mmap_write_lock, ensuring no competing writers.
+ */
+static inline void vma_assert_attached(struct vm_area_struct *vma)
+{
+	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
+}
+
+static inline void vma_assert_detached(struct vm_area_struct *vma)
+{
+	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
+}
+
+static inline void vma_mark_attached(struct vm_area_struct *vma)
+{
+	vma_assert_write_locked(vma);
+	vma_assert_detached(vma);
+	refcount_set_release(&vma->vm_refcnt, 1);
+}
+
+void vma_mark_detached(struct vm_area_struct *vma);
+
+struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
+					  unsigned long address);
+
 #else /* CONFIG_PER_VMA_LOCK */
 
 static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
@@ -119,6 +323,29 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int
 {
 	return true;
 }
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
+static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
+						    struct vm_area_struct *vma)
+		{ return NULL; }
+static inline void vma_end_read(struct vm_area_struct *vma) {}
+static inline void vma_start_write(struct vm_area_struct *vma) {}
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+		{ mmap_assert_write_locked(vma->vm_mm); }
+static inline void vma_assert_attached(struct vm_area_struct *vma) {}
+static inline void vma_assert_detached(struct vm_area_struct *vma) {}
+static inline void vma_mark_attached(struct vm_area_struct *vma) {}
+static inline void vma_mark_detached(struct vm_area_struct *vma) {}
+
+static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
+		unsigned long address)
+{
+	return NULL;
+}
+
+static inline void vma_assert_locked(struct vm_area_struct *vma)
+{
+	mmap_assert_locked(vma->vm_mm);
+}
 
 #endif /* CONFIG_PER_VMA_LOCK */
 
-- 
cgit v1.2.3


From db80bd2cea1b7c2574578461c9de4c3d9ee7634a Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Thu, 3 Apr 2025 14:33:03 +0200
Subject: task_stack.h: remove obsolete __HAVE_ARCH_KSTACK_END check

Remove __HAVE_ARCH_KSTACK_END as it has been obsolete since removal of
metag architecture in v4.17.

Link: https://lkml.kernel.org/r/20250403-kstack-end-v1-1-7798e71f34d1@linaro.org
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Link: https://lore.kernel.org/20240311164638.2015063-2-pasha.tatashin@soleen.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched/task_stack.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index cffad65bdc6a..85c5a6392e02 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -106,7 +106,6 @@ static inline unsigned long stack_not_used(struct task_struct *p)
 #endif
 extern void set_task_stack_end_magic(struct task_struct *tsk);
 
-#ifndef __HAVE_ARCH_KSTACK_END
 static inline int kstack_end(void *addr)
 {
 	/* Reliable end of stack detection:
@@ -114,6 +113,5 @@ static inline int kstack_end(void *addr)
 	 */
 	return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
 }
-#endif
 
 #endif /* _LINUX_SCHED_TASK_STACK_H */
-- 
cgit v1.2.3


From 4ef5211ee68113070bd42142f06347866675055e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 24 Mar 2025 12:50:24 +0200
Subject: kernel.h: move READ/WRITE definitions to <linux/types.h>

Patch series "kernel.h: Move out a couple of macros and constants".

kernel.h hosts a couple of macros and constants that may be better placed.
Do that.  Also add missing documentation.  No functional changes
intended.


This patch (of 2):

Headers shouldn't be forced to include <linux/kernel.h> just to
gain these simple constants.

Link: https://lkml.kernel.org/r/20250324105228.775784-1-andriy.shevchenko@linux.intel.com
Link: https://lkml.kernel.org/r/20250324105228.775784-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Alexandru Ardelean <aardelean@baylibre.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kernel.h | 4 ----
 include/linux/types.h  | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index be2e8c0a187e..01bb0fac3667 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -41,10 +41,6 @@
 
 #define STACK_MAGIC	0xdeadbeef
 
-/* generic data direction definitions */
-#define READ			0
-#define WRITE			1
-
 #define PTR_IF(cond, ptr)	((cond) ? (ptr) : NULL)
 
 #define u64_to_user_ptr(x) (		\
diff --git a/include/linux/types.h b/include/linux/types.h
index 49b79c8bb1a9..6dfdb8e8e4c3 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -136,6 +136,10 @@ typedef s64	ktime_t;
 typedef u64 sector_t;
 typedef u64 blkcnt_t;
 
+/* generic data direction definitions */
+#define READ			0
+#define WRITE			1
+
 /*
  * The type of an index into the pagecache.
  */
-- 
cgit v1.2.3


From 029c896c4105b7d74fcd88d99c5fbab2558fee89 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 24 Mar 2025 12:50:25 +0200
Subject: kernel.h: move PTR_IF() and u64_to_user_ptr() to util_macros.h

While the natural choice of PTR_IF() is kconfig.h, the latter is too broad
to include C code and actually the macro was moved out from there in the
past.  But kernel.h is neither a good choice for that.  Move it to
util_macros.h.  Do the same for u64_to_user_ptr().

While moving, add necessary documentation.

Link: https://lkml.kernel.org/r/20250324105228.775784-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Alexandru Ardelean <aardelean@baylibre.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kernel.h      | 10 +------
 include/linux/util_macros.h | 66 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 01bb0fac3667..1cce1f6410a9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@
 #include <linux/sprintf.h>
 #include <linux/static_call_types.h>
 #include <linux/instruction_pointer.h>
+#include <linux/util_macros.h>
 #include <linux/wordpart.h>
 
 #include <asm/byteorder.h>
@@ -41,15 +42,6 @@
 
 #define STACK_MAGIC	0xdeadbeef
 
-#define PTR_IF(cond, ptr)	((cond) ? (ptr) : NULL)
-
-#define u64_to_user_ptr(x) (		\
-{					\
-	typecheck(u64, (x));		\
-	(void __user *)(uintptr_t)(x);	\
-}					\
-)
-
 struct completion;
 struct user;
 
diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 3b570b765b75..7b64fb597f85 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -79,6 +79,72 @@
 	(__fc_i);							\
 })
 
+/**
+ * PTR_IF - evaluate to @ptr if @cond is true, or to NULL otherwise.
+ * @cond: A conditional, usually in a form of IS_ENABLED(CONFIG_FOO)
+ * @ptr: A pointer to assign if @cond is true.
+ *
+ * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set
+ * to 'y' or 'm', or to NULL otherwise. The (ptr) argument must be a pointer.
+ *
+ * The macro can be very useful to help compiler dropping dead code.
+ *
+ * For instance, consider the following::
+ *
+ *     #ifdef CONFIG_FOO_SUSPEND
+ *     static int foo_suspend(struct device *dev)
+ *     {
+ *        ...
+ *     }
+ *     #endif
+ *
+ *     static struct pm_ops foo_ops = {
+ *     #ifdef CONFIG_FOO_SUSPEND
+ *         .suspend = foo_suspend,
+ *     #endif
+ *     };
+ *
+ * While this works, the foo_suspend() macro is compiled conditionally,
+ * only when CONFIG_FOO_SUSPEND is set. This is problematic, as there could
+ * be a build bug in this function, we wouldn't have a way to know unless
+ * the configuration option is set.
+ *
+ * An alternative is to declare foo_suspend() always, but mark it
+ * as __maybe_unused. This works, but the __maybe_unused attribute
+ * is required to instruct the compiler that the function may not
+ * be referenced anywhere, and is safe to remove without making
+ * a fuss about it. This makes the programmer responsible for tagging
+ * the functions that can be garbage-collected.
+ *
+ * With the macro it is possible to write the following:
+ *
+ *     static int foo_suspend(struct device *dev)
+ *     {
+ *        ...
+ *     }
+ *
+ *     static struct pm_ops foo_ops = {
+ *         .suspend = PTR_IF(IS_ENABLED(CONFIG_FOO_SUSPEND), foo_suspend),
+ *     };
+ *
+ * The foo_suspend() function will now be automatically dropped by the
+ * compiler, and it does not require any specific attribute.
+ */
+#define PTR_IF(cond, ptr)	((cond) ? (ptr) : NULL)
+
+/**
+ * to_user_ptr - cast a pointer passed as u64 from user space to void __user *
+ * @x: The u64 value from user space, usually via IOCTL
+ *
+ * to_user_ptr() simply casts a pointer passed as u64 from user space to void
+ * __user * correctly. Using this lets us get rid of all the tiresome casts.
+ */
+#define u64_to_user_ptr(x)		\
+({					\
+	typecheck(u64, (x));		\
+	(void __user *)(uintptr_t)(x);	\
+})
+
 /**
  * is_insidevar - check if the @ptr points inside the @var memory range.
  * @ptr:	the pointer to a memory address.
-- 
cgit v1.2.3


From e711faaafbe54a884f33b53472434063d342f6d4 Mon Sep 17 00:00:00 2001
From: Lance Yang <ioworker0@gmail.com>
Date: Mon, 14 Apr 2025 22:59:43 +0800
Subject: hung_task: replace blocker_mutex with encoded blocker

Patch series "hung_task: extend blocking task stacktrace dump to
semaphore", v5.

Inspired by mutex blocker tracking[1], this patch series extend the
feature to not only dump the blocker task holding a mutex but also to
support semaphores.  Unlike mutexes, semaphores lack explicit ownership
tracking, making it challenging to identify the root cause of hangs.  To
address this, we introduce a last_holder field to the semaphore structure,
which is updated when a task successfully calls down() and cleared during
up().

The assumption is that if a task is blocked on a semaphore, the holders
must not have released it.  While this does not guarantee that the last
holder is one of the current blockers, it likely provides a practical hint
for diagnosing semaphore-related stalls.

With this change, the hung task detector can now show blocker task's info
like below:

[Tue Apr  8 12:19:07 2025] INFO: task cat:945 blocked for more than 120 seconds.
[Tue Apr  8 12:19:07 2025]       Tainted: G            E      6.14.0-rc6+ #1
[Tue Apr  8 12:19:07 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[Tue Apr  8 12:19:07 2025] task:cat             state:D stack:0     pid:945   tgid:945   ppid:828    task_flags:0x400000 flags:0x00000000
[Tue Apr  8 12:19:07 2025] Call Trace:
[Tue Apr  8 12:19:07 2025]  <TASK>
[Tue Apr  8 12:19:07 2025]  __schedule+0x491/0xbd0
[Tue Apr  8 12:19:07 2025]  schedule+0x27/0xf0
[Tue Apr  8 12:19:07 2025]  schedule_timeout+0xe3/0xf0
[Tue Apr  8 12:19:07 2025]  ? __folio_mod_stat+0x2a/0x80
[Tue Apr  8 12:19:07 2025]  ? set_ptes.constprop.0+0x27/0x90
[Tue Apr  8 12:19:07 2025]  __down_common+0x155/0x280
[Tue Apr  8 12:19:07 2025]  down+0x53/0x70
[Tue Apr  8 12:19:07 2025]  read_dummy_semaphore+0x23/0x60
[Tue Apr  8 12:19:07 2025]  full_proxy_read+0x5f/0xa0
[Tue Apr  8 12:19:07 2025]  vfs_read+0xbc/0x350
[Tue Apr  8 12:19:07 2025]  ? __count_memcg_events+0xa5/0x140
[Tue Apr  8 12:19:07 2025]  ? count_memcg_events.constprop.0+0x1a/0x30
[Tue Apr  8 12:19:07 2025]  ? handle_mm_fault+0x180/0x260
[Tue Apr  8 12:19:07 2025]  ksys_read+0x66/0xe0
[Tue Apr  8 12:19:07 2025]  do_syscall_64+0x51/0x120
[Tue Apr  8 12:19:07 2025]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[Tue Apr  8 12:19:07 2025] RIP: 0033:0x7f419478f46e
[Tue Apr  8 12:19:07 2025] RSP: 002b:00007fff1c4d2668 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[Tue Apr  8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f419478f46e
[Tue Apr  8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f4194683000 RDI: 0000000000000003
[Tue Apr  8 12:19:07 2025] RBP: 00007f4194683000 R08: 00007f4194682010 R09: 0000000000000000
[Tue Apr  8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000
[Tue Apr  8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[Tue Apr  8 12:19:07 2025]  </TASK>
[Tue Apr  8 12:19:07 2025] INFO: task cat:945 blocked on a semaphore likely last held by task cat:938
[Tue Apr  8 12:19:07 2025] task:cat             state:S stack:0     pid:938   tgid:938   ppid:584    task_flags:0x400000 flags:0x00000000
[Tue Apr  8 12:19:07 2025] Call Trace:
[Tue Apr  8 12:19:07 2025]  <TASK>
[Tue Apr  8 12:19:07 2025]  __schedule+0x491/0xbd0
[Tue Apr  8 12:19:07 2025]  ? _raw_spin_unlock_irqrestore+0xe/0x40
[Tue Apr  8 12:19:07 2025]  schedule+0x27/0xf0
[Tue Apr  8 12:19:07 2025]  schedule_timeout+0x77/0xf0
[Tue Apr  8 12:19:07 2025]  ? __pfx_process_timeout+0x10/0x10
[Tue Apr  8 12:19:07 2025]  msleep_interruptible+0x49/0x60
[Tue Apr  8 12:19:07 2025]  read_dummy_semaphore+0x2d/0x60
[Tue Apr  8 12:19:07 2025]  full_proxy_read+0x5f/0xa0
[Tue Apr  8 12:19:07 2025]  vfs_read+0xbc/0x350
[Tue Apr  8 12:19:07 2025]  ? __count_memcg_events+0xa5/0x140
[Tue Apr  8 12:19:07 2025]  ? count_memcg_events.constprop.0+0x1a/0x30
[Tue Apr  8 12:19:07 2025]  ? handle_mm_fault+0x180/0x260
[Tue Apr  8 12:19:07 2025]  ksys_read+0x66/0xe0
[Tue Apr  8 12:19:07 2025]  do_syscall_64+0x51/0x120
[Tue Apr  8 12:19:07 2025]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[Tue Apr  8 12:19:07 2025] RIP: 0033:0x7f7c584a646e
[Tue Apr  8 12:19:07 2025] RSP: 002b:00007ffdba8ce158 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[Tue Apr  8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f7c584a646e
[Tue Apr  8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f7c5839a000 RDI: 0000000000000003
[Tue Apr  8 12:19:07 2025] RBP: 00007f7c5839a000 R08: 00007f7c58399010 R09: 0000000000000000
[Tue Apr  8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000
[Tue Apr  8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[Tue Apr  8 12:19:07 2025]  </TASK>


This patch (of 3):

This patch replaces 'struct mutex *blocker_mutex' with 'unsigned long
blocker', as only one blocker is active at a time.

The blocker filed can store both the lock addrees and the lock type, with
LSB used to encode the type as Masami suggested, making it easier to
extend the feature to cover other types of locks.

Also, once the lock type is determined, we can directly extract the
address and cast it to a lock pointer ;)

Link: https://lkml.kernel.org/r/20250414145945.84916-1-ioworker0@gmail.com
Link: https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com [1]
Link: https://lkml.kernel.org/r/20250414145945.84916-2-ioworker0@gmail.com
Signed-off-by: Mingzhe Yang <mingzhe.yang@ly.com>
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Suggested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Stultz <jstultz@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yongliang Gao <leonylgao@tencent.com>
Cc: Zi Li <amaindex@outlook.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hung_task.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h     |  6 ++-
 2 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/hung_task.h

(limited to 'include/linux')

diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h
new file mode 100644
index 000000000000..1bc2b3244613
--- /dev/null
+++ b/include/linux/hung_task.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Detect Hung Task: detecting tasks stuck in D state
+ *
+ * Copyright (C) 2025 Tongcheng Travel (www.ly.com)
+ * Author: Lance Yang <mingzhe.yang@ly.com>
+ */
+#ifndef __LINUX_HUNG_TASK_H
+#define __LINUX_HUNG_TASK_H
+
+#include <linux/bug.h>
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+/*
+ * @blocker: Combines lock address and blocking type.
+ *
+ * Since lock pointers are at least 4-byte aligned(32-bit) or 8-byte
+ * aligned(64-bit). This leaves the 2 least bits (LSBs) of the pointer
+ * always zero. So we can use these bits to encode the specific blocking
+ * type.
+ *
+ * Type encoding:
+ * 00 - Blocked on mutex        (BLOCKER_TYPE_MUTEX)
+ * 01 - Blocked on semaphore    (BLOCKER_TYPE_SEM)
+ * 10 - Blocked on rt-mutex     (BLOCKER_TYPE_RTMUTEX)
+ * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM)
+ */
+#define BLOCKER_TYPE_MUTEX      0x00UL
+#define BLOCKER_TYPE_SEM        0x01UL
+#define BLOCKER_TYPE_RTMUTEX    0x02UL
+#define BLOCKER_TYPE_RWSEM      0x03UL
+
+#define BLOCKER_TYPE_MASK       0x03UL
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static inline void hung_task_set_blocker(void *lock, unsigned long type)
+{
+	unsigned long lock_ptr = (unsigned long)lock;
+
+	WARN_ON_ONCE(!lock_ptr);
+	WARN_ON_ONCE(READ_ONCE(current->blocker));
+
+	/*
+	 * If the lock pointer matches the BLOCKER_TYPE_MASK, return
+	 * without writing anything.
+	 */
+	if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK))
+		return;
+
+	WRITE_ONCE(current->blocker, lock_ptr | type);
+}
+
+static inline void hung_task_clear_blocker(void)
+{
+	WARN_ON_ONCE(!READ_ONCE(current->blocker));
+
+	WRITE_ONCE(current->blocker, 0UL);
+}
+
+/*
+ * hung_task_get_blocker_type - Extracts blocker type from encoded blocker
+ * address.
+ *
+ * @blocker: Blocker pointer with encoded type (via LSB bits)
+ *
+ * Returns: BLOCKER_TYPE_MUTEX, BLOCKER_TYPE_SEM, etc.
+ */
+static inline unsigned long hung_task_get_blocker_type(unsigned long blocker)
+{
+	WARN_ON_ONCE(!blocker);
+
+	return blocker & BLOCKER_TYPE_MASK;
+}
+
+static inline void *hung_task_blocker_to_lock(unsigned long blocker)
+{
+	WARN_ON_ONCE(!blocker);
+
+	return (void *)(blocker & ~BLOCKER_TYPE_MASK);
+}
+#else
+static inline void hung_task_set_blocker(void *lock, unsigned long type)
+{
+}
+static inline void hung_task_clear_blocker(void)
+{
+}
+static inline unsigned long hung_task_get_blocker_type(unsigned long blocker)
+{
+	return 0UL;
+}
+static inline void *hung_task_blocker_to_lock(unsigned long blocker)
+{
+	return NULL;
+}
+#endif
+
+#endif /* __LINUX_HUNG_TASK_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..393d81978f40 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1240,7 +1240,11 @@ struct task_struct {
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
-	struct mutex			*blocker_mutex;
+	/*
+	 * Encoded lock address causing task block (lower 2 bits = type from
+	 * <linux/hung_task.h>). Accessed via hung_task_*() helpers.
+	 */
+	unsigned long			blocker;
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-- 
cgit v1.2.3


From 194a9b9e843b4077033be70a07d191107972aa53 Mon Sep 17 00:00:00 2001
From: Lance Yang <ioworker0@gmail.com>
Date: Mon, 14 Apr 2025 22:59:44 +0800
Subject: hung_task: show the blocker task if the task is hung on semaphore

Inspired by mutex blocker tracking[1], this patch makes a trade-off to
balance the overhead and utility of the hung task detector.

Unlike mutexes, semaphores lack explicit ownership tracking, making it
challenging to identify the root cause of hangs.  To address this, we
introduce a last_holder field to the semaphore structure, which is updated
when a task successfully calls down() and cleared during up().

The assumption is that if a task is blocked on a semaphore, the holders
must not have released it.  While this does not guarantee that the last
holder is one of the current blockers, it likely provides a practical hint
for diagnosing semaphore-related stalls.

With this change, the hung task detector can now show blocker task's info
like below:

[Tue Apr  8 12:19:07 2025] INFO: task cat:945 blocked for more than 120 seconds.
[Tue Apr  8 12:19:07 2025]       Tainted: G            E      6.14.0-rc6+ #1
[Tue Apr  8 12:19:07 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[Tue Apr  8 12:19:07 2025] task:cat             state:D stack:0     pid:945   tgid:945   ppid:828    task_flags:0x400000 flags:0x00000000
[Tue Apr  8 12:19:07 2025] Call Trace:
[Tue Apr  8 12:19:07 2025]  <TASK>
[Tue Apr  8 12:19:07 2025]  __schedule+0x491/0xbd0
[Tue Apr  8 12:19:07 2025]  schedule+0x27/0xf0
[Tue Apr  8 12:19:07 2025]  schedule_timeout+0xe3/0xf0
[Tue Apr  8 12:19:07 2025]  ? __folio_mod_stat+0x2a/0x80
[Tue Apr  8 12:19:07 2025]  ? set_ptes.constprop.0+0x27/0x90
[Tue Apr  8 12:19:07 2025]  __down_common+0x155/0x280
[Tue Apr  8 12:19:07 2025]  down+0x53/0x70
[Tue Apr  8 12:19:07 2025]  read_dummy_semaphore+0x23/0x60
[Tue Apr  8 12:19:07 2025]  full_proxy_read+0x5f/0xa0
[Tue Apr  8 12:19:07 2025]  vfs_read+0xbc/0x350
[Tue Apr  8 12:19:07 2025]  ? __count_memcg_events+0xa5/0x140
[Tue Apr  8 12:19:07 2025]  ? count_memcg_events.constprop.0+0x1a/0x30
[Tue Apr  8 12:19:07 2025]  ? handle_mm_fault+0x180/0x260
[Tue Apr  8 12:19:07 2025]  ksys_read+0x66/0xe0
[Tue Apr  8 12:19:07 2025]  do_syscall_64+0x51/0x120
[Tue Apr  8 12:19:07 2025]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[Tue Apr  8 12:19:07 2025] RIP: 0033:0x7f419478f46e
[Tue Apr  8 12:19:07 2025] RSP: 002b:00007fff1c4d2668 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[Tue Apr  8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f419478f46e
[Tue Apr  8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f4194683000 RDI: 0000000000000003
[Tue Apr  8 12:19:07 2025] RBP: 00007f4194683000 R08: 00007f4194682010 R09: 0000000000000000
[Tue Apr  8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000
[Tue Apr  8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[Tue Apr  8 12:19:07 2025]  </TASK>
[Tue Apr  8 12:19:07 2025] INFO: task cat:945 blocked on a semaphore likely last held by task cat:938
[Tue Apr  8 12:19:07 2025] task:cat             state:S stack:0     pid:938   tgid:938   ppid:584    task_flags:0x400000 flags:0x00000000
[Tue Apr  8 12:19:07 2025] Call Trace:
[Tue Apr  8 12:19:07 2025]  <TASK>
[Tue Apr  8 12:19:07 2025]  __schedule+0x491/0xbd0
[Tue Apr  8 12:19:07 2025]  ? _raw_spin_unlock_irqrestore+0xe/0x40
[Tue Apr  8 12:19:07 2025]  schedule+0x27/0xf0
[Tue Apr  8 12:19:07 2025]  schedule_timeout+0x77/0xf0
[Tue Apr  8 12:19:07 2025]  ? __pfx_process_timeout+0x10/0x10
[Tue Apr  8 12:19:07 2025]  msleep_interruptible+0x49/0x60
[Tue Apr  8 12:19:07 2025]  read_dummy_semaphore+0x2d/0x60
[Tue Apr  8 12:19:07 2025]  full_proxy_read+0x5f/0xa0
[Tue Apr  8 12:19:07 2025]  vfs_read+0xbc/0x350
[Tue Apr  8 12:19:07 2025]  ? __count_memcg_events+0xa5/0x140
[Tue Apr  8 12:19:07 2025]  ? count_memcg_events.constprop.0+0x1a/0x30
[Tue Apr  8 12:19:07 2025]  ? handle_mm_fault+0x180/0x260
[Tue Apr  8 12:19:07 2025]  ksys_read+0x66/0xe0
[Tue Apr  8 12:19:07 2025]  do_syscall_64+0x51/0x120
[Tue Apr  8 12:19:07 2025]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[Tue Apr  8 12:19:07 2025] RIP: 0033:0x7f7c584a646e
[Tue Apr  8 12:19:07 2025] RSP: 002b:00007ffdba8ce158 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[Tue Apr  8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f7c584a646e
[Tue Apr  8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f7c5839a000 RDI: 0000000000000003
[Tue Apr  8 12:19:07 2025] RBP: 00007f7c5839a000 R08: 00007f7c58399010 R09: 0000000000000000
[Tue Apr  8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000
[Tue Apr  8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[Tue Apr  8 12:19:07 2025]  </TASK>

[1] https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com

Link: https://lkml.kernel.org/r/20250414145945.84916-3-ioworker0@gmail.com
Signed-off-by: Mingzhe Yang <mingzhe.yang@ly.com>
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Suggested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Stultz <jstultz@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yongliang Gao <leonylgao@tencent.com>
Cc: Zi Li <amaindex@outlook.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/semaphore.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 04655faadc2d..89706157e622 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -16,13 +16,25 @@ struct semaphore {
 	raw_spinlock_t		lock;
 	unsigned int		count;
 	struct list_head	wait_list;
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+	unsigned long		last_holder;
+#endif
 };
 
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+#define __LAST_HOLDER_SEMAPHORE_INITIALIZER				\
+	, .last_holder = 0UL
+#else
+#define __LAST_HOLDER_SEMAPHORE_INITIALIZER
+#endif
+
 #define __SEMAPHORE_INITIALIZER(name, n)				\
 {									\
 	.lock		= __RAW_SPIN_LOCK_UNLOCKED((name).lock),	\
 	.count		= n,						\
-	.wait_list	= LIST_HEAD_INIT((name).wait_list),		\
+	.wait_list	= LIST_HEAD_INIT((name).wait_list)		\
+	__LAST_HOLDER_SEMAPHORE_INITIALIZER				\
 }
 
 /*
@@ -47,5 +59,6 @@ extern int __must_check down_killable(struct semaphore *sem);
 extern int __must_check down_trylock(struct semaphore *sem);
 extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
 extern void up(struct semaphore *sem);
+extern unsigned long sem_last_holder(struct semaphore *sem);
 
 #endif /* __LINUX_SEMAPHORE_H */
-- 
cgit v1.2.3


From 8d1d4b538bb12a858fa95a073c4aff31e79088ee Mon Sep 17 00:00:00 2001
From: Caleb Sander Mateos <csander@purestorage.com>
Date: Wed, 16 Apr 2025 10:06:13 -0600
Subject: scatterlist: inline sg_next()

sg_next() is a short function called frequently in I/O paths. Define it
in the header file so it can be inlined into its callers.

Link: https://lkml.kernel.org/r/20250416160615.3571958-1-csander@purestorage.com
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/scatterlist.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 138e2f1bd08f..0cdbfc42f153 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -94,6 +94,28 @@ static inline bool sg_is_last(struct scatterlist *sg)
 	return __sg_flags(sg) & SG_END;
 }
 
+/**
+ * sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   Usually the next entry will be @sg@ + 1, but if this sg element is part
+ *   of a chained scatterlist, it could jump to the start of a new
+ *   scatterlist array.
+ *
+ **/
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+	if (sg_is_last(sg))
+		return NULL;
+
+	sg++;
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+
+	return sg;
+}
+
 /**
  * sg_assign_page - Assign a given page to an SG entry
  * @sg:		    SG entry
@@ -418,7 +440,6 @@ static inline void sg_init_marker(struct scatterlist *sgl,
 
 int sg_nents(struct scatterlist *sg);
 int sg_nents_for_len(struct scatterlist *sg, u64 len);
-struct scatterlist *sg_next(struct scatterlist *);
 struct scatterlist *sg_last(struct scatterlist *s, unsigned int);
 void sg_init_table(struct scatterlist *, unsigned int);
 void sg_init_one(struct scatterlist *, const void *, unsigned int);
-- 
cgit v1.2.3


From ba8182d44b4e557e2dc34e51a88105ce5b083372 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sat, 19 Apr 2025 21:30:12 +0100
Subject: rapidio: remove unused functions

rio_request_dma() and rio_dma_prep_slave_sg() were added in 2012 by commit
e42d98ebe7d7 ("rapidio: add DMA engine support for RIO data transfers")
but never used.

rio_find_mport() last use was removed in 2013 by commit 9edbc30b434f
("rapidio: update enumerator registration mechanism")

rio_unregister_scan() was added in 2013 by commit a11650e11093 ("rapidio:
make enumeration/discovery configurable") but never used.

Remove them.

Link: https://lkml.kernel.org/r/20250419203012.429787-3-linux@treblig.org
Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/rio_drv.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index e49c32b0f394..dd8afe511242 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -391,13 +391,8 @@ struct rio_dev *rio_dev_get(struct rio_dev *);
 void rio_dev_put(struct rio_dev *);
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
-extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
 extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport);
 extern void rio_release_dma(struct dma_chan *dchan);
-extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
-		struct rio_dev *rdev, struct dma_chan *dchan,
-		struct rio_dma_data *data,
-		enum dma_transfer_direction direction, unsigned long flags);
 extern struct dma_async_tx_descriptor *rio_dma_prep_xfer(
 		struct dma_chan *dchan,	u16 destid,
 		struct rio_dma_data *data,
-- 
cgit v1.2.3


From 2a1c6158131f05c228001e1f73304535bc205444 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sat, 19 Apr 2025 00:49:32 +0100
Subject: relay: remove unused relay_late_setup_files

The last use of relay_late_setup_files() was removed in 2018 by commit
2b47733045aa ("drm/i915/guc: Merge log relay file and channel creation")

Remove it and the helper it used.

relay_late_setup_files() was used for eventually registering 'buffer only'
channels.  With it gone, delete the docs that explain how to do that.
Which suggests it should be possible to lose the 'has_base_filename'
flags.

(Are there any other uses??)

Link: https://lkml.kernel.org/r/20250418234932.490863-1-linux@treblig.org
Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andriy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/relay.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/relay.h b/include/linux/relay.h
index 72b876dd5cb8..b3224111d074 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -159,9 +159,6 @@ struct rchan *relay_open(const char *base_filename,
 			 size_t n_subbufs,
 			 const struct rchan_callbacks *cb,
 			 void *private_data);
-extern int relay_late_setup_files(struct rchan *chan,
-				  const char *base_filename,
-				  struct dentry *parent);
 extern void relay_close(struct rchan *chan);
 extern void relay_flush(struct rchan *chan);
 extern void relay_subbufs_consumed(struct rchan *chan,
-- 
cgit v1.2.3


From 2e27fa943b74c9fc5fdf93090508c3c722531c66 Mon Sep 17 00:00:00 2001
From: WangYuli <wangyuli@uniontech.com>
Date: Mon, 21 Apr 2025 15:49:10 +0800
Subject: treewide: fix typo "previlege"

There are some spelling mistakes of 'previlege' in comments which
should be 'privilege'.

Fix them and add it to scripts/spelling.txt.

The typo in arch/loongarch/kvm/main.c was corrected by a different
patch [1] and is therefore not included in this submission.

[1]. https://lore.kernel.org/all/20250420142208.2252280-1-wheatfox17@icloud.com/

Link: https://lkml.kernel.org/r/46AD404E411A4BAC+20250421074910.66988-1-wangyuli@uniontech.com
Signed-off-by: WangYuli <wangyuli@uniontech.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/habanalabs/hl_boot_if.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h
index d2a9fc96424b..af5fb4ad77eb 100644
--- a/include/linux/habanalabs/hl_boot_if.h
+++ b/include/linux/habanalabs/hl_boot_if.h
@@ -295,7 +295,7 @@ enum cpu_boot_dev_sts {
  *					Initialized in: linux
  *
  * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN	GIC access permission only from
- *					previleged entity. FW sets this status
+ *					privileged entity. FW sets this status
  *					bit for host. If this bit is set then
  *					GIC can not be accessed from host.
  *					Initialized in: linux
-- 
cgit v1.2.3


From 9c7b53b21fb1df20f85e3822160b4687f98ff7b3 Mon Sep 17 00:00:00 2001
From: Marc Herbert <Marc.Herbert@linux.intel.com>
Date: Thu, 24 Apr 2025 19:40:35 +0000
Subject: compiler_types.h: fix "unused variable" in __compiletime_assert()

This refines commit c03567a8e8d5 ("include/linux/compiler.h: don't perform
compiletime_assert with -O0") and restores #ifdef __OPTIMIZE__ symmetry by
evaluating the 'condition' variable in both compile-time variants of
__compiletimeassert().

As __OPTIMIZE__ is always true by default, this commit does not change
anything by default.  But it fixes warnings with _non-default_ CFLAGS like
for instance this:

 make  CFLAGS_tcp.o='-Og -U__OPTIMIZE__'

                 from net/ipv4/tcp.c:273:

 include/net/sch_generic.h: In function `qdisc_cb_private_validate':
 include/net/sch_generic.h:511:30:
            error: unused variable `qcb' [-Werror=unused-variable]

  {
     struct qdisc_skb_cb *qcb;

     BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb));
     ...
  }

[akpm@linux-foundation.org: regularize comment layout, reflow comment]
Link: https://lkml.kernel.org/r/20250424194048.652571-1-marc.herbert@linux.intel.com
Signed-off-by: Marc Herbert <Marc.Herbert@linux.intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Changbin Du <changbin.du@intel.com>
Cc: Jan Hendrik Farr <kernel@jfarr.cc>
Cc: Macro Elver <elver@google.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Tony Ambardar <tony.ambardar@gmail.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compiler_types.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 501cffddc2f4..1f2eee8331d3 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -525,6 +525,12 @@ struct ftrace_likely_data {
 	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
 #ifdef __OPTIMIZE__
+/*
+ * #ifdef __OPTIMIZE__ is only a good approximation; for instance "make
+ * CFLAGS_foo.o=-Og" defines __OPTIMIZE__, does not elide the conditional code
+ * and can break compilation with wrong error message(s). Combine with
+ * -U__OPTIMIZE__ when needed.
+ */
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
 		/*							\
@@ -538,7 +544,7 @@ struct ftrace_likely_data {
 			prefix ## suffix();				\
 	} while (0)
 #else
-# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
+# define __compiletime_assert(condition, msg, prefix, suffix) ((void)(condition))
 #endif
 
 #define _compiletime_assert(condition, msg, prefix, suffix) \
-- 
cgit v1.2.3


From f3def8270c67217efb0e23dcd54714f74de3b6b2 Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Sun, 27 Apr 2025 23:14:49 +0300
Subject: sort.h: hoist cmp_int() into generic header file

Deduplicate the same functionality implemented in several places by
moving the cmp_int() helper macro into linux/sort.h.

The macro performs a three-way comparison of the arguments mostly useful
in different sorting strategies and algorithms.

Link: https://lkml.kernel.org/r/20250427201451.900730-1-pchelkin@ispras.ru
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Acked-by: Kent Overstreet <kent.overstreet@linux.dev>
Acked-by: Coly Li <colyli@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Carlos Maiolino <cem@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Coly Li <colyli@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sort.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sort.h b/include/linux/sort.h
index 8e5603b10941..c01ef804a0eb 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -4,6 +4,16 @@
 
 #include <linux/types.h>
 
+/**
+ * cmp_int - perform a three-way comparison of the arguments
+ * @l: the left argument
+ * @r: the right argument
+ *
+ * Return: 1 if the left argument is greater than the right one; 0 if the
+ * arguments are equal; -1 if the left argument is less than the right one.
+ */
+#define cmp_int(l, r) (((l) > (r)) - ((l) < (r)))
+
 void sort_r(void *base, size_t num, size_t size,
 	    cmp_r_func_t cmp_func,
 	    swap_r_func_t swap_func,
-- 
cgit v1.2.3


From c91d78622e16f628176d7248044106b03818af6d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 28 Apr 2025 10:27:37 +0300
Subject: util_macros.h: fix the reference in kernel-doc

In PTR_IF() description the text refers to the parameter as (ptr) while
the kernel-doc format asks for @ptr.  Fix this accordingly.

Link: https://lkml.kernel.org/r/20250428072737.3265239-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Alexandru Ardelean <aardelean@baylibre.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/util_macros.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 7b64fb597f85..8183ac610cc5 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -85,7 +85,7 @@
  * @ptr: A pointer to assign if @cond is true.
  *
  * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set
- * to 'y' or 'm', or to NULL otherwise. The (ptr) argument must be a pointer.
+ * to 'y' or 'm', or to NULL otherwise. The @ptr argument must be a pointer.
  *
  * The macro can be very useful to help compiler dropping dead code.
  *
-- 
cgit v1.2.3


From 479d26ee013c5388ad6855e512ab20d81e43750d Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Thu, 1 May 2025 02:05:02 +0100
Subject: lib/oid_registry.c: remove unused sprint_OID

sprint_OID() was added as part of 2012's commit 4f73175d0375 ("X.509: Add
utility functions to render OIDs as strings") but it hasn't been used.
Remove it.

Note that there's also 'sprint_oid' (lower case) which is used in a lot of
places; that's left as is except for fixing its case in a comment.

Link: https://lkml.kernel.org/r/20250501010502.326472-1-linux@treblig.org
Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/oid_registry.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h
index 6f9242259edc..6de479ebbe5d 100644
--- a/include/linux/oid_registry.h
+++ b/include/linux/oid_registry.h
@@ -151,6 +151,5 @@ enum OID {
 extern enum OID look_up_OID(const void *data, size_t datasize);
 extern int parse_OID(const void *data, size_t datasize, enum OID *oid);
 extern int sprint_oid(const void *, size_t, char *, size_t);
-extern int sprint_OID(enum OID, char *, size_t);
 
 #endif /* _LINUX_OID_REGISTRY_H */
-- 
cgit v1.2.3


From cf80fdbc0a5512feabbc56280b2abbb51d4e5b4e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 2 May 2025 15:17:42 +0300
Subject: list: remove redundant 'extern' for function prototypes

The 'extern' keyword is redundant for function prototypes.  list.h never
used them and new code in general is better without them.

Link: https://lkml.kernel.org/r/20250502121742.3997529-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/list.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 29a375889fb8..e7e28afd28f8 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -50,9 +50,9 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
  * Performs the full set of list corruption checks before __list_add().
  * On list corruption reports a warning, and returns false.
  */
-extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
-							     struct list_head *prev,
-							     struct list_head *next);
+bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
+						      struct list_head *prev,
+						      struct list_head *next);
 
 /*
  * Performs list corruption checks before __list_add(). Returns false if a
@@ -93,7 +93,7 @@ static __always_inline bool __list_add_valid(struct list_head *new,
  * Performs the full set of list corruption checks before __list_del_entry().
  * On list corruption reports a warning, and returns false.
  */
-extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
+bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
 
 /*
  * Performs list corruption checks before __list_del_entry(). Returns false if a
-- 
cgit v1.2.3


From 285e871884ff3dc31c0c2c1a87f0018481bc8471 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 28 Apr 2025 12:22:16 +0300
Subject: mm/hmm: let users to tag specific PFN with DMA mapped bit

Introduce new sticky flag (HMM_PFN_DMA_MAPPED), which isn't overwritten
by HMM range fault. Such flag allows users to tag specific PFNs with
information if this specific PFN was already DMA mapped.

Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/hmm.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 126a36571667..a43e56f273a1 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -23,6 +23,8 @@ struct mmu_interval_notifier;
  * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
  * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
  *                 fail. ie poisoned memory, special pages, no vma, etc
+ * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation
+ *                      to mark that page is already DMA mapped
  *
  * On input:
  * 0                 - Return the current state of the page, do not fault it.
@@ -36,13 +38,19 @@ enum hmm_pfn_flags {
 	HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
 	HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
 	HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
-	HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8),
+	/*
+	 * Sticky flags, carried from input to output,
+	 * don't forget to update HMM_PFN_INOUT_FLAGS
+	 */
+	HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4),
+
+	HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 9),
 
 	/* Input flags */
 	HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
 	HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
 
-	HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT,
+	HMM_PFN_FLAGS = ~((1UL << HMM_PFN_ORDER_SHIFT) - 1),
 };
 
 /*
@@ -57,6 +65,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
 	return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
 }
 
+/*
+ * hmm_pfn_to_phys() - return physical address pointed to by a device entry
+ */
+static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn)
+{
+	return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS);
+}
+
 /*
  * hmm_pfn_to_map_order() - return the CPU mapping size order
  *
-- 
cgit v1.2.3


From 8cad47130566123b2c70ef2aa53be02ef1aee5e5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 28 Apr 2025 12:22:17 +0300
Subject: mm/hmm: provide generic DMA managing logic

HMM callers use PFN list to populate range while calling
to hmm_range_fault(), the conversion from PFN to DMA address
is done by the callers with help of another DMA list. However,
it is wasteful on any modern platform and by doing the right
logic, that DMA list can be avoided.

Provide generic logic to manage these lists and gave an interface
to map/unmap PFNs to DMA addresses, without requiring from the callers
to be an experts in DMA core API.

Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/hmm-dma.h | 33 +++++++++++++++++++++++++++++++++
 include/linux/hmm.h     |  6 +++++-
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/hmm-dma.h

(limited to 'include/linux')

diff --git a/include/linux/hmm-dma.h b/include/linux/hmm-dma.h
new file mode 100644
index 000000000000..f58b9fc71999
--- /dev/null
+++ b/include/linux/hmm-dma.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+#ifndef LINUX_HMM_DMA_H
+#define LINUX_HMM_DMA_H
+
+#include <linux/dma-mapping.h>
+
+struct dma_iova_state;
+struct pci_p2pdma_map_state;
+
+/*
+ * struct hmm_dma_map - array of PFNs and DMA addresses
+ *
+ * @state: DMA IOVA state
+ * @pfns: array of PFNs
+ * @dma_list: array of DMA addresses
+ * @dma_entry_size: size of each DMA entry in the array
+ */
+struct hmm_dma_map {
+	struct dma_iova_state state;
+	unsigned long *pfn_list;
+	dma_addr_t *dma_list;
+	size_t dma_entry_size;
+};
+
+int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map,
+		      size_t nr_entries, size_t dma_entry_size);
+void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map);
+dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
+			   size_t idx,
+			   struct pci_p2pdma_map_state *p2pdma_state);
+bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx);
+#endif /* LINUX_HMM_DMA_H */
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index a43e56f273a1..db75ffc949a7 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -23,6 +23,8 @@ struct mmu_interval_notifier;
  * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
  * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
  *                 fail. ie poisoned memory, special pages, no vma, etc
+ * HMM_PFN_P2PDMA - P2P page
+ * HMM_PFN_P2PDMA_BUS - Bus mapped P2P transfer
  * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation
  *                      to mark that page is already DMA mapped
  *
@@ -43,8 +45,10 @@ enum hmm_pfn_flags {
 	 * don't forget to update HMM_PFN_INOUT_FLAGS
 	 */
 	HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4),
+	HMM_PFN_P2PDMA     = 1UL << (BITS_PER_LONG - 5),
+	HMM_PFN_P2PDMA_BUS = 1UL << (BITS_PER_LONG - 6),
 
-	HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 9),
+	HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 11),
 
 	/* Input flags */
 	HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
-- 
cgit v1.2.3


From f4856c20c137a73d73e448caa3964098024248bf Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 29 Apr 2025 02:36:03 +0200
Subject: power: supply: core: Add additional health status values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some batteries can signal when an internal fuse was blown. In such a
case POWER_SUPPLY_HEALTH_DEAD is too vague for userspace applications
to perform meaningful diagnostics.

Additionally some batteries can also signal when some of their
internal cells are imbalanced. In such a case returning
POWER_SUPPLY_HEALTH_UNSPEC_FAILURE is again too vague for userspace
applications to perform meaningful diagnostics.

Add new health status values for both cases.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20250429003606.303870-1-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/power_supply.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index cbec930430a7..03786c8c2efe 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -71,6 +71,8 @@ enum {
 	POWER_SUPPLY_HEALTH_COOL,
 	POWER_SUPPLY_HEALTH_HOT,
 	POWER_SUPPLY_HEALTH_NO_BATTERY,
+	POWER_SUPPLY_HEALTH_BLOWN_FUSE,
+	POWER_SUPPLY_HEALTH_CELL_IMBALANCE,
 };
 
 enum {
-- 
cgit v1.2.3


From 6157e62b07d9331cc1d4d9d525dab33d45b0e83c Mon Sep 17 00:00:00 2001
From: Paul Geurts <paul.geurts@prodrive-technologies.com>
Date: Mon, 5 May 2025 13:59:36 +0200
Subject: regulator: pca9450: Add restart handler

When restarting a CPU powered by the PCA9450 power management IC, it
is beneficial to use the PCA9450 to power cycle the CPU and all its
connected peripherals to start up in a known state. The PCA9450 features
a cold start procedure initiated by an I2C command.

Add a restart handler so that the PCA9450 is used to restart the CPU.
The restart handler sends command 0x14 to the SW_RST register,
initiating a cold reset (Power recycle all regulators except LDO1, LDO2
and CLK_32K_OUT)

As the PCA9450 is a PMIC specific for the i.MX8M family CPU, the restart
handler priority is set just slightly higher than imx2_wdt and the PSCI
restart handler. This makes sure this restart handler takes precedence.

Signed-off-by: Paul Geurts <paul.geurts@prodrive-technologies.com>
Link: https://patch.msgid.link/20250505115936.1946891-1-paul.geurts@prodrive-technologies.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/pca9450.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h
index b427b5873de1..85b4fecc10d8 100644
--- a/include/linux/regulator/pca9450.h
+++ b/include/linux/regulator/pca9450.h
@@ -35,6 +35,8 @@ enum {
 	PCA9450_DVS_LEVEL_MAX,
 };
 
+#define PCA9450_RESTART_HANDLER_PRIORITY 130
+
 #define PCA9450_BUCK1_VOLTAGE_NUM	0x80
 #define PCA9450_BUCK2_VOLTAGE_NUM	0x80
 #define PCA9450_BUCK3_VOLTAGE_NUM	0x80
@@ -235,4 +237,7 @@ enum {
 #define I2C_LT_ON_RUN			0x02
 #define I2C_LT_FORCE_ENABLE		0x03
 
+/* PCA9450_REG_SW_RST command */
+#define SW_RST_COMMAND			0x14
+
 #endif /* __LINUX_REG_PCA9450_H__ */
-- 
cgit v1.2.3


From b9020bdb9f76b58117f9902db3047693fd12b11b Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 5 May 2025 10:38:17 -0700
Subject: ACPI: MRRM: Minimal parse of ACPI MRRM table

The resctrl file system code needs to know how many region tags
are supported. Parse the ACPI MRRM table and save the max_mem_region
value.

Provide a function for resctrl to collect that value.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://patch.msgid.link/20250505173819.419271-2-tony.luck@intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3f2e93ed9730..c409f4cecb09 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -772,6 +772,10 @@ int acpi_get_local_u64_address(acpi_handle handle, u64 *addr);
 int acpi_get_local_address(acpi_handle handle, u32 *addr);
 const char *acpi_get_subsystem_id(acpi_handle handle);
 
+#ifdef CONFIG_ACPI_MRRM
+int acpi_mrrm_max_mem_region(void);
+#endif
+
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
@@ -1092,6 +1096,11 @@ static inline acpi_handle acpi_get_processor_handle(int cpu)
 	return NULL;
 }
 
+static inline int acpi_mrrm_max_mem_region(void)
+{
+	return -ENOENT;
+}
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_HMAT
-- 
cgit v1.2.3


From 8fb7aee05591fd4d3dca1460448a59e95fa821c3 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 9 May 2025 12:12:54 +0100
Subject: io_uring: drain based on allocates reqs

Don't rely on CQ sequence numbers for draining, as it has become messy
and needs cq_extra adjustments. Instead, base it on the number of
allocated requests and only allow flushing when all requests are in the
drain list.

As a result, cq_extra is gone, no overhead for its accounting in aux cqe
posting, less bloating as it was inlined before, and it's in general
simpler than trying to track where we should bump it and where it should
be put back like in cases of overflow. Also, it'll likely help with
cleaning and unifying some of the CQ posting helpers.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/46ece1e34320b046c06fee2498d6b4cd12a700f2.1746788718.git.asml.silence@gmail.com
Link: https://lore.kernel.org/r/24497b04b004bceada496033d3c9d09ff8e81ae9.1746944903.git.asml.silence@gmail.com
[axboe: fold in fix from link2]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 73b289b48280..00dbd7cd0e7d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -341,7 +341,6 @@ struct io_ring_ctx {
 		unsigned		cached_cq_tail;
 		unsigned		cq_entries;
 		struct io_ev_fd	__rcu	*io_ev_fd;
-		unsigned		cq_extra;
 
 		void			*cq_wait_arg;
 		size_t			cq_wait_size;
@@ -417,6 +416,7 @@ struct io_ring_ctx {
 
 	struct callback_head		poll_wq_task_work;
 	struct list_head		defer_list;
+	unsigned			nr_drained;
 
 	struct io_alloc_cache		msg_cache;
 	spinlock_t			msg_lock;
-- 
cgit v1.2.3


From 2c04e58e30ce858cc2be531298312c67c7d55fc3 Mon Sep 17 00:00:00 2001
From: Melody Olvera <melody.olvera@oss.qualcomm.com>
Date: Mon, 12 May 2025 13:54:43 -0700
Subject: soc: qcom: llcc-qcom: Add support for SM8750

Add system cache table and configs for SM8750 SoCs.

Signed-off-by: Melody Olvera <melody.olvera@oss.qualcomm.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20250512-sm8750_llcc_master-v5-3-d78dca6282a5@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/llcc-qcom.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 8e5d78fb4847..7a69210a250c 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -24,6 +24,7 @@
 #define LLCC_CMPTDMA     15
 #define LLCC_DISP        16
 #define LLCC_VIDFW       17
+#define LLCC_CAMFW       18
 #define LLCC_MDMHPFX     20
 #define LLCC_MDMPNG      21
 #define LLCC_AUDHW       22
@@ -67,6 +68,13 @@
 #define LLCC_EVCS_LEFT	 67
 #define LLCC_EVCS_RIGHT	 68
 #define LLCC_SPAD	 69
+#define LLCC_VIDDEC	 70
+#define LLCC_CAMOFE	 71
+#define LLCC_CAMRTIP	 72
+#define LLCC_CAMSRTIP	 73
+#define LLCC_CAMRTRF	 74
+#define LLCC_CAMSRTRF	 75
+#define LLCC_CPUSSMPAM	 89
 
 /**
  * struct llcc_slice_desc - Cache slice descriptor
-- 
cgit v1.2.3


From d060b6aab031b6113f78cd3d1585115f13386eec Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko <yatsenko@meta.com>
Date: Mon, 12 May 2025 21:53:46 +0100
Subject: helpers: make few bpf helpers public

Make bpf_dynptr_slice_rdwr, bpf_dynptr_check_off_len and
__bpf_dynptr_write available outside of the helpers.c by
adding their prototypes into linux/include/bpf.h.
bpf_dynptr_check_off_len() implementation is moved to header and made
inline explicitly, as small function should typically be inlined.

These functions are going to be used from bpf_trace.c in the next
patch of this series.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/20250512205348.191079-2-mykyta.yatsenko5@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3f0cc89c0622..83c56f40842b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1349,6 +1349,20 @@ u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
 const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
 void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
 bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
+		       void *src, u32 len, u64 flags);
+void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
+			    void *buffer__opt, u32 buffer__szk);
+
+static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+{
+	u32 size = __bpf_dynptr_size(ptr);
+
+	if (len > size || offset > size - len)
+		return -E2BIG;
+
+	return 0;
+}
 
 #ifdef CONFIG_BPF_JIT
 int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
-- 
cgit v1.2.3


From e8007fad5457ea547ca63bb011fdb03213571c7e Mon Sep 17 00:00:00 2001
From: Steve Siwinski <ssiwinski@atto.com>
Date: Thu, 8 May 2025 16:01:22 -0400
Subject: scsi: sd_zbc: block: Respect bio vector limits for REPORT ZONES
 buffer

The REPORT ZONES buffer size is currently limited by the HBA's maximum
segment count to ensure the buffer can be mapped. However, the block
layer further limits the number of iovec entries to 1024 when allocating
a bio.

To avoid allocation of buffers too large to be mapped, further restrict
the maximum buffer size to BIO_MAX_INLINE_VECS.

Replace the UIO_MAXIOV symbolic name with the more contextually
appropriate BIO_MAX_INLINE_VECS.

Fixes: b091ac616846 ("sd_zbc: Fix report zones buffer allocation")
Cc: stable@vger.kernel.org
Signed-off-by: Steve Siwinski <ssiwinski@atto.com>
Link: https://lore.kernel.org/r/20250508200122.243129-1-ssiwinski@atto.com
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/bio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index cafc7c215de8..b786ec5bcc81 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -11,6 +11,7 @@
 #include <linux/uio.h>
 
 #define BIO_MAX_VECS		256U
+#define BIO_MAX_INLINE_VECS	UIO_MAXIOV
 
 struct queue_limits;
 
-- 
cgit v1.2.3


From 0e1c773b501f33437d87b72c7d26080361e224b1 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Sun, 20 Apr 2025 12:40:24 -0700
Subject: mm/damon/core: introduce damos quota goal metrics for memory node
 utilization

Patch series "mm/damon: auto-tune DAMOS for NUMA setups including tiered
memory".


Utilizing DAMON for memory tiering usually requires manual tuning and/or
tedious controls.  Let it self-tune hotness and coldness thresholds for
promotion and demotion aiming high utilization of high memory tiers, by
introducing new DAMOS quota goal metrics representing the used and the
free memory ratios of specific NUMA nodes.  And introduce a sample DAMON
module that demonstrates how the new feature can be used for memory
tiering use cases.

Backgrounds
===========

A type of tiered memory system exposes the memory tiers as NUMA nodes.  A
straightforward pages placement strategy for such systems is placing
access-hot and cold pages on upper and lower tiers, reespectively,
pursuing higher utilization of upper tiers.  Since access temperature can
be dynamic, periodically finding and migrating hot pages and cold pages to
proper tiers (promoting and demoting) is also required.  Linux kernel
provides several features for such dynamic and transparent pages
placement.

Page Faults and LRU
-------------------

One widely known way is using NUMA balancing in tiering mode (a.k.a
NUMAB-2) and reclaim-based demotion features.  In the setup, NUMAB-2 finds
hot pages using access check-purpose page faults (a.k.a prot_none) and
promote those inside each process' context, until there is no more pages
to promote, or the upper tier is filled up and memory pressure happens.
In the latter case, LRU-based reclaim logic wakes up as a response to the
memory pressure and demotes cold pages to lower tiers in asynchronous
(kswapd) and/or synchronous ways (direct reclaim).

DAMON
-----

Yet another available solution is using DAMOS with migrate_hot and
migrate_cold DAMOS actions for promotions and demotions, respectively.  To
make it optimum, users need to specify aggressiveness and access
temperature thresholds for promotions and demotions in a good balance that
results in high utilization of upper tiers.  The number of parameters is
not small, and optimum parameter values depend on characteristics of the
underlying hardware and the workload.  As a result, it often requires
manual, time consuming and repetitive tuning of the DAMOS schemes for
given workloads and systems combinations.

Self-tuned DAMON-based Memory Tiering
=====================================

To solve such manual tuning problems, DAMOS provides aim-oriented
feedback-driven quotas self-tuning.  Using the feature, we design a
self-tuned DAMON-based memory tiering for general multi-tier memory
systems.

For each memory tier node, if it has a lower tier, run a DAMOS scheme that
demotes cold pages of the node, auto-tuning the aggressiveness aiming an
amount of free space of the node.  The free space is for keeping the
headroom that avoids significant memory pressure during upper tier memory
usage spike, and promoting hot pages from the lower tier.

For each memory tier node, if it has an upper tier, run a DAMOS scheme
that promotes hot pages of the current node to the upper tier, auto-tuning
the aggressiveness aiming a high utilization ratio of the upper tier.  The
target ratio is to ensure higher tiers are utilized as much as possible.
It should match with the headroom for demotion scheme, but have slight
overlap, to ensure promotion and demotion are not entirely stopped.

The aim-oriented aggressiveness auto-tuning of DAMOS is already available.
Hence, to make such tiering solution implementation, only new quota goal
metrics for utilization and free space ratio of specific NUMA node need to
be developed.

Discussions
===========

The design imposes below discussion points.

Expected Behaviors
------------------

The system will let upper tier memory node accommodates as many hot data
as possible.  If total amount of the data is less than the top tier
memory's promotion/demotion target utilization, entire data will be just
placed on the top tier.  Promotion scheme will do nothing since there is
no data to promote.  Demotion scheme will also do nothing since the free
space ratio of the top tier is higher than the goal.

Only if the amount of data is larger than the top tier's utilization
ratio, demotion scheme will demote cold pages and ensure the headroom free
space.  Since the promotion and demotion schemes for a single node has
small overlap at their target utilization and free space goals, promotions
and demotions will continue working with a moderate aggressiveness level.
It will keep all data is placed on access hotness under dynamic access
pattern, while minimizing the migration overhead.

In any case, each node will keep headroom free space and as many upper
tiers are utilized as possible.

Ease of Use
-----------

Users still need to set the target utilization and free space ratio, but
it will be easier to set.  We argue 99.7 % utilization and 0.5 % free
space ratios can be good default values.  It can be easily adjusted based
on desired headroom size of given use case.  Users are also still required
to answer the minimum coldness and hotness thresholds.  Together with
monitoring intervals auto-tuning[2], DAMON will always show meaningful
amount of hot and cold memory.  And DAMOS quota's prioritization mechanism
will make good decision as long as the source information is that
colorful.  Hence, users can very naively set the minimum criterias.  We
believe any access observation and no access observation within last one
aggregation interval is enough for minimum hot and cold regions criterias.

General Tiered Memory Setup Applicability
-----------------------------------------

The design can be applied to any number of tiers having any performance
characteristics, as long as they can be hierarchical.  Hence, applying the
system to different tiered memory system will be straightforward.  Note
that this assumes only single CPU NUMA node case.  Because today's DAMON
is not aware of which CPU made each access, applying this on systems
having multiple CPU NUMA nodes can be complicated.  We are planning to
extend DAMON for the use case, but that's out of the scope of this patch
series.

How To Use
----------

Users can implement the auto-tuned DAMON-based memory tiering using DAMON
sysfs interface.  It can be easily done using DAMON user-space tool like
user-space tool.  Below evaluation results section shows an example DAMON
user-space tool command for that.

For wider and simpler deployment, having a kernel module that sets up and
run the DAMOS schemes via DAMON kernel API can be useful.  The module can
enable the memory tiering at boot time via kernel command line parameter
or at run time with single command.  This patch series implements a sample
DAMON kernel module that shows how such module can be implemented.

Comparison To Page Faults and LRU-based Approaches
--------------------------------------------------

The existing page faults based promotion (NUMAB-2) does hot pages
detection and migration in the process context.  When there are many pages
to promote, it can block the progress of the application's real works.
DAMOS works in asynchronous worker thread, so it doesn't block the real
works.

NUMAB-2 doesn't provide a way to control aggressiveness of promotion other
than the maximum amount of pages to promote per given time widnow.  If hot
pages are found, promotions can happen in the upper-bound speed,
regardless of upper tier's memory pressure.  If the maximum speed is not
well set for the given workload, it can result in slow promotion or
unnecessary memory pressure.  Self-tuned DAMON-based memory tiering
alleviates the problem by adjusting the speed based on current utilization
of the upper tier.

LRU-based demotion can be triggered in both asynchronous (kswapd) and
synchronous (direct reclaim) ways.  Other than the way of finding cold
pages, asynchronous LRU-based demotion and DAMON-based demotion has no big
difference.  DAMON-based demotion can make a better balancing with
DAMON-based promotion, though.  The LRU-based demotion can do better than
DAMON-based demotion when the tier is having significant memory pressure.
It would be wise to use DAMON-based demotion as a proactive and primary
one, but utilizing LRU-based demotions together as a fast backup solution.

Evaluation
==========

In short, under a setup that requires fast and frequent promotions,
self-tuned DAMON-based memory tiering's hot pages promotion improves
performance about 4.42 %.  We believe this shows self-tuned DAMON-based
promotion's effectiveness.  Meanwhile, NUMAB-2's hot pages promotion
degrades the performance about 7.34 %.  We suspect the degradation is
mostly due to NUMAB-2's synchronous nature that can block the
application's progress, which highlights the advantage of DAMON-based
solution's asynchronous nature.

Note that the test was done with the RFC version of this patch series.  We
don't run it again since this patch series got no meaningful change after
the RFC, while the test takes pretty long time.

Setup
-----

Hardware.  Use a machine that equips 250 GiB DRAM memory tier and 50 GiB
CXL memory tier.  The tiers are exposed as NUMA nodes 0 and 1,
respectively.

Kernel.  Use Linux kernel v6.13 that modified as following.  Add all DAMON
patches that available on mm tree of 2025-03-15, and this patch series.
Also modify it to ignore mempolicy() system calls, to avoid bad effects
from application's traditional NUMA systems assumed optimizations.

Workload.  Use a modified version of Taobench benchmark[3] that available
on DCPerf benchmark suite.  It represents an in-memory caching workload.
We set its 'memsize', 'warmup_time', and 'test_time' parameter as 340 GiB,
2,500 seconds and 1,440 seconds.  The parameters are chosen to ensure the
workload uses more than DRAM memory tier.  Its RSS under the parameter
grows to 270 GiB within the warmup time.

It turned out the workload has a very static access pattrn.  Only about 13
% of the RSS is frequently accessed from the beginning to end.  Hence
promotion shows no meaningful performance difference regardless of
different design and implementations.  We therefore modify the kernel to
periodically demote up to 10 GiB hot pages and promote up to 10 GiB cold
pages once per minute.  The intention is to simulate periodic access
pattern changes.  The hotness and coldness threshold is very naively set
so that it is more like random access pattern change rather than strict
hot/cold pages exchange.  This is why we call the workload as "modified".
It is implemented as two DAMOS schemes each running on an asynchronous
thread.  It can be reproduced with DAMON user-space tool like below.

    # ./damo start \
        --ops paddr --numa_node 0 --monitoring_intervals 10s 200s 200s \
            --damos_action migrate_hot 1 \
            --damos_quota_interval 60s --damos_quota_space 10G \
        --ops paddr --numa_node 1 --monitoring_intervals 10s 200s 200s \
            --damos_action migrate_cold 0 \
            --damos_quota_interval 60s --damos_quota_space 10G \
        --nr_schemes 1 1 --nr_targets 1 1 --nr_ctxs 1 1

System configurations.  Use below variant system configurations.

- Baseline.  No memory tiering features are turned on.
- Numab_tiering.  On the baseline, enable NUMAB-2 and relcaim-based
  demotion.  In detail, following command is executed:
  echo 2 > /proc/sys/kernel/numa_balancing;
  echo 1 > /sys/kernel/mm/numa/demotion_enabled;
  echo 7 > /proc/sys/vm/zone_reclaim_mode
- DAMON_tiering.  On the baseline, utilize self-tuned DAMON-based memory
  tiering implementation via DAMON user-space tool.  It utilizes two
  kernel threads, namely promotion thread and demotion thread.  Demotion
  thread monitors access pattern of DRAM node using DAMON with
  auto-tuned monitoring intervals aiming 4% DAMON-observed access ratio,
  and demote coldest pages up to 200 MiB per second aiming 0.5% free
  space of DRAM node.  Promotion thread monitors CXL node using same
  intervals auto-tuning, and promote hot pages in same way but aiming
  for 99.7% utilization of DRAM node.  Because DAMON provides only
  best-effort accuracy, add young page DAMOS filters to allow only and
  reject all young pages at promoting and demoting, respectively.  It
  can be reproduced with DAMON user-space tool like below.

    # ./damo start \
        --numa_node 0 --monitoring_intervals_goal 4% 3 5ms 10s \
            --damos_action migrate_cold 1 --damos_access_rate 0% 0% \
            --damos_apply_interval 1s \
            --damos_quota_interval 1s --damos_quota_space 200MB \
            --damos_quota_goal node_mem_free_bp 0.5% 0 \
            --damos_filter reject young \
        --numa_node 1 --monitoring_intervals_goal 4% 3 5ms 10s \
            --damos_action migrate_hot 0 --damos_access_rate 5% max \
            --damos_apply_interval 1s \
            --damos_quota_interval 1s --damos_quota_space 200MB \
            --damos_quota_goal node_mem_used_bp 99.7% 0 \
            --damos_filter allow young \
            --damos_nr_quota_goals 1 1 --damos_nr_filters 1 1 \
        --nr_targets 1 1 --nr_schemes 1 1 --nr_ctxs 1 1

Measurment Results
------------------

On each system configuration, run the modified version of Taobench and
collect 'score'.  'score' is a metric that calculated and provided by
Taobench to represents the performance of the run on the system.  To
handle the measurement errors, repeat the measurement five times.  The
results are as below.

    Config         Score   Stdev   (%)     Normalized
    Baseline       1.6165  0.0319  1.9764  1.0000
    Numab_tiering  1.4976  0.0452  3.0209  0.9264
    DAMON_tiering  1.6881  0.0249  1.4767  1.0443

'Config' column shows the system config of the measurement.  'Score'
column shows the 'score' measurement in average of the five runs on the
system config.  'Stdev' column shows the standsard deviation of the five
measurements of the scores.  '(%)' column shows the 'Stdev' to 'Score'
ratio in percentage.  Finally, 'Normalized' column shows the averaged
score values of the configs that normalized to that of 'Baseline'.

The periodic hot pages demotion and cold pages promotion that was
conducted to simulate dynamic access pattern was started from the
beginning of the workload.  It resulted in the DRAM tier utilization
always under the watermark, and hence no real demotion was happened for
all test runs.  This means the above results show no difference between
LRU-based and DAMON-based demotions.  Only difference between NUMAB-2 and
DAMON-based promotions are represented on the results.

Numab_tiering config degraded the performance about 7.36 %.  We suspect
this happened because NUMAB-2's synchronous promotion was blocking the
Taobench's real work progress.

DAMON_tiering config improved the performance about 4.43 %.  We believe
this shows effectiveness of DAMON-based promotion that didn't block
Taobench's real work progress due to its asynchronous nature.  Also this
means DAMON's monitoring results are accurate enough to provide visible
amount of improvement.

Evaluation Limitations
----------------------

As mentioned above, this evaluation shows only comparison of promotion
mechanisms.  DAMON-based tiering is recommended to be used together with
reclaim-based demotion as a faster backup under significant memory
pressure, though.

From some perspective, the modified version of Taobench may seems making
the picture distorted too much.  It would be better to evaluate with more
realistic workload, or more finely tuned micro benchmarks.

Patch Sequence
==============

The first patch (patch 1) implements two new quota goal metrics on core
layer and expose it to DAMON core kernel API.  The second and third ones
(patches 2 and 3) further link it to DAMON sysfs interface.  Three
following patches (patches 4-6) document the new feature and sysfs file on
design, usage, and ABI documents.  The final one (patch 7) implements a
working version of a self-tuned DAMON-based memory tiering solution in an
incomplete but easy to understand form as a kernel module under
samples/damon/ directory.

References
==========

[1] https://lore.kernel.org/20231112195602.61525-1-sj@kernel.org/
[2] https://lore.kernel.org/20250303221726.484227-1-sj@kernel.org
[3] https://github.com/facebookresearch/DCPerf/blob/main/packages/tao_bench/README.md


This patch (of 7):

Used and free space ratios for specific NUMA nodes can be useful inputs
for NUMA-specific DAMOS schemes' aggressiveness self-tuning feedback loop.
Implement DAMOS quota goal metrics for such self-tuned schemes.

Link: https://lkml.kernel.org/r/20250420194030.75838-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20250420194030.75838-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Yunjeong Mun <yunjeong.mun@sk.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 47e36e6ea203..a4011726cb3b 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -145,6 +145,8 @@ enum damos_action {
  *
  * @DAMOS_QUOTA_USER_INPUT:	User-input value.
  * @DAMOS_QUOTA_SOME_MEM_PSI_US:	System level some memory PSI in us.
+ * @DAMOS_QUOTA_NODE_MEM_USED_BP:	MemUsed ratio of a node.
+ * @DAMOS_QUOTA_NODE_MEM_FREE_BP:	MemFree ratio of a node.
  * @NR_DAMOS_QUOTA_GOAL_METRICS:	Number of DAMOS quota goal metrics.
  *
  * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported.
@@ -152,6 +154,8 @@ enum damos_action {
 enum damos_quota_goal_metric {
 	DAMOS_QUOTA_USER_INPUT,
 	DAMOS_QUOTA_SOME_MEM_PSI_US,
+	DAMOS_QUOTA_NODE_MEM_USED_BP,
+	DAMOS_QUOTA_NODE_MEM_FREE_BP,
 	NR_DAMOS_QUOTA_GOAL_METRICS,
 };
 
@@ -161,6 +165,7 @@ enum damos_quota_goal_metric {
  * @target_value:	Target value of @metric to achieve with the tuning.
  * @current_value:	Current value of @metric.
  * @last_psi_total:	Last measured total PSI
+ * @nid:		Node id.
  * @list:		List head for siblings.
  *
  * Data structure for getting the current score of the quota tuning goal.  The
@@ -179,6 +184,7 @@ struct damos_quota_goal {
 	/* metric-dependent fields */
 	union {
 		u64 last_psi_total;
+		int nid;
 	};
 	struct list_head list;
 };
-- 
cgit v1.2.3


From 786d5cc2b92ac331d0654452c6c3cea611772e09 Mon Sep 17 00:00:00 2001
From: "Christoph Lameter (Ampere)" <cl@gentwo.org>
Date: Mon, 21 Apr 2025 13:58:06 -0700
Subject: Update Christoph's Email address and make it consistent

Use cl@gentwo.org throughout and remove the old email addresses.

Link: https://lkml.kernel.org/r/8b962f57-4d98-cbb0-cd82-b6ba456733e8@gentwo.org
Signed-off-by: Christoph Lameter <cl@gentwo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/percpu-defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 0aeb0e276a3e..c16cdeaa505e 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -375,7 +375,7 @@ do {									\
 } while (0)
 
 /*
- * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@linux.com>
+ * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@gentwo.org>
  *
  * Optimized manipulation for memory allocated through the per cpu
  * allocator or for addresses of per cpu variables.
-- 
cgit v1.2.3


From 8adce0857769d596c5b000d118119e3bb1d63a32 Mon Sep 17 00:00:00 2001
From: Gregory Price <gourry@gourry.net>
Date: Thu, 24 Apr 2025 16:28:05 -0400
Subject: cpuset: rename cpuset_node_allowed to cpuset_current_node_allowed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "vmscan: enforce mems_effective during demotion", v5.

Change reclaim to respect cpuset.mems_effective during demotion when
possible.  Presently, reclaim explicitly ignores cpuset.mems_effective
when demoting, which may cause the cpuset settings to violated.

Implement cpuset_node_allowed() to check the cpuset.mems_effective
associated wih the mem_cgroup of the lruvec being scanned.  This only
applies to cgroup/cpuset v2, as cpuset exists in a different hierarchy
than mem_cgroup in v1.

This requires renaming the existing cpuset_node_allowed() to be
cpuset_current_now_allowed() - which is more descriptive anyway - to
implement the new cpuset_node_allowed() which takes a target cgroup.


This patch (of 2):

Rename cpuset_node_allowed to reflect that the function checks the current
task's cpuset.mems.  This allows us to make a new cpuset_node_allowed
function that checks a target cgroup's cpuset.mems.

Link: https://lkml.kernel.org/r/20250424202806.52632-1-gourry@gourry.net
Link: https://lkml.kernel.org/r/20250424202806.52632-2-gourry@gourry.net
Signed-off-by: Gregory Price <gourry@gourry.net>
Acked-by: Waiman Long <longman@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/cpuset.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 5466c96a33db..d6a4fe5c3b6e 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -82,11 +82,11 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 void cpuset_init_current_mems_allowed(void);
 int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
 
-extern bool cpuset_node_allowed(int node, gfp_t gfp_mask);
+extern bool cpuset_current_node_allowed(int node, gfp_t gfp_mask);
 
 static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
-	return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+	return cpuset_current_node_allowed(zone_to_nid(z), gfp_mask);
 }
 
 static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
-- 
cgit v1.2.3


From 7d709f49babc28907b0ac60228f522d2e6216add Mon Sep 17 00:00:00 2001
From: Gregory Price <gourry@gourry.net>
Date: Thu, 24 Apr 2025 16:28:06 -0400
Subject: vmscan,cgroup: apply mems_effective to reclaim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is possible for a reclaimer to cause demotions of an lruvec belonging
to a cgroup with cpuset.mems set to exclude some nodes.  Attempt to apply
this limitation based on the lruvec's memcg and prevent demotion.

Notably, this may still allow demotion of shared libraries or any memory
first instantiated in another cgroup.  This means cpusets still cannot
cannot guarantee complete isolation when demotion is enabled, and the docs
have been updated to reflect this.

This is useful for isolating workloads on a multi-tenant system from
certain classes of memory more consistently - with the noted exceptions.

Note on locking:

The cgroup_get_e_css reference protects the css->effective_mems, and calls
of this interface would be subject to the same race conditions associated
with a non-atomic access to cs->effective_mems.

So while this interface cannot make strong guarantees of correctness, it
can therefore avoid taking a global or rcu_read_lock for performance.

Link: https://lkml.kernel.org/r/20250424202806.52632-3-gourry@gourry.net
Signed-off-by: Gregory Price <gourry@gourry.net>
Suggested-by: Shakeel Butt <shakeel.butt@linux.dev>
Suggested-by: Waiman Long <longman@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Waiman Long <longman@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/cpuset.h     | 5 +++++
 include/linux/memcontrol.h | 7 +++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index d6a4fe5c3b6e..2ddb256187b5 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -173,6 +173,7 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 	task_unlock(current);
 }
 
+extern bool cpuset_node_allowed(struct cgroup *cgroup, int nid);
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -293,6 +294,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
 	return false;
 }
 
+static inline bool cpuset_node_allowed(struct cgroup *cgroup, int nid)
+{
+	return true;
+}
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5264d148bdd9..ac1b003ee5b8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1736,6 +1736,8 @@ static inline void count_objcg_events(struct obj_cgroup *objcg,
 	rcu_read_unlock();
 }
 
+bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid);
+
 #else
 static inline bool mem_cgroup_kmem_disabled(void)
 {
@@ -1797,6 +1799,11 @@ static inline ino_t page_cgroup_ino(struct page *page)
 {
 	return 0;
 }
+
+static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid)
+{
+	return true;
+}
 #endif /* CONFIG_MEMCG */
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
-- 
cgit v1.2.3


From b960818d51b3c8275d0f80c5fc4156eb2ff2fde6 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavinguo@igalia.com>
Date: Fri, 25 Apr 2025 18:38:59 +0800
Subject: mm/huge_memory: remove useless folio pointers passing

Since the previous commit "mm/huge_memory: Adjust try_to_migrate_one() and
split_huge_pmd_locked()" has simplified the logic by leveraging the folio
verification in page_vma_mapped_walk(), this patch removes the unnecessary
folio pointers passing.

Link: https://lkml.kernel.org/r/20250425103859.825879-3-gavinguo@igalia.com
Link: https://lore.kernel.org/all/98d1d195-7821-4627-b518-83103ade56c0@redhat.com/
Link: https://lore.kernel.org/all/91599a3c-e69e-4d79-bac5-5013c96203d7@redhat.com/
Signed-off-by: Gavin Guo <gavinguo@igalia.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Florent Revest <revest@google.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f190998b2ebd..2f190c90192d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -395,7 +395,7 @@ static inline int split_huge_page(struct page *page)
 void deferred_split_folio(struct folio *folio, bool partially_mapped);
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address, bool freeze, struct folio *folio);
+		unsigned long address, bool freeze);
 
 #define split_huge_pmd(__vma, __pmd, __address)				\
 	do {								\
@@ -403,12 +403,11 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)	\
 					|| pmd_devmap(*____pmd))	\
 			__split_huge_pmd(__vma, __pmd, __address,	\
-						false, NULL);		\
+					 false);			\
 	}  while (0)
 
-
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
-		bool freeze, struct folio *folio);
+		bool freeze);
 
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long address);
@@ -501,7 +500,7 @@ static inline bool thp_migration_supported(void)
 }
 
 void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
-			   pmd_t *pmd, bool freeze, struct folio *folio);
+			   pmd_t *pmd, bool freeze);
 bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
 			   pmd_t *pmdp, struct folio *folio);
 
@@ -576,12 +575,12 @@ static inline void deferred_split_folio(struct folio *folio, bool partially_mapp
 	do { } while (0)
 
 static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address, bool freeze, struct folio *folio) {}
+		unsigned long address, bool freeze) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
-		unsigned long address, bool freeze, struct folio *folio) {}
+		unsigned long address, bool freeze) {}
 static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
 					 unsigned long address, pmd_t *pmd,
-					 bool freeze, struct folio *folio) {}
+					 bool freeze) {}
 
 static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,
 					 unsigned long addr, pmd_t *pmdp,
-- 
cgit v1.2.3


From bc9817bb7a21f64fbca2c4b83811d943036ec870 Mon Sep 17 00:00:00 2001
From: Huan Yang <link@vivo.com>
Date: Fri, 25 Apr 2025 11:19:23 +0800
Subject: mm/memcg: move mem_cgroup_init() ahead of cgroup_init()

Patch series "Use kmem_cache for memcg alloc", v3.

(willy tldr: "you've gone from allocating 8 objects per 32KiB to
allocating 13 objects per 32KiB, a 62% improvement in memory consumption"
[1])


The mem_cgroup_alloc function creates mem_cgroup struct and it's
associated structures including mem_cgroup_per_node.  Through detailed
analysis on our test machine (Arm64, 16GB RAM, 6.6 kernel, 1 NUMA node,
memcgv2 with nokmem,nosocket,cgroup_disable=pressure), we can observe the
memory allocation for these structures using the following shell commands:

  # Enable tracing
  echo 1 > /sys/kernel/tracing/events/kmem/kmalloc/enable
  echo 1 > /sys/kernel/tracing/tracing_on
  cat /sys/kernel/tracing/trace_pipe | grep kmalloc | grep mem_cgroup

  # Trigger allocation if cgroup subtree do not enable memcg
  echo +memory > /sys/fs/cgroup/cgroup.subtree_control

Ftrace Output:

  # mem_cgroup struct allocation
  sh-6312    [000] ..... 58015.698365: kmalloc:
    call_site=mem_cgroup_css_alloc+0xd8/0x5b4
    ptr=000000003e4c3799 bytes_req=2312 bytes_alloc=4096
    gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1 accounted=false

  # mem_cgroup_per_node allocation
  sh-6312    [000] ..... 58015.698389: kmalloc:
    call_site=mem_cgroup_css_alloc+0x1d8/0x5b4
    ptr=00000000d798700c bytes_req=2896 bytes_alloc=4096
    gfp_flags=GFP_KERNEL|__GFP_ZERO node=0 accounted=false

Key Observations:

  1. Both structures use kmalloc with requested sizes between 2KB-4KB
  2. Allocation alignment forces 4KB slab usage due to pre-defined sizes
     (64B, 128B,..., 2KB, 4KB, 8KB)
  3. Memory waste per memcg instance:
      Base struct: 4096 - 2312 = 1784 bytes
      Per-node struct: 4096 - 2896 = 1200 bytes
      Total waste: 2984 bytes (1-node system)
      NUMA scaling: (1200 + 8) * nr_node_ids bytes

So, it's a little waste.

This patchset introduces dedicated kmem_cache:
  Patch2 - mem_cgroup kmem_cache - memcg_cachep
  Patch3 - mem_cgroup_per_node kmem_cache - memcg_pn_cachep

The benefits of this change can be observed with the following tracing
commands:

  # Enable tracing
  echo 1 > /sys/kernel/tracing/events/kmem/kmem_cache_alloc/enable
  echo 1 > /sys/kernel/tracing/tracing_on
  cat /sys/kernel/tracing/trace_pipe | grep kmem_cache_alloc | grep mem_cgroup
  # In another terminal:
  echo +memory > /sys/fs/cgroup/cgroup.subtree_control


The output might now look like this:

  # mem_cgroup struct allocation
  sh-9827     [000] .....   289.513598: kmem_cache_alloc:
    call_site=mem_cgroup_css_alloc+0xbc/0x5d4 ptr=00000000695c1806
    bytes_req=2312 bytes_alloc=2368 gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1
    accounted=false
  # mem_cgroup_per_node allocation
  sh-9827     [000] .....   289.513602: kmem_cache_alloc:
    call_site=mem_cgroup_css_alloc+0x1b8/0x5d4 ptr=000000002989e63a
    bytes_req=2896 bytes_alloc=2944 gfp_flags=GFP_KERNEL|__GFP_ZERO node=0
    accounted=false

This indicates that the `mem_cgroup` struct now requests 2312 bytes and is
allocated 2368 bytes, while `mem_cgroup_per_node` requests 2896 bytes and
is allocated 2944 bytes.  The slight increase in allocated size is due to
`SLAB_HWCACHE_ALIGN` in the `kmem_cache`.

Without `SLAB_HWCACHE_ALIGN`, the allocation might appear as:

  # mem_cgroup struct allocation
  sh-9269     [003] .....    80.396366: kmem_cache_alloc:
    call_site=mem_cgroup_css_alloc+0xbc/0x5d4 ptr=000000005b12b475
    bytes_req=2312 bytes_alloc=2312 gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1
    accounted=false

  # mem_cgroup_per_node allocation
  sh-9269     [003] .....    80.396411: kmem_cache_alloc:
    call_site=mem_cgroup_css_alloc+0x1b8/0x5d4 ptr=00000000f347adc6
    bytes_req=2896 bytes_alloc=2896 gfp_flags=GFP_KERNEL|__GFP_ZERO node=0
    accounted=false

While the `bytes_alloc` now matches the `bytes_req`, this patchset
defaults to using `SLAB_HWCACHE_ALIGN` as it is generally considered more
beneficial for performance.  Please let me know if there are any issues or
if I've misunderstood anything.

This patchset also move mem_cgroup_init ahead of cgroup_init() due to
cgroup_init() will allocate root_mem_cgroup, but each initcall invoke
after cgroup_init, so if each kmem_cache do not prepare, we need testing
NULL before use it.


This patch (of 3):

When cgroup_init() creates root_mem_cgroup through css_alloc callback,
some critical resources might not be fully initialized, forcing later
operations to perform conditional checks for resource availability.

This patch move mem_cgroup_init() to address the init order, it invoke
before cgroup_init, so, compare to subsys_initcall, it can use to prepare
some key resources before root_mem_cgroup alloc.

Link: https://lkml.kernel.org/r/aAsRCj-niMMTtmK8@casper.infradead.org [1]
Link: https://lkml.kernel.org/r/20250425031935.76411-1-link@vivo.com
Link: https://lkml.kernel.org/r/20250425031935.76411-2-link@vivo.com
Signed-off-by: Huan Yang <link@vivo.com>
Suggested-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Francesco Valla <francesco@valla.it>
Cc: guoweikang <guoweikang.kernel@gmail.com>
Cc: Huang Shijie <shijie@os.amperecomputing.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Raul E Rangel <rrangel@chromium.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ac1b003ee5b8..9ed75f82b858 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1057,6 +1057,7 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
 	return id;
 }
 
+extern int mem_cgroup_init(void);
 #else /* CONFIG_MEMCG */
 
 #define MEM_CGROUP_ID_SHIFT	0
@@ -1472,6 +1473,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
 {
 	return 0;
 }
+
+static inline int mem_cgroup_init(void) { return 0; }
 #endif /* CONFIG_MEMCG */
 
 /*
-- 
cgit v1.2.3


From 8d88b0769e256c238f80615f08fc5b7aebc29439 Mon Sep 17 00:00:00 2001
From: Frank van der Linden <fvdl@google.com>
Date: Wed, 2 Apr 2025 20:56:13 +0000
Subject: mm/hugetlb: use separate nodemask for bootmem allocations

Hugetlb boot allocation has used online nodes for allocation since commit
de55996d7188 ("mm/hugetlb: use online nodes for bootmem allocation").
This was needed to be able to do the allocations earlier in boot, before
N_MEMORY was set.

This might lead to a different distribution of gigantic hugepages across
NUMA nodes if there are memoryless nodes in the system.

What happens is that the memoryless nodes are tried, but then the memblock
allocation fails and falls back, which usually means that the node that
has the highest physical address available will be used (top-down
allocation).  While this will end up getting the same number of hugetlb
pages, they might not be be distributed the same way.  The fallback for
each memoryless node might not end up coming from the same node as the
successful round-robin allocation from N_MEMORY nodes.

While administrators that rely on having a specific number of hugepages
per node should use the hugepages=N:X syntax, it's better not to change
the old behavior for the plain hugepages=N case.

To do this, construct a nodemask for hugetlb bootmem purposes only,
containing nodes that have memory.  Then use that for round-robin bootmem
allocations.

This saves some cycles, and the added advantage here is that hugetlb_cma
can use it too, avoiding the older issue of pointless attempts to create a
CMA area for memoryless nodes (which will also cause the per-node CMA area
size to be too small).

Link: https://lkml.kernel.org/r/20250402205613.3086864-1-fvdl@google.com
Fixes: de55996d7188 ("mm/hugetlb: use online nodes for bootmem allocation")
Signed-off-by: Frank van der Linden <fvdl@google.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Luiz Capitulino <luizcap@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a57bed83c657..23ebf49c5d6a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,7 @@
 #include <linux/pgtable.h>
 #include <linux/gfp.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/nodemask.h>
 
 struct ctl_table;
 struct user_struct;
@@ -176,6 +177,8 @@ extern struct list_head huge_boot_pages[MAX_NUMNODES];
 
 void hugetlb_bootmem_alloc(void);
 bool hugetlb_bootmem_allocated(void);
+extern nodemask_t hugetlb_bootmem_nodes;
+void hugetlb_bootmem_set_nodes(void);
 
 /* arch callbacks */
 
-- 
cgit v1.2.3


From 68a1436bde00b40327efe27926076b53088775a3 Mon Sep 17 00:00:00 2001
From: Zhongkun He <hezhongkun.hzk@bytedance.com>
Date: Mon, 21 Apr 2025 17:13:28 +0800
Subject: mm: add swappiness=max arg to memory.reclaim for only anon reclaim

Patch series "add max arg to swappiness in memory.reclaim and lru_gen", v4.

This patchset adds max arg to swappiness in memory.reclaim and lru_gen for
anon only proactive memory reclaim.

With commit <68cd9050d871> ("mm: add swappiness= arg to memory.reclaim")
we can submit an additional swappiness=<val> argument to memory.reclaim.
It is very useful because we can dynamically adjust the reclamation ratio
based on the anonymous folios and file folios of each cgroup.  For
example,when swappiness is set to 0, we only reclaim from file folios.
But we can not relciam memory just from anon folios.

This patchset introduces a new macro, SWAPPINESS_ANON_ONLY, defined as
MAX_SWAPPINESS + 1, represent the max arg semantics.  It specifically
indicates that reclamation should occur only from anonymous pages.

Patch 1 adds swappiness=max arg to memory.reclaim suggested-by: Yosry
Ahmed

Patch 2 add more comments for cache_trim_mode from Johannes Weiner in [1].

Patch 3 add max arg to lru_gen for proactive memory reclaim in MGLRU.  The
MGLRU already supports reclaiming exclusively from anonymous pages.  This
patch formalizes that behavior by introducing a max parameter to represent
the corresponding semantics.

Patch 4 using SWAPPINESS_ANON_ONLY in MGLRU Using SWAPPINESS_ANON_ONLY
instead of MAX_SWAPPINESS + 1 to indicate reclaiming only from anonymous
pages makes the code more readable and explicit

Here is the previous discussion:
https://lore.kernel.org/all/20250314033350.1156370-1-hezhongkun.hzk@bytedance.com/
https://lore.kernel.org/all/20250312094337.2296278-1-hezhongkun.hzk@bytedance.com/
https://lore.kernel.org/all/20250318135330.3358345-1-hezhongkun.hzk@bytedance.com/


This patch (of 4):

With commit <68cd9050d871> ("mm: add swappiness= arg to memory.reclaim")
we can submit an additional swappiness=<val> argument to memory.reclaim.
It is very useful because we can dynamically adjust the reclamation ratio
based on the anonymous folios and file folios of each cgroup.  For
example,when swappiness is set to 0, we only reclaim from file folios.

However,we have also encountered a new issue: when swappiness is set to
the MAX_SWAPPINESS, it may still only reclaim file folios.

So, we hope to add a new arg 'swappiness=max' in memory.reclaim where
proactive memory reclaim only reclaims from anonymous folios when
swappiness is set to max.  The swappiness semantics from a user
perspective remain unchanged.

For example, something like this:

echo "2M swappiness=max" > /sys/fs/cgroup/memory.reclaim

will perform reclaim on the rootcg with a swappiness setting of 'max' (a
new mode) regardless of the file folios.  Users have a more comprehensive
view of the application's memory distribution because there are many
metrics available.  For example, if we find that a certain cgroup has a
large number of inactive anon folios, we can reclaim only those and skip
file folios, because with the zram/zswap, the IO tradeoff that
cache_trim_mode or other file first logic is making doesn't hold - file
refaults will cause IO, whereas anon decompression will not.

With this patch, the swappiness argument of memory.reclaim has a new
mode 'max', means reclaiming just from anonymous folios both in traditional
LRU and MGLRU.

Link: https://lkml.kernel.org/r/cover.1745225696.git.hezhongkun.hzk@bytedance.com
Link: https://lore.kernel.org/all/20250314141833.GA1316033@cmpxchg.org/ [1]
Link: https://lkml.kernel.org/r/519e12b9b1f8c31a01e228c8b4b91a2419684f77.1745225696.git.hezhongkun.hzk@bytedance.com
Signed-off-by: Zhongkun He <hezhongkun.hzk@bytedance.com>
Suggested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Acked-by: Muchun Song <muchun.song@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4e4e27d3ce3d..bc0e1c275fc0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -414,6 +414,10 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 #define MEMCG_RECLAIM_PROACTIVE (1 << 2)
 #define MIN_SWAPPINESS 0
 #define MAX_SWAPPINESS 200
+
+/* Just recliam from anon folios in proactive memory reclaim */
+#define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1)
+
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
-- 
cgit v1.2.3


From a3365bdca2200e24d815b19382d3f05deb8567ab Mon Sep 17 00:00:00 2001
From: Cheng-Han Wu <hank20010209@gmail.com>
Date: Sun, 27 Apr 2025 22:50:04 +0800
Subject: mm: remove unused macro INIT_PASID

The macro INIT_PASID was originally used by mm_init_pasid.  However, since
commit a6cbd44093ef ("kernel/fork: Initialize mm's PASID"), mm_init_pasid
has been removed.  Therefore, INIT_PASID is no longer needed and is
removed.

Link: https://lkml.kernel.org/r/20250427145004.13049-1-hank20010209@gmail.com
Signed-off-by: Cheng-Han Wu <hank20010209@gmail.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 56d07edd01f9..e76bade9ebb1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -28,7 +28,6 @@
 #endif
 #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
 
-#define INIT_PASID	0
 
 struct address_space;
 struct mem_cgroup;
-- 
cgit v1.2.3


From 4c78cc596bb8d39532f059e0198eeabf370c50f5 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Fri, 9 May 2025 00:46:19 -0700
Subject: memblock: add MEMBLOCK_RSRV_KERN flag

Patch series "kexec: introduce Kexec HandOver (KHO)", v8.

Kexec today considers itself purely a boot loader: When we enter the new
kernel, any state the previous kernel left behind is irrelevant and the
new kernel reinitializes the system.

However, there are use cases where this mode of operation is not what we
actually want.  In virtualization hosts for example, we want to use kexec
to update the host kernel while virtual machine memory stays untouched.
When we add device assignment to the mix, we also need to ensure that
IOMMU and VFIO states are untouched.  If we add PCIe peer to peer DMA, we
need to do the same for the PCI subsystem.  If we want to kexec while an
SEV-SNP enabled virtual machine is running, we need to preserve the VM
context pages and physical memory.  See "pkernfs: Persisting guest memory
and kernel/device state safely across kexec" Linux Plumbers Conference
2023 presentation for details:

  https://lpc.events/event/17/contributions/1485/

To start us on the journey to support all the use cases above, this patch
implements basic infrastructure to allow hand over of kernel state across
kexec (Kexec HandOver, aka KHO).  As a really simple example target, we
use memblock's reserve_mem.

With this patchset applied, memory that was reserved using "reserve_mem"
command line options remains intact after kexec and it is guaranteed to
reside at the same physical address.

== Alternatives ==

There are alternative approaches to (parts of) the problems above:

  * Memory Pools [1] - preallocated persistent memory region + allocator
  * PRMEM [2] - resizable persistent memory regions with fixed metadata
                pointer on the kernel command line + allocator
  * Pkernfs [3] - preallocated file system for in-kernel data with fixed
                  address location on the kernel command line
  * PKRAM [4] - handover of user space pages using a fixed metadata page
                specified via command line

All of the approaches above fundamentally have the same problem: They
require the administrator to explicitly carve out a physical memory
location because they have no mechanism outside of the kernel command line
to pass data (including memory reservations) between kexec'ing kernels.

KHO provides that base foundation.  We will determine later whether we
still need any of the approaches above for fast bulk memory handover of
for example IOMMU page tables.  But IMHO they would all be users of KHO,
with KHO providing the foundational primitive to pass metadata and bulk
memory reservations as well as provide easy versioning for data.

== Overview ==

We introduce a metadata file that the kernels pass between each other.
How they pass it is architecture specific.  The file's format is a
Flattened Device Tree (fdt) which has a generator and parser already
included in Linux.  KHO is enabled in the kernel command line by `kho=on`.
When the root user enables KHO through
/sys/kernel/debug/kho/out/finalize, the kernel invokes callbacks to every
KHO users to register preserved memory regions, which contain drivers'
states.

When the actual kexec happens, the fdt is part of the image set that we
boot into.  In addition, we keep "scratch regions" available for kexec:
physically contiguous memory regions that are guaranteed to not have any
memory that KHO would preserve.  The new kernel bootstraps itself using
the scratch regions and sets all handed over memory as in use.  When
drivers initialize that support KHO, they introspect the fdt, restore
preserved memory regions, and retrieve their states stored in the
preserved memory.

== Limitations ==

Currently KHO is only implemented for file based kexec.  The kernel
interfaces in the patch set are already in place to support user space
kexec as well, but it is still not implemented it yet inside kexec tools.

== How to Use ==

To use the code, please boot the kernel with the "kho=on" command line
parameter.  KHO will automatically create scratch regions.  If you want to
set the scratch size explicitly you can use "kho_scratch=" command line
parameter.  For instance, "kho_scratch=16M,512M,256M" will reserve a 16
MiB low memory scratch area, a 512 MiB global scratch region, and 256 MiB
per NUMA node scratch regions on boot.

Make sure to have a reserved memory range requested with reserv_mem
command line option, for example, "reserve_mem=64m:4k:n1".

Then before you invoke file based "kexec -l", finalize KHO FDT:

  # echo 1 > /sys/kernel/debug/kho/out/finalize

You can preview the generated FDT using `dtc`,

  # dtc /sys/kernel/debug/kho/out/fdt
  # dtc /sys/kernel/debug/kho/out/sub_fdts/memblock

`dtc` is available on ubuntu by `sudo apt-get install device-tree-compiler`.

Now kexec into the new kernel,

  # kexec -l Image --initrd=initrd -s
  # kexec -e

(The order of KHO finalization and "kexec -l" does not matter.)

The new kernel will boot up and contain the previous kernel's reserve_mem
contents at the same physical address as the first kernel.

You can also review the FDT passed from the old kernel,

  # dtc /sys/kernel/debug/kho/in/fdt
  # dtc /sys/kernel/debug/kho/in/sub_fdts/memblock


This patch (of 17):

To denote areas that were reserved for kernel use either directly with
memblock_reserve_kern() or via memblock allocations.

Link: https://lore.kernel.org/lkml/20250424083258.2228122-1-changyuanl@google.com/
Link: https://lore.kernel.org/lkml/aAeaJ2iqkrv_ffhT@kernel.org/
Link: https://lore.kernel.org/lkml/35c58191-f774-40cf-8d66-d1e2aaf11a62@intel.com/
Link: https://lore.kernel.org/lkml/20250424093302.3894961-1-arnd@kernel.org/
Link: https://lkml.kernel.org/r/20250509074635.3187114-1-changyuanl@google.com
Link: https://lkml.kernel.org/r/20250509074635.3187114-2-changyuanl@google.com
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Gowans <jgowans@amazon.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Will Deacon <will@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memblock.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index ef5a1ecc6e59..6c00fbc08513 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,6 +42,9 @@ extern unsigned long long max_possible_pfn;
  * kernel resource tree.
  * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are
  * not initialized (only for reserved regions).
+ * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use,
+ * either explictitly with memblock_reserve_kern() or via memblock
+ * allocation APIs. All memblock allocations set this flag.
  */
 enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
@@ -50,6 +53,7 @@ enum memblock_flags {
 	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
 	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
 	MEMBLOCK_RSRV_NOINIT	= 0x10,	/* don't initialize struct pages */
+	MEMBLOCK_RSRV_KERN	= 0x20,	/* memory reserved for kernel use */
 };
 
 /**
@@ -116,7 +120,19 @@ int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid,
 int memblock_add(phys_addr_t base, phys_addr_t size);
 int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_phys_free(phys_addr_t base, phys_addr_t size);
-int memblock_reserve(phys_addr_t base, phys_addr_t size);
+int __memblock_reserve(phys_addr_t base, phys_addr_t size, int nid,
+		       enum memblock_flags flags);
+
+static __always_inline int memblock_reserve(phys_addr_t base, phys_addr_t size)
+{
+	return __memblock_reserve(base, size, NUMA_NO_NODE, 0);
+}
+
+static __always_inline int memblock_reserve_kern(phys_addr_t base, phys_addr_t size)
+{
+	return __memblock_reserve(base, size, NUMA_NO_NODE, MEMBLOCK_RSRV_KERN);
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
 int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
 #endif
@@ -476,6 +492,7 @@ static inline __init_memblock bool memblock_bottom_up(void)
 
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
+phys_addr_t memblock_reserved_kern_size(phys_addr_t limit, int nid);
 unsigned long memblock_estimated_nr_free_pages(void);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
-- 
cgit v1.2.3


From d59f43b5748092557d34244e29a618221a250501 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Fri, 9 May 2025 00:46:20 -0700
Subject: memblock: add support for scratch memory

With KHO (Kexec HandOver), we need a way to ensure that the new kernel
does not allocate memory on top of any memory regions that the previous
kernel was handing over.  But to know where those are, we need to include
them in the memblock.reserved array which may not be big enough to hold
all ranges that need to be persisted across kexec.  To resize the array,
we need to allocate memory.  That brings us into a catch 22 situation.

The solution to that is limit memblock allocations to the scratch regions:
safe regions to operate in the case when there is memory that should
remain intact across kexec.

KHO provides several "scratch regions" as part of its metadata.  These
scratch regions are contiguous memory blocks that known not to contain any
memory that should be persisted across kexec.  These regions should be
large enough to accommodate all memblock allocations done by the kexeced
kernel.

We introduce a new memblock_set_scratch_only() function that allows KHO to
indicate that any memblock allocation must happen from the scratch
regions.

Later, we may want to perform another KHO kexec.  For that, we reuse the
same scratch regions.  To ensure that no eventually handed over data gets
allocated inside a scratch region, we flip the semantics of the scratch
region with memblock_clear_scratch_only(): After that call, no allocations
may happen from scratch memblock regions.  We will lift that restriction
in the next patch.

Link: https://lkml.kernel.org/r/20250509074635.3187114-3-changyuanl@google.com
Signed-off-by: Alexander Graf <graf@amazon.com>
Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Gowans <jgowans@amazon.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memblock.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6c00fbc08513..993937a6b962 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -45,6 +45,11 @@ extern unsigned long long max_possible_pfn;
  * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use,
  * either explictitly with memblock_reserve_kern() or via memblock
  * allocation APIs. All memblock allocations set this flag.
+ * @MEMBLOCK_KHO_SCRATCH: memory region that kexec can pass to the next
+ * kernel in handover mode. During early boot, we do not know about all
+ * memory reservations yet, so we get scratch memory from the previous
+ * kernel that we know is good to use. It is the only memory that
+ * allocations may happen from in this phase.
  */
 enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
@@ -54,6 +59,7 @@ enum memblock_flags {
 	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
 	MEMBLOCK_RSRV_NOINIT	= 0x10,	/* don't initialize struct pages */
 	MEMBLOCK_RSRV_KERN	= 0x20,	/* memory reserved for kernel use */
+	MEMBLOCK_KHO_SCRATCH	= 0x40,	/* scratch memory for kexec handover */
 };
 
 /**
@@ -148,6 +154,8 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size);
+int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size);
+int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size);
 
 void memblock_free(void *ptr, size_t size);
 void reset_all_zones_managed_pages(void);
@@ -291,6 +299,11 @@ static inline bool memblock_is_driver_managed(struct memblock_region *m)
 	return m->flags & MEMBLOCK_DRIVER_MANAGED;
 }
 
+static inline bool memblock_is_kho_scratch(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_KHO_SCRATCH;
+}
+
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
 			    unsigned long  *end_pfn);
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
@@ -619,5 +632,12 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) { }
 static inline void memtest_report_meminfo(struct seq_file *m) { }
 #endif
 
+#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
+void memblock_set_kho_scratch_only(void);
+void memblock_clear_kho_scratch_only(void);
+#else
+static inline void memblock_set_kho_scratch_only(void) { }
+static inline void memblock_clear_kho_scratch_only(void) { }
+#endif
 
 #endif /* _LINUX_MEMBLOCK_H */
-- 
cgit v1.2.3


From b8a8f96a6dce527ad316184ff1e20f238ed413d8 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Fri, 9 May 2025 00:46:21 -0700
Subject: memblock: introduce memmap_init_kho_scratch()

With deferred initialization of struct page it will be necessary to
initialize memory map for KHO scratch regions early.

Add memmap_init_kho_scratch() method that will allow such initialization
in upcoming patches.

Link: https://lkml.kernel.org/r/20250509074635.3187114-4-changyuanl@google.com
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Gowans <jgowans@amazon.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memblock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 993937a6b962..bb19a2534224 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -635,9 +635,11 @@ static inline void memtest_report_meminfo(struct seq_file *m) { }
 #ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
 void memblock_set_kho_scratch_only(void);
 void memblock_clear_kho_scratch_only(void);
+void memmap_init_kho_scratch_pages(void);
 #else
 static inline void memblock_set_kho_scratch_only(void) { }
 static inline void memblock_clear_kho_scratch_only(void) { }
+static inline void memmap_init_kho_scratch_pages(void) {}
 #endif
 
 #endif /* _LINUX_MEMBLOCK_H */
-- 
cgit v1.2.3


From 3dc92c311498c4d307cfdd0c6c3ac9355b50f683 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Fri, 9 May 2025 00:46:22 -0700
Subject: kexec: add Kexec HandOver (KHO) generation helpers

Add the infrastructure to generate Kexec HandOver metadata.  Kexec
HandOver is a mechanism that allows Linux to preserve state - arbitrary
properties as well as memory locations - across kexec.

It does so using 2 concepts:

  1) KHO FDT - Every KHO kexec carries a KHO specific flattened device tree
     blob that describes preserved memory regions. Device drivers can
     register to KHO to serialize and preserve their states before kexec.

  2) Scratch Regions - CMA regions that we allocate in the first kernel.
     CMA gives us the guarantee that no handover pages land in those
     regions, because handover pages must be at a static physical memory
     location. We use these regions as the place to load future kexec
     images so that they won't collide with any handover data.

Link: https://lkml.kernel.org/r/20250509074635.3187114-5-changyuanl@google.com
Signed-off-by: Alexander Graf <graf@amazon.com>
Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Pratyush Yadav <ptyadav@amazon.de>
Signed-off-by: Pratyush Yadav <ptyadav@amazon.de>
Co-developed-by: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Gowans <jgowans@amazon.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec_handover.h | 59 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 include/linux/kexec_handover.h

(limited to 'include/linux')

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
new file mode 100644
index 000000000000..2e19004776f6
--- /dev/null
+++ b/include/linux/kexec_handover.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_KEXEC_HANDOVER_H
+#define LINUX_KEXEC_HANDOVER_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+struct kho_scratch {
+	phys_addr_t addr;
+	phys_addr_t size;
+};
+
+/* KHO Notifier index */
+enum kho_event {
+	KEXEC_KHO_FINALIZE = 0,
+	KEXEC_KHO_ABORT = 1,
+};
+
+struct notifier_block;
+
+struct kho_serialization;
+
+#ifdef CONFIG_KEXEC_HANDOVER
+bool kho_is_enabled(void);
+
+int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
+
+int register_kho_notifier(struct notifier_block *nb);
+int unregister_kho_notifier(struct notifier_block *nb);
+
+void kho_memory_init(void);
+#else
+static inline bool kho_is_enabled(void)
+{
+	return false;
+}
+
+static inline int kho_add_subtree(struct kho_serialization *ser,
+				  const char *name, void *fdt)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int register_kho_notifier(struct notifier_block *nb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int unregister_kho_notifier(struct notifier_block *nb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void kho_memory_init(void)
+{
+}
+#endif /* CONFIG_KEXEC_HANDOVER */
+
+#endif /* LINUX_KEXEC_HANDOVER_H */
-- 
cgit v1.2.3


From c609c144b0e8dbc19712ff8c8a0929be38afe58d Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Fri, 9 May 2025 00:46:23 -0700
Subject: kexec: add KHO parsing support

When we have a KHO kexec, we get an FDT blob and scratch region to
populate the state of the system.  Provide helper functions that allow
architecture code to easily handle memory reservations based on them and
give device drivers visibility into the KHO FDT and memory reservations so
they can recover their own state.

Include a fix from Arnd Bergmann <arnd@arndb.de>
https://lore.kernel.org/lkml/20250424093302.3894961-1-arnd@kernel.org/.

Link: https://lkml.kernel.org/r/20250509074635.3187114-6-changyuanl@google.com
Signed-off-by: Alexander Graf <graf@amazon.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Gowans <jgowans@amazon.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec_handover.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 2e19004776f6..02dcfc8c427e 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -24,11 +24,15 @@ struct kho_serialization;
 bool kho_is_enabled(void);
 
 int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
+int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
 
 int register_kho_notifier(struct notifier_block *nb);
 int unregister_kho_notifier(struct notifier_block *nb);
 
 void kho_memory_init(void);
+
+void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
+		  u64 scratch_len);
 #else
 static inline bool kho_is_enabled(void)
 {
@@ -41,6 +45,11 @@ static inline int kho_add_subtree(struct kho_serialization *ser,
 	return -EOPNOTSUPP;
 }
 
+static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int register_kho_notifier(struct notifier_block *nb)
 {
 	return -EOPNOTSUPP;
@@ -54,6 +63,11 @@ static inline int unregister_kho_notifier(struct notifier_block *nb)
 static inline void kho_memory_init(void)
 {
 }
+
+static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
+				phys_addr_t scratch_phys, u64 scratch_len)
+{
+}
 #endif /* CONFIG_KEXEC_HANDOVER */
 
 #endif /* LINUX_KEXEC_HANDOVER_H */
-- 
cgit v1.2.3


From fc33e4b44b2717feba2f6f07ce7943a96499c9ec Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Date: Fri, 9 May 2025 00:46:24 -0700
Subject: kexec: enable KHO support for memory preservation

Introduce APIs allowing KHO users to preserve memory across kexec and get
access to that memory after boot of the kexeced kernel

kho_preserve_folio() - record a folio to be preserved over kexec
kho_restore_folio() - recreates the folio from the preserved memory
kho_preserve_phys() - record physically contiguous range to be
preserved over kexec.

The memory preservations are tracked by two levels of xarrays to manage
chunks of per-order 512 byte bitmaps.  For instance if PAGE_SIZE = 4096,
the entire 1G order of a 1TB x86 system would fit inside a single 512 byte
bitmap.  For order 0 allocations each bitmap will cover 16M of address
space.  Thus, for 16G of memory at most 512K of bitmap memory will be
needed for order 0.

At serialization time all bitmaps are recorded in a linked list of pages
for the next kernel to process and the physical address of the list is
recorded in KHO FDT.

The next kernel then processes that list, reserves the memory ranges and
later, when a user requests a folio or a physical range, KHO restores
corresponding memory map entries.

Link: https://lkml.kernel.org/r/20250509074635.3187114-7-changyuanl@google.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Gowans <jgowans@amazon.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec_handover.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 02dcfc8c427e..348844cffb13 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -16,13 +16,34 @@ enum kho_event {
 	KEXEC_KHO_ABORT = 1,
 };
 
+struct folio;
 struct notifier_block;
 
+#define DECLARE_KHOSER_PTR(name, type) \
+	union {                        \
+		phys_addr_t phys;      \
+		type ptr;              \
+	} name
+#define KHOSER_STORE_PTR(dest, val)               \
+	({                                        \
+		typeof(val) v = val;              \
+		typecheck(typeof((dest).ptr), v); \
+		(dest).phys = virt_to_phys(v);    \
+	})
+#define KHOSER_LOAD_PTR(src)                                                 \
+	({                                                                   \
+		typeof(src) s = src;                                         \
+		(typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \
+	})
+
 struct kho_serialization;
 
 #ifdef CONFIG_KEXEC_HANDOVER
 bool kho_is_enabled(void);
 
+int kho_preserve_folio(struct folio *folio);
+int kho_preserve_phys(phys_addr_t phys, size_t size);
+struct folio *kho_restore_folio(phys_addr_t phys);
 int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
 int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
 
@@ -39,6 +60,21 @@ static inline bool kho_is_enabled(void)
 	return false;
 }
 
+static inline int kho_preserve_folio(struct folio *folio)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline struct folio *kho_restore_folio(phys_addr_t phys)
+{
+	return NULL;
+}
+
 static inline int kho_add_subtree(struct kho_serialization *ser,
 				  const char *name, void *fdt)
 {
-- 
cgit v1.2.3


From 3bdecc3c93f9f68d11ed54971dde169b6ead9d78 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Fri, 9 May 2025 00:46:25 -0700
Subject: kexec: add KHO support to kexec file loads

Kexec has 2 modes: A user space driven mode and a kernel driven mode.  For
the kernel driven mode, kernel code determines the physical addresses of
all target buffers that the payload gets copied into.

With KHO, we can only safely copy payloads into the "scratch area".  Teach
the kexec file loader about it, so it only allocates for that area.  In
addition, enlighten it with support to ask the KHO subsystem for its
respective payloads to copy into target memory.  Also teach the KHO
subsystem how to fill the images for file loads.

Link: https://lkml.kernel.org/r/20250509074635.3187114-8-changyuanl@google.com
Signed-off-by: Alexander Graf <graf@amazon.com>
Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Gowans <jgowans@amazon.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c8971861521a..075255de8154 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -371,6 +371,11 @@ struct kimage {
 	size_t ima_buffer_size;
 #endif
 
+	struct {
+		struct kexec_segment *scratch;
+		phys_addr_t fdt;
+	} kho;
+
 	/* Core ELF header buffer */
 	void *elf_headers;
 	unsigned long elf_headers_sz;
-- 
cgit v1.2.3


From f88ce2c84a341f44a7d00bc10868714bc4751f7e Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 23 Apr 2025 14:33:37 +0100
Subject: mm: introduce for_each_valid_pfn() and use it from
 reserve_bootmem_region()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm: Introduce for_each_valid_pfn()", v4.

There are cases where a naïve loop over a PFN range, calling pfn_valid()
on each one, is horribly inefficient.  Ruihan Li reported the case where
memmap_init() iterates all the way from zero to a potentially large value
of ARCH_PFN_OFFSET, and we at Amazon found the reserve_bootmem_region()
one as it affects hypervisor live update.  Others are more cosmetic.

By introducing a for_each_valid_pfn() helper it can optimise away a lot of
pointless calls to pfn_valid(), skipping immediately to the next valid PFN
and also skipping *all* checks within a valid (sub)region according to the
granularity of the memory model in use.


This patch (of 7)

Especially since commit 9092d4f7a1f8 ("memblock: update initialization of
reserved pages"), the reserve_bootmem_region() function can spend a
significant amount of time iterating over every 4KiB PFN in a range,
calling pfn_valid() on each one, and ultimately doing absolutely nothing.

On a platform used for virtualization, with large NOMAP regions that
eventually get used for guest RAM, this leads to a significant increase in
steal time experienced during kexec for a live update.

Introduce for_each_valid_pfn() and use it from reserve_bootmem_region().
This implementation is precisely the same naïve loop that the functio
used to have, but subsequent commits will provide optimised versions for
FLATMEM and SPARSEMEM, and this version will remain for those
architectures which provide their own pfn_valid() implementation,
until/unless they also provide a matching for_each_valid_pfn().

Link: https://lkml.kernel.org/r/20250423133821.789413-1-dwmw2@infradead.org
Link: https://lkml.kernel.org/r/20250423133821.789413-2-dwmw2@infradead.org
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Ruihan Li <lrh2000@pku.edu.cn>
Cc: Will Deacon <will@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6ccec1bf2896..230a29c2ed1a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2177,6 +2177,16 @@ void sparse_init(void);
 #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+/*
+ * Fallback case for when the architecture provides its own pfn_valid() but
+ * not a corresponding for_each_valid_pfn().
+ */
+#ifndef for_each_valid_pfn
+#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn)			\
+	for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++)	\
+		if (pfn_valid(_pfn))
+#endif
+
 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */
 #endif /* _LINUX_MMZONE_H */
-- 
cgit v1.2.3


From 037926316c9ded1194927ee56b862e3a1450aaf3 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 23 Apr 2025 14:33:39 +0100
Subject: mm: implement for_each_valid_pfn() for CONFIG_SPARSEMEM

Implement for_each_valid_pfn() based on two helper functions.

The first_valid_pfn() function largely mirrors pfn_valid(), calling into a
pfn_section_first_valid() helper which is trivial for the !VMEMMAP case,
and in the VMEMMAP case will skip to the next subsection as needed.

Since next_valid_pfn() knows that its argument *is* a valid PFN, it
doesn't need to do any checking at all while iterating over the low bits
within a (sub)section mask; the whole (sub)section is either present or
not.

Note that the VMEMMAP version of pfn_section_first_valid() may return a
value *higher* than end_pfn when skipping to the next subsection, and
first_valid_pfn() happily returns that higher value.  This is fine.

[dwmw2@infradead.org: fix next_valid_pfn() for sparsemem]
  Link: https://lkml.kernel.org/r/c15100fcf6781a60b852c4dbb43bdc98a678fcf0.camel@infradead.org
Link: https://lkml.kernel.org/r/20250423133821.789413-4-dwmw2@infradead.org
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Ruihan Li <lrh2000@pku.edu.cn>
Cc: Will Deacon <will@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 230a29c2ed1a..b19a98c20de8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2075,11 +2075,37 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
 
 	return usage ? test_bit(idx, usage->subsection_map) : 0;
 }
+
+static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn)
+{
+	struct mem_section_usage *usage = READ_ONCE(ms->usage);
+	int idx = subsection_map_index(*pfn);
+	unsigned long bit;
+
+	if (!usage)
+		return false;
+
+	if (test_bit(idx, usage->subsection_map))
+		return true;
+
+	/* Find the next subsection that exists */
+	bit = find_next_bit(usage->subsection_map, SUBSECTIONS_PER_SECTION, idx);
+	if (bit == SUBSECTIONS_PER_SECTION)
+		return false;
+
+	*pfn = (*pfn & PAGE_SECTION_MASK) + (bit * PAGES_PER_SUBSECTION);
+	return true;
+}
 #else
 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
 {
 	return 1;
 }
+
+static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn)
+{
+	return true;
+}
 #endif
 
 void sparse_init_early_section(int nid, struct page *map, unsigned long pnum,
@@ -2128,6 +2154,58 @@ static inline int pfn_valid(unsigned long pfn)
 
 	return ret;
 }
+
+/* Returns end_pfn or higher if no valid PFN remaining in range */
+static inline unsigned long first_valid_pfn(unsigned long pfn, unsigned long end_pfn)
+{
+	unsigned long nr = pfn_to_section_nr(pfn);
+
+	rcu_read_lock_sched();
+
+	while (nr <= __highest_present_section_nr && pfn < end_pfn) {
+		struct mem_section *ms = __pfn_to_section(pfn);
+
+		if (valid_section(ms) &&
+		    (early_section(ms) || pfn_section_first_valid(ms, &pfn))) {
+			rcu_read_unlock_sched();
+			return pfn;
+		}
+
+		/* Nothing left in this section? Skip to next section */
+		nr++;
+		pfn = section_nr_to_pfn(nr);
+	}
+
+	rcu_read_unlock_sched();
+	return end_pfn;
+}
+
+static inline unsigned long next_valid_pfn(unsigned long pfn, unsigned long end_pfn)
+{
+	pfn++;
+
+	if (pfn >= end_pfn)
+		return end_pfn;
+
+	/*
+	 * Either every PFN within the section (or subsection for VMEMMAP) is
+	 * valid, or none of them are. So there's no point repeating the check
+	 * for every PFN; only call first_valid_pfn() again when crossing a
+	 * (sub)section boundary (i.e. !(pfn & ~PAGE_{SUB,}SECTION_MASK)).
+	 */
+	if (pfn & ~(IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) ?
+		   PAGE_SUBSECTION_MASK : PAGE_SECTION_MASK))
+		return pfn;
+
+	return first_valid_pfn(pfn, end_pfn);
+}
+
+
+#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn)			\
+	for ((_pfn) = first_valid_pfn((_start_pfn), (_end_pfn));	\
+	     (_pfn) < (_end_pfn);					\
+	     (_pfn) = next_valid_pfn((_pfn), (_end_pfn)))
+
 #endif
 
 static inline int pfn_in_present_section(unsigned long pfn)
-- 
cgit v1.2.3


From 4428a35f91f0f0e31d874038b3091e1c5a461f34 Mon Sep 17 00:00:00 2001
From: Lance Yang <lance.yang@linux.dev>
Date: Thu, 24 Apr 2025 23:56:06 +0800
Subject: mm/rmap: inline folio_test_large_maybe_mapped_shared() into callers

To prevent the function from being used when CONFIG_MM_ID is disabled, we
intend to inline it into its few callers, which also would help maintain
the expected code placement.

Link: https://lkml.kernel.org/r/20250424155606.57488-1-lance.yang@linux.dev
Signed-off-by: Lance Yang <lance.yang@linux.dev>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Mingzhe Yang <mingzhe.yang@ly.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h         | 2 +-
 include/linux/page-flags.h | 4 ----
 include/linux/rmap.h       | 2 +-
 3 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9b701cfbef22..21dd110b6655 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2111,7 +2111,7 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio)
 	 */
 	if (mapcount <= 1)
 		return false;
-	return folio_test_large_maybe_mapped_shared(folio);
+	return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
 }
 
 #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d3909cb1e576..37b11f15dbd9 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -1230,10 +1230,6 @@ static inline int folio_has_private(const struct folio *folio)
 	return !!(folio->flags & PAGE_FLAGS_PRIVATE);
 }
 
-static inline bool folio_test_large_maybe_mapped_shared(const struct folio *folio)
-{
-	return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
-}
 #undef PF_ANY
 #undef PF_HEAD
 #undef PF_NO_TAIL
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 6b82b618846e..c4f4903b1088 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -223,7 +223,7 @@ static inline void __folio_large_mapcount_sanity_checks(const struct folio *foli
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY &&
 			folio->_mm_id_mapcount[1] < 0);
 	VM_WARN_ON_ONCE(!folio_mapped(folio) &&
-			folio_test_large_maybe_mapped_shared(folio));
+			test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids));
 }
 
 static __always_inline void folio_set_large_mapcount(struct folio *folio,
-- 
cgit v1.2.3


From 60309008e1e2b2d4bff3c2475b0c74faf395f787 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 28 Apr 2025 10:27:54 +0300
Subject: util_macros.h: make the header more resilient

Add missing header inclusions.

Link: https://lkml.kernel.org/r/20250428072754.3265274-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/util_macros.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 3b570b765b75..76ca2b83c13e 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -2,7 +2,10 @@
 #ifndef _LINUX_HELPER_MACROS_H_
 #define _LINUX_HELPER_MACROS_H_
 
+#include <linux/compiler_attributes.h>
 #include <linux/math.h>
+#include <linux/typecheck.h>
+#include <linux/stddef.h>
 
 /**
  * for_each_if - helper for handling conditionals in various for_each macros
-- 
cgit v1.2.3


From 86ebd50224c0734d965843260d0dc057a9431c61 Mon Sep 17 00:00:00 2001
From: Shivank Garg <shivankg@amd.com>
Date: Wed, 30 Apr 2025 10:01:51 +0000
Subject: mm: add folio_expected_ref_count() for reference count calculation

Patch series " JFS: Implement migrate_folio for jfs_metapage_aops" v5.

This patchset addresses a warning that occurs during memory compaction due
to JFS's missing migrate_folio operation.  The warning was introduced by
commit 7ee3647243e5 ("migrate: Remove call to ->writepage") which added
explicit warnings when filesystem don't implement migrate_folio.

The syzbot reported following [1]:
  jfs_metapage_aops does not implement migrate_folio
  WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 fallback_migrate_folio mm/migrate.c:953 [inline]
  WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 move_to_new_folio+0x70e/0x840 mm/migrate.c:1007
  Modules linked in:
  CPU: 1 UID: 0 PID: 5861 Comm: syz-executor280 Not tainted 6.15.0-rc1-next-20250411-syzkaller #0 PREEMPT(full)
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025
  RIP: 0010:fallback_migrate_folio mm/migrate.c:953 [inline]
  RIP: 0010:move_to_new_folio+0x70e/0x840 mm/migrate.c:1007

To fix this issue, this series implement metapage_migrate_folio() for JFS
which handles both single and multiple metapages per page configurations.

While most filesystems leverage existing migration implementations like
filemap_migrate_folio(), buffer_migrate_folio_norefs() or
buffer_migrate_folio() (which internally used folio_expected_refs()),
JFS's metapage architecture requires special handling of its private data
during migration.  To support this, this series introduce the
folio_expected_ref_count(), which calculates external references to a
folio from page/swap cache, private data, and page table mappings.

This standardized implementation replaces the previous ad-hoc
folio_expected_refs() function and enables JFS to accurately determine
whether a folio has unexpected references before attempting migration.


Implement folio_expected_ref_count() to calculate expected folio reference
counts from:
- Page/swap cache (1 per page)
- Private data (1)
- Page table mappings (1 per map)

While originally needed for page migration operations, this improved
implementation standardizes reference counting by consolidating all
refcount contributors into a single, reusable function that can benefit
any subsystem needing to detect unexpected references to folios.

The folio_expected_ref_count() returns the sum of these external
references without including any reference the caller itself might hold.
Callers comparing against the actual folio_ref_count() must account for
their own references separately.

Link: https://syzkaller.appspot.com/bug?extid=8bb6fd945af4e0ad9299 [1]
Link: https://lkml.kernel.org/r/20250430100150.279751-1-shivankg@amd.com
Link: https://lkml.kernel.org/r/20250430100150.279751-2-shivankg@amd.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Co-developed-by: David Hildenbrand <david@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dave Kleikamp <shaggy@kernel.org>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 21dd110b6655..1d1953e37baa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2114,6 +2114,61 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio)
 	return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
 }
 
+/**
+ * folio_expected_ref_count - calculate the expected folio refcount
+ * @folio: the folio
+ *
+ * Calculate the expected folio refcount, taking references from the pagecache,
+ * swapcache, PG_private and page table mappings into account. Useful in
+ * combination with folio_ref_count() to detect unexpected references (e.g.,
+ * GUP or other temporary references).
+ *
+ * Does currently not consider references from the LRU cache. If the folio
+ * was isolated from the LRU (which is the case during migration or split),
+ * the LRU cache does not apply.
+ *
+ * Calling this function on an unmapped folio -- !folio_mapped() -- that is
+ * locked will return a stable result.
+ *
+ * Calling this function on a mapped folio will not result in a stable result,
+ * because nothing stops additional page table mappings from coming (e.g.,
+ * fork()) or going (e.g., munmap()).
+ *
+ * Calling this function without the folio lock will also not result in a
+ * stable result: for example, the folio might get dropped from the swapcache
+ * concurrently.
+ *
+ * However, even when called without the folio lock or on a mapped folio,
+ * this function can be used to detect unexpected references early (for example,
+ * if it makes sense to even lock the folio and unmap it).
+ *
+ * The caller must add any reference (e.g., from folio_try_get()) it might be
+ * holding itself to the result.
+ *
+ * Returns the expected folio refcount.
+ */
+static inline int folio_expected_ref_count(const struct folio *folio)
+{
+	const int order = folio_order(folio);
+	int ref_count = 0;
+
+	if (WARN_ON_ONCE(folio_test_slab(folio)))
+		return 0;
+
+	if (folio_test_anon(folio)) {
+		/* One reference per page from the swapcache. */
+		ref_count += folio_test_swapcache(folio) << order;
+	} else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) {
+		/* One reference per page from the pagecache. */
+		ref_count += !!folio->mapping << order;
+		/* One reference from PG_private. */
+		ref_count += folio_test_private(folio);
+	}
+
+	/* One reference per page table mapping. */
+	return ref_count + folio_mapcount(folio);
+}
+
 #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
 static inline int arch_make_folio_accessible(struct folio *folio)
 {
-- 
cgit v1.2.3


From 6c36ac1e124f1be97cf0485a220865fce5a2020d Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Mon, 28 Apr 2025 16:28:14 +0100
Subject: mm: establish mm/vma_exec.c for shared exec/mm VMA functionality

Patch series "move all VMA allocation, freeing and duplication logic to
mm", v3.

Currently VMA allocation, freeing and duplication exist in kernel/fork.c,
which is a violation of separation of concerns, and leaves these functions
exposed to the rest of the kernel when they are in fact internal
implementation details.

Resolve this by moving this logic to mm, and making it internal to vma.c,
vma.h.

This also allows us, in future, to provide userland testing around this
functionality.

We additionally abstract dup_mmap() to mm, being careful to ensure
kernel/fork.c acceses this via the mm internal header so it is not exposed
elsewhere in the kernel.

As part of this change, also abstract initial stack allocation performed
in __bprm_mm_init() out of fs code into mm via the
create_init_stack_vma(), as this code uses vm_area_alloc() and
vm_area_free().

In order to do so sensibly, we introduce a new mm/vma_exec.c file, which
contains the code that is shared by mm and exec.  This file is added to
both memory mapping and exec sections in MAINTAINERS so both sets of
maintainers can maintain oversight.

As part of this change, we also move relocate_vma_down() to mm/vma_exec.c
so all shared mm/exec functionality is kept in one place.

We add code shared between nommu and mmu-enabled configurations in order
to share VMA allocation, freeing and duplication code correctly while also
keeping these functions available in userland VMA testing.

This is achieved by adding a mm/vma_init.c file which is also compiled by
the userland tests.


This patch (of 4):

There is functionality that overlaps the exec and memory mapping
subsystems.  While it properly belongs in mm, it is important that exec
maintainers maintain oversight of this functionality correctly.

We can establish both goals by adding a new mm/vma_exec.c file which
contains these 'glue' functions, and have fs/exec.c import them.

As a part of this change, to ensure that proper oversight is achieved, add
the file to both the MEMORY MAPPING and EXEC & BINFMT API, ELF sections.

scripts/get_maintainer.pl can correctly handle files in multiple entries
and this neatly handles the cross-over.

[akpm@linux-foundation.org: fix comment typo]
  Link: https://lkml.kernel.org/r/80f0d0c6-0b68-47f9-ab78-0ab7f74677fc@lucifer.local
Link: https://lkml.kernel.org/r/cover.1745853549.git.lorenzo.stoakes@oracle.com
Link: https://lkml.kernel.org/r/91f2cee8f17d65214a9d83abb7011aa15f1ea690.1745853549.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1d1953e37baa..43748c8f3454 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3278,7 +3278,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void exit_mmap(struct mm_struct *);
-int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);
 bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma,
 				 unsigned long addr, bool write);
 
-- 
cgit v1.2.3


From 69eadd6a05409ca3725cabf8d60ccf6c8f87e193 Mon Sep 17 00:00:00 2001
From: Guilherme Giacomo Simoes <trintaeoitogc@gmail.com>
Date: Mon, 28 Apr 2025 17:14:09 -0300
Subject: mm: page-flags-layout.h: change the KASAN_TAG_WIDTH for HW_TAGS

KASAN_TAG_WIDTH is 8 bits for both (HW_TAGS and SW_TAGS), but for HW_TAGS
the KASAN_TAG_WIDTH can be 4 bits bits because due to the design of the
MTE the memory words for storing metadata only need 4 bits.  Change the
preprocessor define KASAN_TAG_WIDTH for check if SW_TAGS is define, so
KASAN_TAG_WIDTH should be 8 bits, but if HW_TAGS is define, so
KASAN_TAG_WIDTH should be 4 bits to save a few flags bits.

Link: https://lkml.kernel.org/r/20250428201409.5482-1-trintaeoitogc@gmail.com
Signed-off-by: Guilherme Giacomo Simoes <trintaeoitogc@gmail.com>
Suggested-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags-layout.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 4f5c9e979bb9..760006b1c480 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -72,8 +72,10 @@
 #define NODE_NOT_IN_PAGE_FLAGS	1
 #endif
 
-#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
+#if defined(CONFIG_KASAN_SW_TAGS)
 #define KASAN_TAG_WIDTH 8
+#elif defined(CONFIG_KASAN_HW_TAGS)
+#define KASAN_TAG_WIDTH 4
 #else
 #define KASAN_TAG_WIDTH 0
 #endif
-- 
cgit v1.2.3


From 2b80f633c360ce3c56a7071782eae70852c8f344 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Thu, 1 May 2025 02:10:50 +0800
Subject: filemap: do not use folio_contains for swap cache folios

Currently, none of the folio_contains callers should encounter swap cache
folios.

For fs/ callers, swap cache folios are never part of their workflow.

For filemap and truncate, folio_contains is only used for sanity checks to
verify the folio index matches the expected lookup / invalidation target.

The swap cache does not utilize filemap or truncate helpers in ways that
would trigger these checks, as it mostly implements its own cache
management.

Shmem won't trigger these sanity checks either unless thing went wrong, as
it would directly trigger a BUG because swap cache index are unrelated and
almost never matches shmem index.  Shmem have to handle mixed values of
folios, shadows, and swap entries, so it has its own way of handling the
mapping.

While some filemap helpers works for swap cache space, the swap cache is
different from the page cache in many ways.  So this particular helper
will unlikely to work in a helpful way for swap cache folios.

So make it explicit here that folio_contains should not be used for swap
cache folios.  This helps to avoid misuse, make swap cache less exposed
and remove the folio_index usage here.

[akpm@linux-foundation.org: s/VM_WARN_ON_FOLIO/VM_WARN_ON_ONCE_FOLIO/, per Kairui]
Link: https://lkml.kernel.org/r/20250430181052.55698-5-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Sterba <dsterba@suse.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Joanne Koong <joannelkoong@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Qu Wenruo <wqu@suse.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index af25fb640463..b2c2ff8de046 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -935,14 +935,14 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
  * @folio: The folio.
  * @index: The page index within the file.
  *
- * Context: The caller should have the page locked in order to prevent
- * (eg) shmem from moving the page between the page cache and swap cache
- * and changing its index in the middle of the operation.
+ * Context: The caller should have the folio locked and ensure
+ * e.g., shmem did not move this folio to the swap cache.
  * Return: true or false.
  */
 static inline bool folio_contains(struct folio *folio, pgoff_t index)
 {
-	return index - folio_index(folio) < folio_nr_pages(folio);
+	VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
+	return index - folio->index < folio_nr_pages(folio);
 }
 
 unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
-- 
cgit v1.2.3


From 7d0f0f06153116bb737eef76d47406639e161613 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Thu, 1 May 2025 02:10:51 +0800
Subject: mm: move folio_index to mm/swap.h and remove no longer needed helper

There are no remaining users of folio_index() outside the mm subsystem.
Move it to mm/swap.h to co-locate it with swap_cache_index(), eliminating
a forward declaration, and a function call overhead.

Also remove the helper that was used to fix circular header dependency
issue.

Link: https://lkml.kernel.org/r/20250430181052.55698-6-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Sterba <dsterba@suse.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Joanne Koong <joannelkoong@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Qu Wenruo <wqu@suse.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b2c2ff8de046..5d66786867eb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -884,26 +884,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
-extern pgoff_t __folio_swap_cache_index(struct folio *folio);
-
-/**
- * folio_index - File index of a folio.
- * @folio: The folio.
- *
- * For a folio which is either in the page cache or the swap cache,
- * return its index within the address_space it belongs to.  If you know
- * the page is definitely in the page cache, you can look at the folio's
- * index directly.
- *
- * Return: The index (offset in units of pages) of a folio in its file.
- */
-static inline pgoff_t folio_index(struct folio *folio)
-{
-	if (unlikely(folio_test_swapcache(folio)))
-		return __folio_swap_cache_index(folio);
-	return folio->index;
-}
-
 /**
  * folio_next_index - Get the index of the next folio.
  * @folio: The current folio.
-- 
cgit v1.2.3


From dd309bfc68efc4522b4d33a95e92afff249e103b Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Thu, 1 May 2025 02:10:52 +0800
Subject: mm, swap: remove no longer used swap mapping helper

This helper existed to fix the circular header dependency issue but it is
no longer used since commit 0d40cfe63a2f ("fs: remove
folio_file_mapping()"), remove it.

Link: https://lkml.kernel.org/r/20250430181052.55698-7-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Sterba <dsterba@suse.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Joanne Koong <joannelkoong@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Qu Wenruo <wqu@suse.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5d66786867eb..d2ced9920992 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -533,7 +533,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
 }
 
 struct address_space *folio_mapping(struct folio *);
-struct address_space *swapcache_mapping(struct folio *);
 
 /**
  * folio_flush_mapping - Find the file mapping this folio belongs to.
-- 
cgit v1.2.3


From 0aa7b390fc40a871267a2328bbbefca8b37ad307 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Thu, 24 Apr 2025 16:25:25 +0300
Subject: mtd: core: always create master device

Create master device without partition when
CONFIG_MTD_PARTITIONED_MASTER flag is unset.

This streamlines device tree and allows to anchor
runtime power management on master device in all cases.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/partitions.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index b74a539ec581..5daf80df9e89 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser);
 		      deregister_mtd_parser)
 
 int mtd_add_partition(struct mtd_info *master, const char *name,
-		      long long offset, long long length);
+		      long long offset, long long length, struct mtd_info **part);
 int mtd_del_partition(struct mtd_info *master, int partno);
 uint64_t mtd_get_device_size(const struct mtd_info *mtd);
 
-- 
cgit v1.2.3


From 598995027b9181ada81789bf01fb8ef30d93c9dc Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Tue, 6 May 2025 21:57:31 +0100
Subject: soc: samsung: exynos-pmu: enable CPU hotplug support for gs101

Some additional register writes are required when hotplugging CPUs
on gs101, without these the system hangs when hotplugging.

Specifically a CPU_INFORM register needs to be programmed with
a hint value which is used by the EL3 firmware (el3mon) and the
pmu-intr-gen registers need to be programmed.

With this patch applied, and corresponding DT update CPU hotplug
now works as expected. e.g.

echo 0 > /sys/devices/system/cpu/cpu6/online
echo 1 > /sys/devices/system/cpu/cpu6/online

Note: to maintain compatibility with older DTs that didn't specify
pmu-intr-gen phandle only a warning is issued if the syscon can't
be obtained.

Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Link: https://lore.kernel.org/r/20250506-contrib-pg-cpu-hotplug-suspend2ram-fixes-v1-v4-5-9f64a2657316@linaro.org
[krzk: few blank line and white-space alignment fixes from checkpatch]
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index ce1a3790d6fb..0d5a17ea8fb8 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -658,9 +658,20 @@
 #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION		(0x32A8)
 
 /* For Tensor GS101 */
+/* PMU ALIVE */
 #define GS101_SYSIP_DAT0					(0x810)
+#define GS101_CPU0_INFORM					(0x860)
+#define GS101_CPU_INFORM(cpu)	\
+			(GS101_CPU0_INFORM + (cpu*4))
 #define GS101_SYSTEM_CONFIGURATION				(0x3A00)
 #define GS101_PHY_CTRL_USB20					(0x3EB0)
 #define GS101_PHY_CTRL_USBDP					(0x3EB4)
 
+/* PMU INTR GEN */
+#define GS101_GRP1_INTR_BID_UPEND				(0x0108)
+#define GS101_GRP1_INTR_BID_CLEAR				(0x010c)
+#define GS101_GRP2_INTR_BID_ENABLE				(0x0200)
+#define GS101_GRP2_INTR_BID_UPEND				(0x0208)
+#define GS101_GRP2_INTR_BID_CLEAR				(0x020c)
+
 #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */
-- 
cgit v1.2.3


From 8c619eb21b8e87ae95877e9cca9fcb0e3115776e Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Sun, 4 May 2025 10:14:34 +0200
Subject: net: phy: micrel: remove KSZ9477 EEE quirks now handled by phylink

The KSZ9477 PHY driver contained workarounds for broken EEE capability
advertisements by manually masking supported EEE modes and forcibly
disabling EEE if MICREL_NO_EEE was set.

With proper MAC-side EEE handling implemented via phylink, these quirks
are no longer necessary. Remove MICREL_NO_EEE handling and the use of
ksz9477_get_features().

This simplifies the PHY driver and avoids duplicated EEE management logic.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: stable@vger.kernel.org # v6.14+
Link: https://patch.msgid.link/20250504081434.424489-3-o.rempel@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/micrel_phy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 591bf5b5e8dc..9af01bdd86d2 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -44,7 +44,6 @@
 #define MICREL_PHY_50MHZ_CLK	BIT(0)
 #define MICREL_PHY_FXEN		BIT(1)
 #define MICREL_KSZ8_P1_ERRATA	BIT(2)
-#define MICREL_NO_EEE		BIT(3)
 
 #define MICREL_KSZ9021_EXTREG_CTRL	0xB
 #define MICREL_KSZ9021_EXTREG_DATA_WRITE	0xC
-- 
cgit v1.2.3


From e9f3d61db5cb29b3f17f0dc40c3ec2cda2ee93e5 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Thu, 8 May 2025 00:48:22 +0000
Subject: net: add get_netmem/put_netmem support

Currently net_iovs support only pp ref counts, and do not support a
page ref equivalent.

This is fine for the RX path as net_iovs are used exclusively with the
pp and only pp refcounting is needed there. The TX path however does not
use pp ref counts, thus, support for get_page/put_page equivalent is
needed for netmem.

Support get_netmem/put_netmem. Check the type of the netmem before
passing it to page or net_iov specific code to obtain a page ref
equivalent.

For dmabuf net_iovs, we obtain a ref on the underlying binding. This
ensures the entire binding doesn't disappear until all the net_iovs have
been put_netmem'ed. We do not need to track the refcount of individual
dmabuf net_iovs as we don't allocate/free them from a pool similar to
what the buddy allocator does for pages.

This code is written to be extensible by other net_iov implementers.
get_netmem/put_netmem will check the type of the netmem and route it to
the correct helper:

pages -> [get|put]_page()
dmabuf net_iovs -> net_devmem_[get|put]_net_iov()
new net_iovs ->	new helpers

Signed-off-by: Mina Almasry <almasrymina@google.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250508004830.4100853-3-almasrymina@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff_ref.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h
index 0f3c58007488..9e49372ef1a0 100644
--- a/include/linux/skbuff_ref.h
+++ b/include/linux/skbuff_ref.h
@@ -17,7 +17,7 @@
  */
 static inline void __skb_frag_ref(skb_frag_t *frag)
 {
-	get_page(skb_frag_page(frag));
+	get_netmem(skb_frag_netmem(frag));
 }
 
 /**
@@ -40,7 +40,7 @@ static inline void skb_page_unref(netmem_ref netmem, bool recycle)
 	if (recycle && napi_pp_put_page(netmem))
 		return;
 #endif
-	put_page(netmem_to_page(netmem));
+	put_netmem(netmem);
 }
 
 /**
-- 
cgit v1.2.3


From bd61848900bff597764238f3a8ec67c815cd316e Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Thu, 8 May 2025 00:48:24 +0000
Subject: net: devmem: Implement TX path

Augment dmabuf binding to be able to handle TX. Additional to all the RX
binding, we also create tx_vec needed for the TX path.

Provide API for sendmsg to be able to send dmabufs bound to this device:

- Provide a new dmabuf_tx_cmsg which includes the dmabuf to send from.
- MSG_ZEROCOPY with SCM_DEVMEM_DMABUF cmsg indicates send from dma-buf.

Devmem is uncopyable, so piggyback off the existing MSG_ZEROCOPY
implementation, while disabling instances where MSG_ZEROCOPY falls back
to copying.

We additionally pipe the binding down to the new
zerocopy_fill_skb_from_devmem which fills a TX skb with net_iov netmems
instead of the traditional page netmems.

We also special case skb_frag_dma_map to return the dma-address of these
dmabuf net_iovs instead of attempting to map pages.

The TX path may release the dmabuf in a context where we cannot wait.
This happens when the user unbinds a TX dmabuf while there are still
references to its netmems in the TX path. In that case, the netmems will
be put_netmem'd from a context where we can't unmap the dmabuf, Resolve
this by making __net_devmem_dmabuf_binding_free schedule_work'd.

Based on work by Stanislav Fomichev <sdf@fomichev.me>. A lot of the meat
of the implementation came from devmem TCP RFC v1[1], which included the
TX path, but Stan did all the rebasing on top of netmem/net_iov.

Cc: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250508004830.4100853-5-almasrymina@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f3e72be6f634..c7397b17bb08 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1707,13 +1707,16 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
 extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops;
 
 struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
-				       struct ubuf_info *uarg);
+				       struct ubuf_info *uarg, bool devmem);
 
 void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
 
+struct net_devmem_dmabuf_binding;
+
 int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 			    struct sk_buff *skb, struct iov_iter *from,
-			    size_t length);
+			    size_t length,
+			    struct net_devmem_dmabuf_binding *binding);
 
 int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
 				struct iov_iter *from, size_t length);
@@ -1721,12 +1724,14 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
 static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
 					  struct msghdr *msg, int len)
 {
-	return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len);
+	return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len,
+				       NULL);
 }
 
 int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 			     struct msghdr *msg, int len,
-			     struct ubuf_info *uarg);
+			     struct ubuf_info *uarg,
+			     struct net_devmem_dmabuf_binding *binding);
 
 /* Internal */
 #define skb_shinfo(SKB)	((struct skb_shared_info *)(skb_end_pointer(SKB)))
@@ -3697,6 +3702,10 @@ static inline dma_addr_t __skb_frag_dma_map(struct device *dev,
 					    size_t offset, size_t size,
 					    enum dma_data_direction dir)
 {
+	if (skb_frag_is_net_iov(frag)) {
+		return netmem_to_net_iov(frag->netmem)->dma_addr + offset +
+		       frag->offset;
+	}
 	return dma_map_page(dev, skb_frag_page(frag),
 			    skb_frag_off(frag) + offset, size, dir);
 }
-- 
cgit v1.2.3


From 383faec0fd64b9bff15eb5f700f023ec35520a96 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Thu, 8 May 2025 00:48:26 +0000
Subject: net: enable driver support for netmem TX

Drivers need to make sure not to pass netmem dma-addrs to the
dma-mapping API in order to support netmem TX.

Add helpers and netmem_dma_*() helpers that enables special handling of
netmem dma-addrs that drivers can use.

Document in netmem.rst what drivers need to do to support netmem TX.

Signed-off-by: Mina Almasry <almasrymina@google.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250508004830.4100853-7-almasrymina@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 773167508c82..32a1e41636a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1772,6 +1772,7 @@ enum netdev_reg_state {
  *	@lltx:		device supports lockless Tx. Deprecated for real HW
  *			drivers. Mainly used by logical interfaces, such as
  *			bonding and tunnels
+ *	@netmem_tx:	device support netmem_tx.
  *
  *	@name:	This is the first field of the "visible" part of this structure
  *		(i.e. as seen by users in the "Space.c" file).  It is the name
@@ -2087,6 +2088,7 @@ struct net_device {
 	struct_group(priv_flags_fast,
 		unsigned long		priv_flags:32;
 		unsigned long		lltx:1;
+		unsigned long		netmem_tx:1;
 	);
 	const struct net_device_ops *netdev_ops;
 	const struct header_ops *header_ops;
-- 
cgit v1.2.3


From 4f8ceb0302b36c5f78bcc8d0e7cfa2372fba134c Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Date: Tue, 29 Apr 2025 14:51:28 +0200
Subject: mfd: stm32-lptimer: Add support for stm32mp25

Add support for STM32MP25 SoC.
A new hardware configuration register (HWCFGR2) has been added, to gather
number of capture/compare channels, autonomous mode and input capture
capability. The full feature set is implemented in LPTIM1/2/3/4. LPTIM5
supports a smaller set of features. This can now be read from HWCFGR
registers.

Add new registers to the stm32-lptimer.h: CCMR1, CCR2, HWCFGR1/2 and VERR.
Update the stm32_lptimer data struct so signal the number of
capture/compare channels to the child devices.
Also Remove some unused bit masks (CMPOK_ARROK / CMPOKCF_ARROKCF).

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/20250429125133.1574167-3-fabrice.gasnier@foss.st.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/stm32-lptimer.h | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h
index 06d3f11dc3c9..a592c8dc716d 100644
--- a/include/linux/mfd/stm32-lptimer.h
+++ b/include/linux/mfd/stm32-lptimer.h
@@ -17,20 +17,30 @@
 #define STM32_LPTIM_IER		0x08	/* Interrupt Enable Reg      */
 #define STM32_LPTIM_CFGR	0x0C	/* Configuration Reg         */
 #define STM32_LPTIM_CR		0x10	/* Control Reg               */
-#define STM32_LPTIM_CMP		0x14	/* Compare Reg               */
+#define STM32_LPTIM_CMP		0x14	/* Compare Reg (MP25 CCR1)   */
 #define STM32_LPTIM_ARR		0x18	/* Autoreload Reg            */
 #define STM32_LPTIM_CNT		0x1C	/* Counter Reg               */
+#define STM32_LPTIM_CCMR1	0x2C	/* Capture/Compare Mode MP25 */
+#define STM32_LPTIM_CCR2	0x34	/* Compare Reg2 MP25         */
+
+#define STM32_LPTIM_HWCFGR2	0x3EC	/* Hardware configuration register 2 - MP25 */
+#define STM32_LPTIM_HWCFGR1	0x3F0	/* Hardware configuration register 1 - MP15 */
+#define STM32_LPTIM_VERR	0x3F4	/* Version identification register - MP15 */
 
 /* STM32_LPTIM_ISR - bit fields */
+#define STM32_LPTIM_DIEROK_ARROK	(BIT(24) | BIT(4)) /* MP25 */
+#define STM32_LPTIM_CMP2_ARROK		(BIT(19) | BIT(4))
 #define STM32_LPTIM_CMPOK_ARROK		GENMASK(4, 3)
 #define STM32_LPTIM_ARROK		BIT(4)
 #define STM32_LPTIM_CMPOK		BIT(3)
 
 /* STM32_LPTIM_ICR - bit fields */
-#define STM32_LPTIM_ARRMCF		BIT(1)
+#define STM32_LPTIM_DIEROKCF_ARROKCF	(BIT(24) | BIT(4)) /* MP25 */
+#define STM32_LPTIM_CMP2OKCF_ARROKCF	(BIT(19) | BIT(4))
 #define STM32_LPTIM_CMPOKCF_ARROKCF	GENMASK(4, 3)
+#define STM32_LPTIM_ARRMCF		BIT(1)
 
-/* STM32_LPTIM_IER - bit flieds */
+/* STM32_LPTIM_IER - bit fields */
 #define STM32_LPTIM_ARRMIE	BIT(1)
 
 /* STM32_LPTIM_CR - bit fields */
@@ -53,16 +63,37 @@
 /* STM32_LPTIM_ARR */
 #define STM32_LPTIM_MAX_ARR	0xFFFF
 
+/* STM32_LPTIM_CCMR1 */
+#define STM32_LPTIM_CC2P	GENMASK(19, 18)
+#define STM32_LPTIM_CC2E	BIT(17)
+#define STM32_LPTIM_CC2SEL	BIT(16)
+#define STM32_LPTIM_CC1P	GENMASK(3, 2)
+#define STM32_LPTIM_CC1E	BIT(1)
+#define STM32_LPTIM_CC1SEL	BIT(0)
+
+/* STM32_LPTIM_HWCFGR1 */
+#define STM32_LPTIM_HWCFGR1_ENCODER	BIT(16)
+
+/* STM32_LPTIM_HWCFGR2 */
+#define STM32_LPTIM_HWCFGR2_CHAN_NUM	GENMASK(3, 0)
+
+/* STM32_LPTIM_VERR */
+#define STM32_LPTIM_VERR_23	0x23	/* STM32MP25 */
+
 /**
  * struct stm32_lptimer - STM32 Low-Power Timer data assigned by parent device
  * @clk: clock reference for this instance
  * @regmap: register map reference for this instance
  * @has_encoder: indicates this Low-Power Timer supports encoder mode
+ * @num_cc_chans: indicates the number of capture/compare channels
+ * @version: indicates the major and minor revision of the controller
  */
 struct stm32_lptimer {
 	struct clk *clk;
 	struct regmap *regmap;
 	bool has_encoder;
+	unsigned int num_cc_chans;
+	u32 version;
 };
 
 #endif
-- 
cgit v1.2.3


From 34a364ff04e960a4d47f558acf7fbafcc3085c1f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 9 May 2025 15:02:27 +0200
Subject: PM: sleep: Introduce pm_suspend_in_progress()

Introduce pm_suspend_in_progress() to be used for checking if a system-
wide suspend or resume transition is in progress, instead of comparing
pm_suspend_target_state directly to PM_SUSPEND_ON, and use it where
applicable.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/2020901.PYKUYFuaPT@rjwysocki.net
---
 include/linux/suspend.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index da6ebca3ff77..52ea108f9451 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -298,6 +298,11 @@ static inline void s2idle_set_ops(const struct platform_s2idle_ops *ops) {}
 static inline void s2idle_wake(void) {}
 #endif /* !CONFIG_SUSPEND */
 
+static inline bool pm_suspend_in_progress(void)
+{
+	return pm_suspend_target_state != PM_SUSPEND_ON;
+}
+
 /* struct pbe is used for creating lists of pages that should be restored
  * atomically during the resume from disk, because the page frames they have
  * occupied before the suspend are in use.
-- 
cgit v1.2.3


From 4a6b1cf0d4c02d6da2976c6314c264d20672937e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 6 May 2025 22:41:21 +0200
Subject: PM: EM: Introduce em_adjust_cpu_capacity()

Add a function for updating the Energy Model for a CPU after its
capacity has changed, which subsequently will be used by the
intel_pstate driver.

An EM_PERF_DOMAIN_ARTIFICIAL check is added to em_recalc_and_update()
to prevent it from calling em_compute_costs() for an "artificial" perf
domain with a NULL cb parameter which would cause it to crash.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/3637203.iIbC2pHGDl@rjwysocki.net
---
 include/linux/energy_model.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index d8eabbf86a5b..7fa1eb3cc823 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -179,6 +179,7 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
 int em_dev_update_chip_binning(struct device *dev);
 int em_update_performance_limits(struct em_perf_domain *pd,
 		unsigned long freq_min_khz, unsigned long freq_max_khz);
+void em_adjust_cpu_capacity(unsigned int cpu);
 void em_rebuild_sched_domains(void);
 
 /**
@@ -403,6 +404,7 @@ int em_update_performance_limits(struct em_perf_domain *pd,
 {
 	return -EINVAL;
 }
+static inline void em_adjust_cpu_capacity(unsigned int cpu) {}
 static inline void em_rebuild_sched_domains(void) {}
 #endif
 
-- 
cgit v1.2.3


From a98e892c694284256296465a78d11072e2c5419f Mon Sep 17 00:00:00 2001
From: Werner Sembach <wse@tuxedocomputers.com>
Date: Tue, 11 Feb 2025 14:39:05 +0100
Subject: HID: core: Add functions for HID drivers to react on first open and
 last close call

Adds a new function to the hid_driver struct that is called when the
userspace starts using the device, and another one that is called when
userspace stop using the device. With this a hid driver can implement
special suspend handling for devices currently not in use.

Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
Link: https://patch.msgid.link/20250211133950.422232-1-wse@tuxedocomputers.com
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index a1305210b2fd..568a9d8c749b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -795,6 +795,8 @@ struct hid_usage_id {
  * @suspend: invoked on suspend (NULL means nop)
  * @resume: invoked on resume if device was not reset (NULL means nop)
  * @reset_resume: invoked on resume if device was reset (NULL means nop)
+ * @on_hid_hw_open: invoked when hid core opens first instance (NULL means nop)
+ * @on_hid_hw_close: invoked when hid core closes last instance (NULL means nop)
  *
  * probe should return -errno on error, or 0 on success. During probe,
  * input will not be passed to raw_event unless hid_device_io_start is
@@ -850,6 +852,8 @@ struct hid_driver {
 	int (*suspend)(struct hid_device *hdev, pm_message_t message);
 	int (*resume)(struct hid_device *hdev);
 	int (*reset_resume)(struct hid_device *hdev);
+	void (*on_hid_hw_open)(struct hid_device *hdev);
+	void (*on_hid_hw_close)(struct hid_device *hdev);
 
 /* private: */
 	struct device_driver driver;
-- 
cgit v1.2.3


From 6c58d2791d6046727d87db50a5e46644f195dcf9 Mon Sep 17 00:00:00 2001
From: Alex Shi <alexs@kernel.org>
Date: Thu, 10 Apr 2025 17:24:16 +0800
Subject: tick/nohz: Remove unused tick_nohz_full_add_cpus_to()

This function isn't used anywhere. Remove it.

Signed-off-by: Alex Shi <alexs@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250410092423.9831-1-alexs@kernel.org
---
 include/linux/tick.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index b8ddc8e631a3..ac76ae9fa36d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -195,12 +195,6 @@ static inline bool tick_nohz_full_enabled(void)
 	__ret;								\
 })
 
-static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
-{
-	if (tick_nohz_full_enabled())
-		cpumask_or(mask, mask, tick_nohz_full_mask);
-}
-
 extern void tick_nohz_dep_set(enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
 extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
@@ -281,7 +275,6 @@ extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
 #else
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
-static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
 
 static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
 static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
-- 
cgit v1.2.3


From 895ee6a22e3195b7c1fee140c842bdeedb89ed33 Mon Sep 17 00:00:00 2001
From: "Yury Norov [NVIDIA]" <yury.norov@gmail.com>
Date: Fri, 9 May 2025 12:20:08 -0400
Subject: topology: make for_each_node_with_cpus() O(N)

for_each_node_with_cpus() calls nr_cpus_node() at every iteration, which
makes it O(N^2). Kernel tracks such nodes with N_CPU record in node_states
array. Switching to it makes for_each_node_with_cpus() O(N).

Andrea:

Now we can include also offline nodes with CPUs assigned (assuming it's
possible). If checking the online state is required, the user must use
node_online() within the loop.

CC: Andrea Righi <arighi@nvidia.com>
CC:Tejun Heo <tj@kernel.org>
Signed-off-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
---
 include/linux/nodemask.h | 1 +
 include/linux/topology.h | 5 +----
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 0aa6b63aa747..f08ae71585fa 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -522,6 +522,7 @@ static __always_inline int node_random(const nodemask_t *maskp)
 
 #define for_each_node(node)	   for_each_node_state(node, N_POSSIBLE)
 #define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
+#define for_each_node_with_cpus(node)	for_each_node_state(node, N_CPU)
 
 /*
  * For nodemask scratch area.
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 24e715f0f6d2..ffee6b4a071a 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -29,6 +29,7 @@
 
 #include <linux/arch_topology.h>
 #include <linux/cpumask.h>
+#include <linux/nodemask.h>
 #include <linux/bitops.h>
 #include <linux/mmzone.h>
 #include <linux/smp.h>
@@ -39,10 +40,6 @@
 #define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node))
 #endif
 
-#define for_each_node_with_cpus(node)			\
-	for_each_online_node(node)			\
-		if (nr_cpus_node(node))
-
 int arch_update_cpu_topology(void);
 
 /* Conform to ACPI 2.0 SLIT distance definitions */
-- 
cgit v1.2.3


From c4da7bf54b1f76e7c5c8cc6d1c4db8b19af67c5d Mon Sep 17 00:00:00 2001
From: Zizhi Wo <wozizhi@huawei.com>
Date: Tue, 6 May 2025 10:09:31 +0800
Subject: blk-throttle: Introduce flag "BIO_TG_BPS_THROTTLED"

Subsequent patches will split the single queue into separate bps and iops
queues. To prevent IO that has already passed through the bps queue at a
single tg level from being counted toward bps wait time again, we introduce
"BIO_TG_BPS_THROTTLED" flag. Since throttle and QoS operate at different
levels, we reuse the value as "BIO_QOS_THROTTLED".

We set this flag when charge bps and clear it when charge iops, as the bio
will move to the upper-level tg or be dispatched.

This patch does not involve functional changes.

Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Zizhi Wo <wozizhi@huaweicloud.com>
Link: https://lore.kernel.org/r/20250506020935.655574-5-wozizhi@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f38425338c3f..3d1577f07c1c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -296,6 +296,14 @@ enum {
 				 * of this bio. */
 	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
 	BIO_QOS_THROTTLED,	/* bio went through rq_qos throttle path */
+	/*
+	 * This bio has completed bps throttling at the single tg granularity,
+	 * which is different from BIO_BPS_THROTTLED. When the bio is enqueued
+	 * into the sq->queued of the upper tg, or is about to be dispatched,
+	 * this flag needs to be cleared. Since blk-throttle and rq_qos are not
+	 * on the same hierarchical level, reuse the value.
+	 */
+	BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED,
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
-- 
cgit v1.2.3


From f5c0ecf196aaf78777f1606f1e0392c5e57c4530 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 9 May 2025 15:03:35 +0200
Subject: PM: sleep: Introduce pm_sleep_transition_in_progress()

The "suspend in progress" check in device_wakeup_enable() does not
cover hibernation, but arguably it should do that, so introduce
pm_sleep_transition_in_progress() covering transitions during both
system suspend and hibernation to use in there and use it also in
pm_debug_messages_should_print().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/7820474.EvYhyI6sBW@rjwysocki.net
[ rjw: Move the new function definition under CONFIG_PM_SLEEP ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/suspend.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 52ea108f9451..b1c76c8f2c82 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -475,6 +475,8 @@ extern void pm_print_active_wakeup_sources(void);
 extern unsigned int lock_system_sleep(void);
 extern void unlock_system_sleep(unsigned int);
 
+extern bool pm_sleep_transition_in_progress(void);
+
 #else /* !CONFIG_PM_SLEEP */
 
 static inline int register_pm_notifier(struct notifier_block *nb)
@@ -503,6 +505,8 @@ static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
 static inline unsigned int lock_system_sleep(void) { return 0; }
 static inline void unlock_system_sleep(unsigned int flags) {}
 
+static inline bool pm_sleep_transition_in_progress(void) { return false; }
+
 #endif /* !CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM_SLEEP_DEBUG
-- 
cgit v1.2.3


From c84bf6dd2b836b49bb2662668ff1692350d28236 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Fri, 9 May 2025 13:13:34 +0100
Subject: mm: introduce new .mmap_prepare() file callback

Patch series "eliminate mmap() retry merge, add .mmap_prepare hook", v2.

During the mmap() of a file-backed mapping, we invoke the underlying
driver file's mmap() callback in order to perform driver/file system
initialisation of the underlying VMA.

This has been a source of issues in the past, including a significant
security concern relating to unwinding of error state discovered by Jann
Horn, as fixed in commit 5de195060b2e ("mm: resolve faulty mmap_region()
error path behaviour") which performed the recent, significant, rework of
mmap() as a whole.

However, we have had a fly in the ointment remain - drivers have a great
deal of freedom in the .mmap() hook to manipulate VMA state (as well as
page table state).

This can be problematic, as we can no longer reason sensibly about VMA
state once the call is complete (the ability to do - anything - here does
rather interfere with that).

In addition, callers may choose to do odd or unusual things which might
interfere with subsequent steps in the mmap() process, and it may do so
and then raise an error, requiring very careful unwinding of state about
which we can make no assumptions.

Rather than providing such an open-ended interface, this series provides
an alternative, far more restrictive one - we expose a whitelist of fields
which can be adjusted by the driver, along with immutable state upon which
the driver can make such decisions:

struct vm_area_desc {
	/* Immutable state. */
	struct mm_struct *mm;
	unsigned long start;
	unsigned long end;

	/* Mutable fields. Populated with initial state. */
	pgoff_t pgoff;
	struct file *file;
	vm_flags_t vm_flags;
	pgprot_t page_prot;

	/* Write-only fields. */
	const struct vm_operations_struct *vm_ops;
	void *private_data;
};

The mmap logic then updates the state used to either merge with a VMA or
establish a new VMA based upon this logic.

This is achieved via new file hook .mmap_prepare(), which is, importantly,
invoked very early on in the mmap() process.

If an error arises, we can very simply abort the operation with very
little unwinding of state required.

The existing logic contains another, related, peccadillo - since the
.mmap() callback might do anything, it may also cause a previously
unmergeable VMA to become mergeable with adjacent VMAs.

Right now the logic will retry a merge like this only if the driver
changes VMA flags, and changes them in such a way that a merge might
succeed (that is, the flags are not 'special', that is do not contain any
of the flags specified in VM_SPECIAL).

This has also been the source of a great deal of pain - it's hard to
reason about an .mmap() callback that might do - anything - but it's also
hard to reason about setting up a VMA and writing to the maple tree, only
to do it again utilising a great deal of shared state.

Since .mmap_prepare() sets fields before the first merge is even
attempted, the use of this callback obviates the need for this retry merge
logic.

A driver may only specify .mmap_prepare() or the deprecated .mmap()
callback.  In future we may add futher callbacks beyond .mmap_prepare() to
faciliate all use cass as we convert drivers.

In researching this change, I examined every .mmap() callback, and
discovered only a very few that set VMA state in such a way that a.  the
VMA flags changed and b.  this would be mergeable.

In the majority of cases, it turns out that drivers are mapping kernel
memory and thus ultimately set VM_PFNMAP, VM_MIXEDMAP, or other
unmergeable VM_SPECIAL flags.

Of those that remain I identified a number of cases which are only
applicable in DAX, setting the VM_HUGEPAGE flag:

* dax_mmap()
* erofs_file_mmap()
* ext4_file_mmap()
* xfs_file_mmap()

For this remerge to not occur and to impact users, each of these cases
would require a user to mmap() files using DAX, in parts, immediately
adjacent to one another.

This is a very unlikely usecase and so it does not appear to be worthwhile
to adjust this functionality accordingly.

We can, however, very quickly do so if needed by simply adding an
.mmap_prepare() callback to these as required.

There are two further non-DAX cases I idenitfied:

* orangefs_file_mmap() - Clears VM_RAND_READ if set, replacing with
  VM_SEQ_READ.
* usb_stream_hwdep_mmap() - Sets VM_DONTDUMP.

Both of these cases again seem very unlikely to be mmap()'d immediately
adjacent to one another in a fashion that would result in a merge.

Finally, we are left with a viable case:

* secretmem_mmap() - Set VM_LOCKED, VM_DONTDUMP.

This is viable enough that the mm selftests trigger the logic as a matter
of course.  Therefore, this series replace the .secretmem_mmap() hook with
.secret_mmap_prepare().


This patch (of 3):

Provide a means by which drivers can specify which fields of those
permitted to be changed should be altered to prior to mmap()'ing a range
(which may either result from a merge or from mapping an entirely new
VMA).

Doing so is substantially safer than the existing .mmap() calback which
provides unrestricted access to the part-constructed VMA and permits
drivers and file systems to do 'creative' things which makes it hard to
reason about the state of the VMA after the function returns.

The existing .mmap() callback's freedom has caused a great deal of issues,
especially in error handling, as unwinding the mmap() state has proven to
be non-trivial and caused significant issues in the past, for instance
those addressed in commit 5de195060b2e ("mm: resolve faulty mmap_region()
error path behaviour").

It also necessitates a second attempt at merge once the .mmap() callback
has completed, which has caused issues in the past, is awkward, adds
overhead and is difficult to reason about.

The .mmap_prepare() callback eliminates this requirement, as we can update
fields prior to even attempting the first merge.  It is safer, as we
heavily restrict what can actually be modified, and being invoked very
early in the mmap() process, error handling can be performed safely with
very little unwinding of state required.

The .mmap_prepare() and deprecated .mmap() callbacks are mutually
exclusive, so we permit only one to be invoked at a time.

Update vma userland test stubs to account for changes.

Link: https://lkml.kernel.org/r/cover.1746792520.git.lorenzo.stoakes@oracle.com
Link: https://lkml.kernel.org/r/adb36a7c4affd7393b2fc4b54cc5cfe211e41f71.1746792520.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fs.h       | 25 +++++++++++++++++++++++++
 include/linux/mm_types.h | 24 ++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..e2721a1ff13d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2169,6 +2169,7 @@ struct file_operations {
 	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
 				unsigned int poll_flags);
+	int (*mmap_prepare)(struct vm_area_desc *);
 } __randomize_layout;
 
 /* Supports async buffered reads */
@@ -2238,11 +2239,35 @@ struct inode_operations {
 	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 } ____cacheline_aligned;
 
+/* Did the driver provide valid mmap hook configuration? */
+static inline bool file_has_valid_mmap_hooks(struct file *file)
+{
+	bool has_mmap = file->f_op->mmap;
+	bool has_mmap_prepare = file->f_op->mmap_prepare;
+
+	/* Hooks are mutually exclusive. */
+	if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
+		return false;
+	if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare))
+		return false;
+
+	return true;
+}
+
 static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	if (WARN_ON_ONCE(file->f_op->mmap_prepare))
+		return -EINVAL;
+
 	return file->f_op->mmap(file, vma);
 }
 
+static inline int __call_mmap_prepare(struct file *file,
+		struct vm_area_desc *desc)
+{
+	return file->f_op->mmap_prepare(desc);
+}
+
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e76bade9ebb1..15808cad2bc1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -763,6 +763,30 @@ struct vma_numab_state {
 	int prev_scan_seq;
 };
 
+/*
+ * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
+ * manipulate mutable fields which will cause those fields to be updated in the
+ * resultant VMA.
+ *
+ * Helper functions are not required for manipulating any field.
+ */
+struct vm_area_desc {
+	/* Immutable state. */
+	struct mm_struct *mm;
+	unsigned long start;
+	unsigned long end;
+
+	/* Mutable fields. Populated with initial state. */
+	pgoff_t pgoff;
+	struct file *file;
+	vm_flags_t vm_flags;
+	pgprot_t page_prot;
+
+	/* Write-only fields. */
+	const struct vm_operations_struct *vm_ops;
+	void *private_data;
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
-- 
cgit v1.2.3


From 0cad6736f4b9bbe8129f367ed5818fa4aef6c2b5 Mon Sep 17 00:00:00 2001
From: Feng Lee <379943137@qq.com>
Date: Fri, 9 May 2025 14:32:30 +0800
Subject: mm: remove obsolete pgd_offset_gate()

Remove pgd_offset_gate() completely and simply make the single caller use
pgd_offset().

It appears that the gate area resides in the kernel-mapped segment
exclusively on IA64.  Therefore, removing pgd_offset_k is safe since IA64
is now obsolete.

Link: https://lkml.kernel.org/r/tencent_503130C3CD56569191396268CF4D12F09A06@qq.com
Signed-off-by: Feng Lee <379943137@qq.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: bibo mao <maobibo@loongson.cn>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index b50447ef1c92..f1e890b60460 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1164,10 +1164,6 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 }
 #endif
 
-#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
-#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
-#endif
-
 #ifndef __HAVE_ARCH_MOVE_PTE
 #define move_pte(pte, old_addr, new_addr)	(pte)
 #endif
-- 
cgit v1.2.3


From dc75c3ced10c611f524e9e444303a0249ce32e43 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 12 May 2025 22:20:59 +0200
Subject: net: phy: remove stub for mdiobus_register_board_info

The functionality of mdiobus_register_board_info() typically isn't
optional for the caller. Therefore remove the stub.

Note: Currently we have only one caller of mdiobus_register_board_info(),
in a DSA/PHYLINK context. Therefore CONFIG_MDIO_DEVICE is selected anyway.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/410a2222-c4e8-45b0-9091-d49674caeb00@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index d62d292024bc..7c29d346d4b3 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2071,17 +2071,8 @@ struct mdio_board_info {
 	const void	*platform_data;
 };
 
-#if IS_ENABLED(CONFIG_MDIO_DEVICE)
 int mdiobus_register_board_info(const struct mdio_board_info *info,
 				unsigned int n);
-#else
-static inline int mdiobus_register_board_info(const struct mdio_board_info *i,
-					      unsigned int n)
-{
-	return 0;
-}
-#endif
-
 
 /**
  * phy_module_driver() - Helper macro for registering PHY drivers
-- 
cgit v1.2.3


From bc049387b41f41bee61e8cc338a5e99ca9798a09 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Tue, 13 May 2025 07:28:12 -0700
Subject: bpf: Add support for __prog argument suffix to pass in prog->aux

Instead of hardcoding the list of kfuncs that need prog->aux passed to
them with a combination of fixup_kfunc_call adjustment + __ign suffix,
combine both in __prog suffix, which ignores the argument passed in, and
fixes it up to the prog->aux. This allows kfuncs to have the prog->aux
passed into them without having to touch the verifier.

Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20250513142812.1021591-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9734544b6957..cedd66867ecf 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -591,6 +591,7 @@ struct bpf_insn_aux_data {
 	 * bpf_fastcall pattern.
 	 */
 	u8 fastcall_spills_num:3;
+	u8 arg_prog:4;
 
 	/* below fields are initialized once */
 	unsigned int orig_idx; /* original instruction index */
-- 
cgit v1.2.3


From 3937f6db6e932c560a0f9ee2cd2a4fdcc314dadf Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 12 May 2025 19:21:15 -0700
Subject: lib/crc16: unexport crc16_table and crc16_byte()

Now that neither crc16_table nor crc16_byte() is used outside
lib/crc16.c, fold them into lib/crc16.c.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250513022115.39109-3-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/linux/crc16.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crc16.h b/include/linux/crc16.h
index 9fa74529b317..b861d969b161 100644
--- a/include/linux/crc16.h
+++ b/include/linux/crc16.h
@@ -15,14 +15,7 @@
 
 #include <linux/types.h>
 
-extern u16 const crc16_table[256];
-
-extern u16 crc16(u16 crc, const u8 *buffer, size_t len);
-
-static inline u16 crc16_byte(u16 crc, const u8 data)
-{
-	return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff];
-}
+u16 crc16(u16 crc, const u8 *p, size_t len);
 
 #endif /* __CRC16_H */
 
-- 
cgit v1.2.3


From 439d47608bb3e5f7b5f5eb55c568576b60731c4d Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Mon, 12 May 2025 10:44:00 +0200
Subject: ata: libata: Print if port is external on boot

Commit affccb16c117 ("ata: ahci: print the lpm policy on boot") added a
lpm-pol print during boot, which shows the LPM policy used by each port.

While the LPM policy is usually determined by the Kconfig
CONFIG_SATA_MOBILE_LPM_POLICY, the Kconfig value is overridden e.g. if
firmware has marked the port as hotplug capable / external.

Commit f97106b10d9a ("ata: ahci: Add debug print for external port") did
add a debug print to show if LPM was disabled because firmware has marked
the port as external, however, because devices having broken LPM (even
though they claim to support it) is more common than one would have hoped,
print "ext" during boot if firmware has marked the port is external.

This will make it easier to debug certain LPM issues, e.g. if firmware has
enabled/marked only some of the ports as hotplug capable / external.

Before (port marked as external by firmware):
ata1: SATA max UDMA/133 abar m4096@0xfebd3000 port 0xfebd3100 irq 57 lpm-pol 0

After (port marked as external by firmware):
ata1: SATA max UDMA/133 abar m4096@0xfebd3000 port 0xfebd3100 irq 57 lpm-pol 0 ext

Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 include/linux/libata.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 98affa1d9136..31be45fd47a6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1618,6 +1618,8 @@ static inline void ata_port_desc_misc(struct ata_port *ap, int irq)
 {
 	ata_port_desc(ap, "irq %d", irq);
 	ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy);
+	if (ap->pflags & ATA_PFLAG_EXTERNAL)
+		ata_port_desc(ap, "ext");
 }
 
 static inline bool ata_tag_internal(unsigned int tag)
-- 
cgit v1.2.3


From 6a09ae8281986d5fb5ce0115135c05a5afb8718e Mon Sep 17 00:00:00 2001
From: Richard Leitner <richard.leitner@linux.dev>
Date: Wed, 7 May 2025 09:51:31 +0200
Subject: leds: flash: Add support for flash/strobe duration

Add support for the new V4L2_CID_FLASH_DURATION control to the LEDs
driver.

Signed-off-by: Richard Leitner <richard.leitner@linux.dev>
Link: https://lore.kernel.org/r/20250507-ov9282-flash-strobe-v4-2-72b299c1b7c9@linux.dev
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/led-class-flash.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h
index 36df927ec4b7..21ec856c36bc 100644
--- a/include/linux/led-class-flash.h
+++ b/include/linux/led-class-flash.h
@@ -45,6 +45,8 @@ struct led_flash_ops {
 	int (*timeout_set)(struct led_classdev_flash *fled_cdev, u32 timeout);
 	/* get the flash LED fault */
 	int (*fault_get)(struct led_classdev_flash *fled_cdev, u32 *fault);
+	/* set flash duration */
+	int (*duration_set)(struct led_classdev_flash *fled_cdev, u32 duration);
 };
 
 /*
@@ -75,6 +77,9 @@ struct led_classdev_flash {
 	/* flash timeout value in microseconds along with its constraints */
 	struct led_flash_setting timeout;
 
+	/* flash timeout value in microseconds along with its constraints */
+	struct led_flash_setting duration;
+
 	/* LED Flash class sysfs groups */
 	const struct attribute_group *sysfs_groups[LED_FLASH_SYSFS_GROUPS_SIZE];
 };
@@ -209,4 +214,15 @@ int led_set_flash_timeout(struct led_classdev_flash *fled_cdev, u32 timeout);
  */
 int led_get_flash_fault(struct led_classdev_flash *fled_cdev, u32 *fault);
 
+/**
+ * led_set_flash_duration - set flash LED duration
+ * @fled_cdev: the flash LED to set
+ * @timeout: the flash duration to set it to
+ *
+ * Set the flash strobe duration.
+ *
+ * Returns: 0 on success or negative error value on failure
+ */
+int led_set_flash_duration(struct led_classdev_flash *fled_cdev, u32 duration);
+
 #endif	/* __LINUX_FLASH_LEDS_H_INCLUDED */
-- 
cgit v1.2.3


From 28026cf2dd84d961a62123b1fa941dc3c2c4a132 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 13 May 2025 17:31:40 +0100
Subject: genirq/msi: Add .msi_teardown() callback as the reverse of
 .msi_prepare()

While the MSI ops do have a .msi_prepare() callback that is responsible for
setting up the relevant (usually per-device) allocation, there is no
callback reversing this setup.

For this purpose, add .msi_teardown() callback.

In order to avoid breaking the ITS driver that suffers from related issues,
do not call the callback just yet.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250513163144.2215824-2-maz@kernel.org
---
 include/linux/msi.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 8c0ec9fc05a3..63c23003ec9b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -423,6 +423,7 @@ struct msi_domain_info;
  * @msi_init:		Domain specific init function for MSI interrupts
  * @msi_free:		Domain specific function to free a MSI interrupts
  * @msi_prepare:	Prepare the allocation of the interrupts in the domain
+ * @msi_teardown:	Reverse the effects of @msi_prepare
  * @prepare_desc:	Optional function to prepare the allocated MSI descriptor
  *			in the domain
  * @set_desc:		Set the msi descriptor for an interrupt
@@ -438,8 +439,9 @@ struct msi_domain_info;
  * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
  * irqdomain.
  *
- * @msi_check, @msi_prepare, @prepare_desc and @set_desc are callbacks used by the
- * msi_domain_alloc/free_irqs*() variants.
+ * @msi_check, @msi_prepare, @msi_teardown, @prepare_desc and
+ * @set_desc are callbacks used by the msi_domain_alloc/free_irqs*()
+ * variants.
  *
  * @domain_alloc_irqs, @domain_free_irqs can be used to override the
  * default allocation/free functions (__msi_domain_alloc/free_irqs). This
@@ -461,6 +463,8 @@ struct msi_domain_ops {
 	int		(*msi_prepare)(struct irq_domain *domain,
 				       struct device *dev, int nvec,
 				       msi_alloc_info_t *arg);
+	void		(*msi_teardown)(struct irq_domain *domain,
+					msi_alloc_info_t *arg);
 	void		(*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg,
 					struct msi_desc *desc);
 	void		(*set_desc)(msi_alloc_info_t *arg,
@@ -489,6 +493,7 @@ struct msi_domain_ops {
  * @handler:		Optional: associated interrupt flow handler
  * @handler_data:	Optional: associated interrupt flow handler data
  * @handler_name:	Optional: associated interrupt flow handler name
+ * @alloc_data:		Optional: associated interrupt allocation data
  * @data:		Optional: domain specific data
  */
 struct msi_domain_info {
@@ -501,6 +506,7 @@ struct msi_domain_info {
 	irq_flow_handler_t		handler;
 	void				*handler_data;
 	const char			*handler_name;
+	msi_alloc_info_t		*alloc_data;
 	void				*data;
 };
 
-- 
cgit v1.2.3


From 1396e89e09f00deb816e5f4a176d71d554922292 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 13 May 2025 17:31:42 +0100
Subject: genirq/msi: Move prepare() call to per-device allocation

The current device MSI infrastructure is subtly broken, as it will issue an
.msi_prepare() callback into the MSI controller driver every time it needs
to allocate an MSI. That's pretty wrong, as the contract (or unwarranted
assumption, depending who you ask) between the MSI controller and the core
code is that .msi_prepare() is called exactly once per device.

This leads to some subtle breakage in some MSI controller drivers, as it
gives the impression that there are multiple endpoints sharing a bus
identifier (RID in PCI parlance, DID for GICv3+). It implies that whatever
allocation the ITS driver (for example) has done on behalf of these devices
cannot be undone, as there is no way to track the shared state. This is
particularly bad for wire-MSI devices, for which .msi_prepare() is called
for each input line.

To address this issue, move the call to .msi_prepare() to take place at the
point of irq domain allocation, which is the only place that makes
sense. The msi_alloc_info_t structure is made part of the
msi_domain_template, so that its life-cycle is that of the domain as well.

Finally, the msi_info::alloc_data field is made to point at this allocation
tracking structure, ensuring that it is carried around the block.

This is all pretty straightforward, except for the non-device-MSI
leftovers, which still have to call .msi_prepare() at the old spot. One
day...

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250513163144.2215824-4-maz@kernel.org
---
 include/linux/msi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 63c23003ec9b..f4b94ccaf0c1 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -516,12 +516,14 @@ struct msi_domain_info {
  * @chip:	Interrupt chip for this domain
  * @ops:	MSI domain ops
  * @info:	MSI domain info data
+ * @alloc_info:	MSI domain allocation data (architecture specific)
  */
 struct msi_domain_template {
 	char			name[48];
 	struct irq_chip		chip;
 	struct msi_domain_ops	ops;
 	struct msi_domain_info	info;
+	msi_alloc_info_t	alloc_info;
 };
 
 /*
-- 
cgit v1.2.3


From cc52a697f87e8b2d88298827aca3f81398385572 Mon Sep 17 00:00:00 2001
From: Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
Date: Sun, 4 May 2025 17:45:27 +0300
Subject: phy: exynos5-usbdrd: support Exynos USBDRD 3.2 4nm controller

Add support for the Exynos USB 3.2 DRD 4nm controller. It's used in
recent 4nm SoCs like Exynos2200 and Exynos2400.

This device consists of 3 underlying and independent phys: SEC link
control phy, Synopsys eUSB 2.0 and Synopsys USBDP/SS combophy. Unlike
older device designs, where the internal phy blocks were all IP of
Samsung, Synopsys phys are present. This means that the link controller
is now mapped differently to account for missing bits and registers.
The Synopsys phys also have separate register bases.

As there are non-SEC PHYs present now, it doesn't make much sense to
implement them in this driver. They are expected to be configured
by external drivers, so pass phandles to them. USBDRD3.2 link controller
set up is still required beforehand.

This commit adds the necessary changes for USB HS to work. USB SS and
DisplayPort are out of scope in this commit and will be introduced
in the future.

Signed-off-by: Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
Link: https://lore.kernel.org/r/20250504144527.1723980-11-ivo.ivanov.ivanov1@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index cde299a85384..e921e7fe9be7 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -187,6 +187,9 @@
 /* Only for S5Pv210 */
 #define S5PV210_EINT_WAKEUP_MASK	0xC004
 
+/* Only for Exynos2200 */
+#define EXYNOS2200_PHY_CTRL_USB20	0x72C
+
 /* Only for Exynos4210 */
 #define S5P_CMU_CLKSTOP_LCD1_LOWPWR	0x1154
 #define S5P_CMU_RESET_LCD1_LOWPWR	0x1174
-- 
cgit v1.2.3


From 5fa96c83b81e50833274f3b450ee9a8c0b2172bc Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@arm.com>
Date: Tue, 1 Apr 2025 19:07:03 +0100
Subject: coresight: Introduce pause and resume APIs for source

Introduce APIs for pausing and resuming trace source and export as GPL
symbols.

Signed-off-by: Leo Yan <leo.yan@arm.com>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20250401180708.385396-3-leo.yan@arm.com
---
 include/linux/coresight.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 8abdd8b5c791..4ac65c68bbf4 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -398,6 +398,8 @@ struct coresight_ops_link {
  *		is associated to.
  * @enable:	enables tracing for a source.
  * @disable:	disables tracing for a source.
+ * @resume_perf: resumes tracing for a source in perf session.
+ * @pause_perf:	pauses tracing for a source in perf session.
  */
 struct coresight_ops_source {
 	int (*cpu_id)(struct coresight_device *csdev);
@@ -405,6 +407,8 @@ struct coresight_ops_source {
 		      enum cs_mode mode, struct coresight_path *path);
 	void (*disable)(struct coresight_device *csdev,
 			struct perf_event *event);
+	int (*resume_perf)(struct coresight_device *csdev);
+	void (*pause_perf)(struct coresight_device *csdev);
 };
 
 /**
-- 
cgit v1.2.3


From 676e8cf70cb0533e1118e29898c9a9c33ae3a10f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 9 May 2025 13:36:59 +0200
Subject: sched,livepatch: Untangle cond_resched() and live-patching

With the goal of deprecating / removing VOLUNTARY preempt, live-patch
needs to stop relying on cond_resched() to make forward progress.

Instead, rely on schedule() with TASK_FREEZABLE set. Just like
live-patching, the freezer needs to be able to stop tasks in a safe /
known state.

[bigeasy: use likely() in __klp_sched_try_switch() and update comments]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Tested-by: Petr Mladek <pmladek@suse.com>
Tested-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Link: https://lore.kernel.org/r/20250509113659.wkP_HJ5z@linutronix.de
---
 include/linux/livepatch_sched.h | 14 +++++---------
 include/linux/sched.h           |  6 ------
 2 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h
index 013794fb5da0..065c185f2763 100644
--- a/include/linux/livepatch_sched.h
+++ b/include/linux/livepatch_sched.h
@@ -3,27 +3,23 @@
 #define _LINUX_LIVEPATCH_SCHED_H_
 
 #include <linux/jump_label.h>
-#include <linux/static_call_types.h>
+#include <linux/sched.h>
 
 #ifdef CONFIG_LIVEPATCH
 
 void __klp_sched_try_switch(void);
 
-#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
-
 DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
 
-static __always_inline void klp_sched_try_switch(void)
+static __always_inline void klp_sched_try_switch(struct task_struct *curr)
 {
-	if (static_branch_unlikely(&klp_sched_try_switch_key))
+	if (static_branch_unlikely(&klp_sched_try_switch_key) &&
+	    READ_ONCE(curr->__state) & TASK_FREEZABLE)
 		__klp_sched_try_switch();
 }
 
-#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
-
 #else /* !CONFIG_LIVEPATCH */
-static inline void klp_sched_try_switch(void) {}
-static inline void __klp_sched_try_switch(void) {}
+static inline void klp_sched_try_switch(struct task_struct *curr) {}
 #endif /* CONFIG_LIVEPATCH */
 
 #endif /* _LINUX_LIVEPATCH_SCHED_H_ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..b98195991031 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -44,7 +44,6 @@
 #include <linux/seqlock_types.h>
 #include <linux/kcsan.h>
 #include <linux/rv.h>
-#include <linux/livepatch_sched.h>
 #include <linux/uidgid_types.h>
 #include <linux/tracepoint-defs.h>
 #include <asm/kmap_size.h>
@@ -2089,9 +2088,6 @@ extern int __cond_resched(void);
 
 #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
 
-void sched_dynamic_klp_enable(void);
-void sched_dynamic_klp_disable(void);
-
 DECLARE_STATIC_CALL(cond_resched, __cond_resched);
 
 static __always_inline int _cond_resched(void)
@@ -2112,7 +2108,6 @@ static __always_inline int _cond_resched(void)
 
 static inline int _cond_resched(void)
 {
-	klp_sched_try_switch();
 	return __cond_resched();
 }
 
@@ -2122,7 +2117,6 @@ static inline int _cond_resched(void)
 
 static inline int _cond_resched(void)
 {
-	klp_sched_try_switch();
 	return 0;
 }
 
-- 
cgit v1.2.3


From aab12022b076f0b385b7a9a78e1161bd2df5d1e3 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 29 Apr 2025 11:18:08 +0100
Subject: soundwire: bus: Add internal slave ID and use for IRQs

Currently the SoundWire IRQ code uses the dev_num to create an IRQ
mapping for each slave. However, there is an issue there, the dev_num
is only allocated when the slave enumerates on the bus and enumeration
may happen before or after probe of the slave driver. In the case
enumeration happens after probe of the slave driver then the IRQ
mapping will use dev_num before it is set. This could cause multiple
slaves to use zero as their IRQ mapping.

It is very desirable to have the IRQ mapped before the slave probe
is called, so drivers can do resource allocation in probe as normal. To
solve these issues add an internal ID created for each slave when it is
probed and use that for mapping the IRQ.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.dev>
Link: https://lore.kernel.org/r/20250429101808.348462-3-ckeepax@opensource.cirrus.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 2362f621d94c..0832776262ac 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -8,6 +8,7 @@
 #include <linux/bug.h>
 #include <linux/completion.h>
 #include <linux/device.h>
+#include <linux/idr.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
 #include <linux/lockdep_types.h>
@@ -50,6 +51,7 @@ struct sdw_slave;
 
 #define SDW_FRAME_CTRL_BITS		48
 #define SDW_MAX_DEVICES			11
+#define SDW_FW_MAX_DEVICES		16
 
 #define SDW_MAX_PORTS			15
 #define SDW_VALID_PORT_RANGE(n)		((n) < SDW_MAX_PORTS && (n) >= 1)
@@ -630,6 +632,7 @@ struct sdw_slave_ops {
  * struct sdw_slave - SoundWire Slave
  * @id: MIPI device ID
  * @dev: Linux device
+ * @index: internal ID for this slave
  * @irq: IRQ number
  * @status: Status reported by the Slave
  * @bus: Bus handle
@@ -661,6 +664,7 @@ struct sdw_slave_ops {
 struct sdw_slave {
 	struct sdw_slave_id id;
 	struct device dev;
+	int index;
 	int irq;
 	enum sdw_slave_status status;
 	struct sdw_bus *bus;
@@ -968,6 +972,7 @@ struct sdw_stream_runtime {
  * @md: Master device
  * @bus_lock_key: bus lock key associated to @bus_lock
  * @bus_lock: bus lock
+ * @slave_ida: IDA for allocating internal slave IDs
  * @slaves: list of Slaves on this bus
  * @msg_lock_key: message lock key associated to @msg_lock
  * @msg_lock: message lock
@@ -1010,6 +1015,7 @@ struct sdw_bus {
 	struct sdw_master_device *md;
 	struct lock_class_key bus_lock_key;
 	struct mutex bus_lock;
+	struct ida slave_ida;
 	struct list_head slaves;
 	struct lock_class_key msg_lock_key;
 	struct mutex msg_lock;
-- 
cgit v1.2.3


From e1f3f5be9e8e730290ebe6d490383af4d77f0f38 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Wed, 30 Apr 2025 15:47:13 +0800
Subject: soundwire: intel: Add awareness of ACE3+ microphone privacy

ACE3 introduced microphone privacy and along this feature it adds a new
register in vendor specific SHIM to control and status reporting.
The control of mic privacy via the SHIM register is only to enable the
interrupt generation via soundwire, but not handled by the soundwire code
as the mic privacy is not a feature of the soundwire IP.

On the other hand, printing the register value brings value for debugging,
so add a new flag to allow this conditionally.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20250430074714.94000-2-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw_intel.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 493d9de4e472..2af1d8174c50 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -189,6 +189,9 @@
 #define SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2	BIT(14)
 #define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE		BIT(15)
 
+/* ACE3+ Mic privacy control and status register */
+#define SDW_SHIM2_INTEL_VS_PVCCS		0x10
+
 /**
  * struct sdw_intel_stream_params_data: configuration passed during
  * the @params_stream callback, e.g. for interaction with DSP
@@ -331,6 +334,7 @@ struct sdw_intel_ctx {
  * @shim_base: sdw shim base.
  * @alh_base: sdw alh base.
  * @ext: extended HDaudio link support
+ * @mic_privacy: ACE version supports microphone privacy
  * @hbus: hdac_bus pointer, needed for power management
  * @eml_lock: mutex protecting shared registers in the HDaudio multi-link
  * space
@@ -349,6 +353,7 @@ struct sdw_intel_res {
 	u32 shim_base;
 	u32 alh_base;
 	bool ext;
+	bool mic_privacy;
 	struct hdac_bus *hbus;
 	struct mutex *eml_lock;
 };
-- 
cgit v1.2.3


From 9002b75aa8e6f034ffbd1c1ccac46927a1cf0f12 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Wed, 23 Apr 2025 15:34:19 +0100
Subject: irqchip/renesas-rzv2h: Add rzv2h_icu_register_dma_req()

On the Renesas RZ/V2H(P) family of SoCs, DMAC IPs are connected
to the Interrupt Control Unit (ICU).
For DMA transfers, a request number must be registered with the
ICU, which means that the DMAC driver has to be able to instruct
the ICU driver with the registration of such id.

Export rzv2h_icu_register_dma_req() so that the DMAC driver can
register the DMAC request number.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20250423143422.3747702-4-fabrizio.castro.jz@renesas.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/irqchip/irq-renesas-rzv2h.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/linux/irqchip/irq-renesas-rzv2h.h

(limited to 'include/linux')

diff --git a/include/linux/irqchip/irq-renesas-rzv2h.h b/include/linux/irqchip/irq-renesas-rzv2h.h
new file mode 100644
index 000000000000..618a60d2eac0
--- /dev/null
+++ b/include/linux/irqchip/irq-renesas-rzv2h.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Renesas RZ/V2H(P) Interrupt Control Unit (ICU)
+ *
+ * Copyright (C) 2025 Renesas Electronics Corporation.
+ */
+
+#ifndef __LINUX_IRQ_RENESAS_RZV2H
+#define __LINUX_IRQ_RENESAS_RZV2H
+
+#include <linux/platform_device.h>
+
+#define RZV2H_ICU_DMAC_REQ_NO_DEFAULT		0x3ff
+
+#ifdef CONFIG_RENESAS_RZV2H_ICU
+void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, u8 dmac_channel,
+				u16 req_no);
+#else
+static inline void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index,
+					      u8 dmac_channel, u16 req_no) { }
+#endif
+
+#endif /* __LINUX_IRQ_RENESAS_RZV2H */
-- 
cgit v1.2.3


From 9510b38dc0ba358c93cbf5ee7c28820afb85937b Mon Sep 17 00:00:00 2001
From: Erick Shepherd <erick.shepherd@ni.com>
Date: Mon, 31 Mar 2025 17:13:37 -0500
Subject: mmc: Add quirk to disable DDR50 tuning

Adds the MMC_QUIRK_NO_UHS_DDR50_TUNING quirk and updates
mmc_execute_tuning() to return 0 if that quirk is set. This fixes an
issue on certain Swissbit SD cards that do not support DDR50 tuning
where tuning requests caused I/O errors to be thrown.

Signed-off-by: Erick Shepherd <erick.shepherd@ni.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20250331221337.1414534-1-erick.shepherd@ni.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 526fce581657..ddcdf23d731c 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -329,6 +329,7 @@ struct mmc_card {
 #define MMC_QUIRK_BROKEN_SD_CACHE	(1<<15)	/* Disable broken SD cache support */
 #define MMC_QUIRK_BROKEN_CACHE_FLUSH	(1<<16)	/* Don't flush cache until the write has occurred */
 #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY	(1<<17) /* Disable broken SD poweroff notify support */
+#define MMC_QUIRK_NO_UHS_DDR50_TUNING	(1<<18) /* Disable DDR50 tuning */
 
 	bool			written_flag;	/* Indicates eMMC has been written since power on */
 	bool			reenable_cmdq;	/* Re-enable Command Queue */
-- 
cgit v1.2.3


From da012e1eb5372be518e2667c1afa00996bb076e6 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 1 Apr 2025 11:58:46 +0200
Subject: mmc: rename mmc_can_gpio_cd() to mmc_host_can_gpio_cd()

mmc_can_* functions sometimes relate to the card and sometimes to the host.
Make it obvious by renaming this function to include 'host'.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20250401095847.29271-11-wsa+renesas@sang-engineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/slot-gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index 1ed7b0d1e4f9..4f6a46c636ec 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -24,7 +24,7 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
 int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config);
 int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on);
 void mmc_gpiod_request_cd_irq(struct mmc_host *host);
-bool mmc_can_gpio_cd(struct mmc_host *host);
+bool mmc_host_can_gpio_cd(struct mmc_host *host);
 bool mmc_can_gpio_ro(struct mmc_host *host);
 
 #endif
-- 
cgit v1.2.3


From 2e1a26ed6b3864576720364c793e4abf0681fcf5 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 1 Apr 2025 11:58:47 +0200
Subject: mmc: rename mmc_can_gpio_ro() to mmc_host_can_gpio_ro()

mmc_can_* functions sometimes relate to the card and sometimes to the host.
Make it obvious by renaming this function to include 'host'.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20250401095847.29271-12-wsa+renesas@sang-engineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/slot-gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index 4f6a46c636ec..23ac5696fa38 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -25,6 +25,6 @@ int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config);
 int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on);
 void mmc_gpiod_request_cd_irq(struct mmc_host *host);
 bool mmc_host_can_gpio_cd(struct mmc_host *host);
-bool mmc_can_gpio_ro(struct mmc_host *host);
+bool mmc_host_can_gpio_ro(struct mmc_host *host);
 
 #endif
-- 
cgit v1.2.3


From ac01fa73f5309a35eff83be61442a8891159b487 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 10 May 2025 16:37:30 -0400
Subject: tracepoint: Have tracepoints created with DECLARE_TRACE() have _tp
 suffix

Most tracepoints in the kernel are created with TRACE_EVENT(). The
TRACE_EVENT() macro (and DECLARE_EVENT_CLASS() and DEFINE_EVENT() where in
reality, TRACE_EVENT() is just a helper macro that calls those other two
macros), will create not only a tracepoint (the function trace_<event>()
used in the kernel), it also exposes the tracepoint to user space along
with defining what fields will be saved by that tracepoint.

There are a few places that tracepoints are created in the kernel that are
not exposed to userspace via tracefs. They can only be accessed from code
within the kernel. These tracepoints are created with DEFINE_TRACE()

Most of these tracepoints end with "_tp". This is useful as when the
developer sees that, they know that the tracepoint is for in-kernel only
(meaning it can only be accessed inside the kernel, either directly by the
kernel or indirectly via modules and BPF programs) and is not exposed to
user space.

Instead of making this only a process to add "_tp", enforce it by making
the DECLARE_TRACE() append the "_tp" suffix to the tracepoint. This
requires adding DECLARE_TRACE_EVENT() macros for the TRACE_EVENT() macro
to use that keeps the original name.

Link: https://lore.kernel.org/all/20250418083351.20a60e64@gandalf.local.home/

Cc: netdev <netdev@vger.kernel.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Ahern <dsahern@kernel.org>
Cc: Juri Lelli <juri.lelli@gmail.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/20250510163730.092fad5b@gandalf.local.home
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a351763e6965..826ce3f8e1f8 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -464,16 +464,30 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 #endif
 
 #define DECLARE_TRACE(name, proto, args)				\
-	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+	__DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args),		\
 			cpu_online(raw_smp_processor_id()),		\
 			PARAMS(void *__data, proto))
 
 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
-	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+	__DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args),		\
 			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
 			PARAMS(void *__data, proto))
 
 #define DECLARE_TRACE_SYSCALL(name, proto, args)			\
+	__DECLARE_TRACE_SYSCALL(name##_tp, PARAMS(proto), PARAMS(args),	\
+				PARAMS(void *__data, proto))
+
+#define DECLARE_TRACE_EVENT(name, proto, args)				\
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()),		\
+			PARAMS(void *__data, proto))
+
+#define DECLARE_TRACE_EVENT_CONDITION(name, proto, args, cond)		\
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
+			PARAMS(void *__data, proto))
+
+#define DECLARE_TRACE_EVENT_SYSCALL(name, proto, args)			\
 	__DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args),	\
 				PARAMS(void *__data, proto))
 
@@ -591,32 +605,32 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 
 #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
 #define DEFINE_EVENT(template, name, proto, args)		\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+	DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+	DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+	DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_CONDITION(template, name, proto,		\
 			       args, cond)			\
-	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
+	DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto),	\
 				PARAMS(args), PARAMS(cond))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+	DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,		\
 		assign, print, reg, unreg)			\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
-#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
+	DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,	\
 		assign, print, reg, unreg)			\
-	DECLARE_TRACE_CONDITION(name, PARAMS(proto),	\
+	DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto),	\
 			PARAMS(args), PARAMS(cond))
 #define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
 			      struct, assign, print)		\
-	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
+	DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto),	\
 				PARAMS(args), PARAMS(cond))
 #define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign,	\
 			    print, reg, unreg)			\
-	DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args))
+	DECLARE_TRACE_EVENT_SYSCALL(name, PARAMS(proto), PARAMS(args))
 
 #define TRACE_EVENT_FLAGS(event, flag)
 
-- 
cgit v1.2.3


From 289c99bec7eed918ab37c62cbb29a2e3f58fb1fb Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 13 May 2025 22:24:09 -0700
Subject: lib/crc32: add SPDX license identifier

lib/crc32.c and include/linux/crc32.h got missed by the bulk SPDX
conversion because of the nonstandard explanation of the license.
However, crc32.c clearly states that it's licensed under the GNU General
Public License, Version 2.  And the comment in crc32.h clearly indicates
that it's meant to have the same license as crc32.c.  Therefore, apply
SPDX-License-Identifier: GPL-2.0-only to both files.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20250514052409.194822-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/linux/crc32.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 69c2e8bb3782..569dc13f139f 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -1,7 +1,4 @@
-/*
- * crc32.h
- * See linux/lib/crc32.c for license and changes
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _LINUX_CRC32_H
 #define _LINUX_CRC32_H
 
-- 
cgit v1.2.3


From 539fbab37881e32ba6a708a100de6db19e1e7e7d Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Mon, 7 Apr 2025 15:28:05 +0300
Subject: tpm: Mask TPM RC in tpm2_start_auth_session()

tpm2_start_auth_session() does not mask TPM RC correctly from the callers:

[   28.766528] tpm tpm0: A TPM error (2307) occurred start auth session

Process TPM RCs inside tpm2_start_auth_session(), and map them to POSIX
error codes.

Cc: stable@vger.kernel.org # v6.10+
Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions")
Reported-by: Herbert Xu <herbert@gondor.apana.org.au>
Closes: https://lore.kernel.org/linux-integrity/Z_NgdRHuTKP6JK--@gondor.apana.org.au/
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/linux/tpm.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 6c3125300c00..9ac9768cc8f7 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -257,6 +257,7 @@ enum tpm2_return_codes {
 	TPM2_RC_TESTING		= 0x090A, /* RC_WARN */
 	TPM2_RC_REFERENCE_H0	= 0x0910,
 	TPM2_RC_RETRY		= 0x0922,
+	TPM2_RC_SESSION_MEMORY	= 0x0903,
 };
 
 enum tpm2_command_codes {
@@ -437,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc)
 	return (rc & BIT(7)) ? rc & 0xbf : rc;
 }
 
+/*
+ * Convert a return value from tpm_transmit_cmd() to POSIX error code.
+ */
+static inline ssize_t tpm_ret_to_err(ssize_t ret)
+{
+	if (ret < 0)
+		return ret;
+
+	switch (tpm2_rc_value(ret)) {
+	case TPM2_RC_SUCCESS:
+		return 0;
+	case TPM2_RC_SESSION_MEMORY:
+		return -ENOMEM;
+	default:
+		return -EFAULT;
+	}
+}
+
 #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE)
 
 extern int tpm_is_tpm2(struct tpm_chip *chip);
-- 
cgit v1.2.3


From 2f661f71fda1fc0c42b7746ca5b7da529eb6b5be Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Fri, 4 Apr 2025 10:23:14 +0200
Subject: tpm: tis: Double the timeout B to 4s

With some Infineon chips the timeouts in tpm_tis_send_data (both B and
C) can reach up to about 2250 ms.

Timeout C is retried since
commit de9e33df7762 ("tpm, tpm_tis: Workaround failed command reception on Infineon devices")

Timeout B still needs to be extended.

The problem is most commonly encountered with context related operation
such as load context/save context. These are issued directly by the
kernel, and there is no retry logic for them.

When a filesystem is set up to use the TPM for unlocking the boot fails,
and restarting the userspace service is ineffective. This is likely
because ignoring a load context/save context result puts the real TPM
state and the TPM state expected by the kernel out of sync.

Chips known to be affected:
tpm_tis IFX1522:00: 2.0 TPM (device-id 0x1D, rev-id 54)
Description: SLB9672
Firmware Revision: 15.22

tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1B, rev-id 22)
Firmware Revision: 7.83

tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1A, rev-id 16)
Firmware Revision: 5.63

Link: https://lore.kernel.org/linux-integrity/Z5pI07m0Muapyu9w@kitsune.suse.cz/
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/linux/tpm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 9ac9768cc8f7..a3d8305e88a5 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -224,7 +224,7 @@ enum tpm2_const {
 
 enum tpm2_timeouts {
 	TPM2_TIMEOUT_A          =    750,
-	TPM2_TIMEOUT_B          =   2000,
+	TPM2_TIMEOUT_B          =   4000,
 	TPM2_TIMEOUT_C          =    200,
 	TPM2_TIMEOUT_D          =     30,
 	TPM2_DURATION_SHORT     =     20,
-- 
cgit v1.2.3


From 45a442fe369e6c4e0b4aa9f63b31c3f2f9e2090e Mon Sep 17 00:00:00 2001
From: Michael Kelley <mhklinux@outlook.com>
Date: Mon, 12 May 2025 17:06:04 -0700
Subject: Drivers: hv: vmbus: Remove vmbus_sendpacket_pagebuffer()

With the netvsc driver changed to use vmbus_sendpacket_mpb_desc()
instead of vmbus_sendpacket_pagebuffer(), the latter has no remaining
callers. Remove it.

Cc: <stable@vger.kernel.org> # 6.1.x
Signed-off-by: Michael Kelley <mhklinux@outlook.com>
Link: https://patch.msgid.link/20250513000604.1396-6-mhklinux@outlook.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/hyperv.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 675959fb97ba..ea806cef4a0d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1161,13 +1161,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel,
 				  enum vmbus_packet_type type,
 				  u32 flags);
 
-extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
-					    struct hv_page_buffer pagebuffers[],
-					    u32 pagecount,
-					    void *buffer,
-					    u32 bufferlen,
-					    u64 requestid);
-
 extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
 				     struct vmbus_packet_mpb_array *mpb,
 				     u32 desc_size,
-- 
cgit v1.2.3


From 8d9117009dd690f647a66912f429c96335069907 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 13 May 2025 13:23:31 +0200
Subject: fuse: don't allow signals to interrupt getdents copying

When getting the directory contents, the entries are first fetched to a
kernel buffer, then they are copied to userspace with dir_emit().  This
second phase is non-blocking as long as the userspace buffer is not paged
out, making it interruptible makes zero sense.

Overload d_type as flags, since it only uses 4 bits from 32.

Reviewed-by: Bernd Schubert <bschubert@ddn.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/20250513112335.1473177-1-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index cf7208500cee..c8b5af17060e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2073,6 +2073,9 @@ struct dir_context {
 	loff_t pos;
 };
 
+/* If OR-ed with d_type, pending signals are not checked */
+#define FILLDIR_FLAG_NOINTR	0x1000
+
 /*
  * These flags let !MMU mmap() govern direct device mapping vs immediate
  * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
-- 
cgit v1.2.3


From 132833405e61463d47d6badff1b8080b09b5808e Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Mon, 5 May 2025 19:54:39 +0530
Subject: PCI: Add debugfs support for exposing PTM context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Precision Time Management (PTM) mechanism defined in PCIe spec r6.0,
sec 6.21 allows precise coordination of timing information across multiple
components in a PCIe hierarchy with independent local time clocks.

PCI core already supports enabling PTM in the root port and endpoint
devices through PTM Extended Capability registers. But the PTM context
supported by the PTM capable components such as Root Complex (RC) and
Endpoint (EP) controllers were not exposed as of now. Part of the reason is
that the spec doesn't define how the context information is exposed to the
software and left it to the vendor implementation. So there is no
standardized way to get access to the context information and each vendor
have defined their own way.

This commit adds debugfs support to expose the PTM context to userspace
from both PCIe RC and EP controllers. Since the context information is
exposed in a vendor specific way, the debugfs interface allows the
controller drivers to implement callbacks for each attribute, to be called
by the generic PTM driver.

The Controller drivers are expected to call pcie_ptm_create_debugfs() to
create the debugfs attributes for the PTM context and call
pcie_ptm_destroy_debugfs() to destroy them. The drivers should also
populate the relevant callbacks in the 'struct pcie_ptm_ops' structure
based on the controller implementation.

Below PTM context are exposed through debugfs:

PCIe RC
=======

1. PTM Local clock
2. PTM T2 timestamp
3. PTM T3 timestamp
4. PTM Context valid

PCIe EP
=======

1. PTM Local clock
2. PTM T1 timestamp
3. PTM T4 timestamp
4. PTM Master clock
5. PTM Context update

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
[kwilczynski: fix overflow issue reported by Dan Carpenter from
https://lore.kernel.org/linux-pci/b41c1754-c6b7-4805-9f14-7c643d6c5304@suswa.mountain]
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Link: https://patch.msgid.link/20250505-pcie-ptm-v4-1-02d26d51400b@linaro.org
---
 include/linux/pci.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e8e3fd77e96..01a29076604f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1856,6 +1856,39 @@ static inline bool pci_aer_available(void) { return false; }
 
 bool pci_ats_disabled(void);
 
+#define PCIE_PTM_CONTEXT_UPDATE_AUTO 0
+#define PCIE_PTM_CONTEXT_UPDATE_MANUAL 1
+
+struct pcie_ptm_ops {
+	int (*check_capability)(void *drvdata);
+	int (*context_update_write)(void *drvdata, u8 mode);
+	int (*context_update_read)(void *drvdata, u8 *mode);
+	int (*context_valid_write)(void *drvdata, bool valid);
+	int (*context_valid_read)(void *drvdata, bool *valid);
+	int (*local_clock_read)(void *drvdata, u64 *clock);
+	int (*master_clock_read)(void *drvdata, u64 *clock);
+	int (*t1_read)(void *drvdata, u64 *clock);
+	int (*t2_read)(void *drvdata, u64 *clock);
+	int (*t3_read)(void *drvdata, u64 *clock);
+	int (*t4_read)(void *drvdata, u64 *clock);
+
+	bool (*context_update_visible)(void *drvdata);
+	bool (*context_valid_visible)(void *drvdata);
+	bool (*local_clock_visible)(void *drvdata);
+	bool (*master_clock_visible)(void *drvdata);
+	bool (*t1_visible)(void *drvdata);
+	bool (*t2_visible)(void *drvdata);
+	bool (*t3_visible)(void *drvdata);
+	bool (*t4_visible)(void *drvdata);
+};
+
+struct pci_ptm_debugfs {
+	struct dentry *debugfs;
+	const struct pcie_ptm_ops *ops;
+	struct mutex lock;
+	void *pdata;
+};
+
 #ifdef CONFIG_PCIE_PTM
 int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
 void pci_disable_ptm(struct pci_dev *dev);
@@ -1868,6 +1901,18 @@ static inline bool pcie_ptm_enabled(struct pci_dev *dev)
 { return false; }
 #endif
 
+#if IS_ENABLED(CONFIG_DEBUG_FS) && IS_ENABLED(CONFIG_PCIE_PTM)
+struct pci_ptm_debugfs *pcie_ptm_create_debugfs(struct device *dev, void *pdata,
+						const struct pcie_ptm_ops *ops);
+void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs);
+#else
+static inline struct pci_ptm_debugfs
+*pcie_ptm_create_debugfs(struct device *dev, void *pdata,
+			 const struct pcie_ptm_ops *ops) { return NULL; }
+static inline void
+pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs) { }
+#endif
+
 void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
-- 
cgit v1.2.3


From e0410e956b97e8b50b2aa7b02ba70e5f09b31ebe Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 13 May 2025 17:10:08 +0200
Subject: readdir: supply dir_context.count as readdir buffer size hint

This is a preparation for large readdir buffers in fuse.

Simply setting the fuse buffer size to the userspace buffer size should
work, the record sizes are similar (fuse's is slightly larger than libc's,
so no overflow should ever happen).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Jaco Kroon <jaco@uls.co.za>
Link: https://lore.kernel.org/20250513151012.1476536-1-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c8b5af17060e..ddd54a664916 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2071,6 +2071,13 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
 struct dir_context {
 	filldir_t actor;
 	loff_t pos;
+	/*
+	 * Filesystems MUST NOT MODIFY count, but may use as a hint:
+	 * 0	    unknown
+	 * > 0      space in buffer (assume at least one entry)
+	 * INT_MAX  unlimited
+	 */
+	int count;
 };
 
 /* If OR-ed with d_type, pending signals are not checked */
-- 
cgit v1.2.3


From d8c5507cd140d9471472ece673e70250b957c595 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Tue, 13 May 2025 17:03:24 +0200
Subject: include/linux/fs.h: add inode_lock_killable()

Prepare for making inode operations killable while they're waiting for
the lock.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Link: https://lore.kernel.org/20250513150327.1373061-1-max.kellermann@ionos.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ddd54a664916..82c6093b8582 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -867,6 +867,11 @@ static inline void inode_lock(struct inode *inode)
 	down_write(&inode->i_rwsem);
 }
 
+static inline __must_check int inode_lock_killable(struct inode *inode)
+{
+	return down_write_killable(&inode->i_rwsem);
+}
+
 static inline void inode_unlock(struct inode *inode)
 {
 	up_write(&inode->i_rwsem);
@@ -877,6 +882,11 @@ static inline void inode_lock_shared(struct inode *inode)
 	down_read(&inode->i_rwsem);
 }
 
+static inline __must_check int inode_lock_shared_killable(struct inode *inode)
+{
+	return down_read_killable(&inode->i_rwsem);
+}
+
 static inline void inode_unlock_shared(struct inode *inode)
 {
 	up_read(&inode->i_rwsem);
-- 
cgit v1.2.3


From c16608005ccb99fbde3a4cd96eab28e16f148abf Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 13 May 2025 11:19:22 +0300
Subject: net: Look for bonding slaves in the bond's network namespace

Update the for_each_netdev_in_bond_rcu macro to iterate through network
devices in the bond's network namespace instead of always using
init_net. This change is safe because:

1. **Bond-Slave Namespace Relationship**: A bond device and its slaves
   must reside in the same network namespace. The bond device's
   namespace is established at creation time and cannot change.

2. **Slave Movement Implications**: Any attempt to move a slave device
   to a different namespace automatically removes it from the bond, as
   per kernel networking stack rules.
   This maintains the invariant that slaves must exist in the same
   namespace as their bond.

This change is part of an effort to enable Link Aggregation (LAG) to
work properly inside custom network namespaces. Previously, the macro
would only find slave devices in the initial network namespace,
preventing proper bonding functionality in custom namespaces.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250513081922.525716-1-mbloch@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 32a1e41636a9..9e3a2d8452d6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3268,7 +3268,7 @@ int call_netdevice_notifiers_info(unsigned long val,
 #define for_each_netdev_continue_rcu(net, d)		\
 	list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_in_bond_rcu(bond, slave)	\
-		for_each_netdev_rcu(&init_net, slave)	\
+		for_each_netdev_rcu(dev_net_rcu(bond), slave)	\
 			if (netdev_master_upper_dev_get_rcu(slave) == (bond))
 #define net_device_entry(lh)	list_entry(lh, struct net_device, dev_list)
 
-- 
cgit v1.2.3


From b9eef3391de028fdd88fd7a2f81a4834fc98c9ac Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 11:27:26 +0200
Subject: xdp: Use nested-BH locking for system_page_pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

system_page_pool is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Make a struct with a page_pool member (original system_page_pool) and a
local_lock_t and use local_lock_nested_bh() for locking. This change
adds only lockdep coverage and does not alter the functional behaviour
for !PREEMPT_RT.

Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20250512092736.229935-6-bigeasy@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9e3a2d8452d6..73a97cf1bbce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3503,7 +3503,12 @@ struct softnet_data {
 };
 
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
-DECLARE_PER_CPU(struct page_pool *, system_page_pool);
+
+struct page_pool_bh {
+	struct page_pool *pool;
+	local_lock_t bh_lock;
+};
+DECLARE_PER_CPU(struct page_pool_bh, system_page_pool);
 
 #ifndef CONFIG_PREEMPT_RT
 static inline int dev_recursion_level(void)
-- 
cgit v1.2.3


From 7fe70c06a182a140be9996b02256d907e114479a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 11:27:31 +0200
Subject: net/sched: act_mirred: Move the recursion counter struct netdev_xmit

mirred_nest_level is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Move mirred_nest_level to struct netdev_xmit as u8, provide wrappers.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://patch.msgid.link/20250512092736.229935-11-bigeasy@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice_xmit.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
index 38325e070296..848735b3a7c0 100644
--- a/include/linux/netdevice_xmit.h
+++ b/include/linux/netdevice_xmit.h
@@ -8,6 +8,9 @@ struct netdev_xmit {
 #ifdef CONFIG_NET_EGRESS
 	u8  skip_txqueue;
 #endif
+#if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
+	u8 sched_mirred_nest;
+#endif
 };
 
 #endif
-- 
cgit v1.2.3


From ce45dc4bb22e96b59a07e19b67e915d99dd5281b Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 25 Apr 2025 11:24:22 +0200
Subject: PCI: Limit visibility of match_driver flag to PCI core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 58d9a38f6fac ("PCI: Skip attaching driver in device_add()"),
PCI enumeration is split into two steps:  In the first step, all devices
are published in sysfs with device_add().  In the second step, drivers are
bound to the devices with device_attach().  To delay driver binding until
the second step, a "bool match_driver" in struct pci_dev is used.

Instead of a bool, use a bit in the "unsigned long priv_flags" to shrink
struct pci_dev a little and prevent use of the bool outside the PCI core
(as has happened with commit cbbc00be2ce3 ("iommu/amd: Prevent binding
other PCI drivers to IOMMU PCI devices")).

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Link: https://patch.msgid.link/d22a9e5b81d6bd8dd1837607d6156679b3b1199c.1745572340.git.lukas@wunner.de
---
 include/linux/pci.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index d26e6611bd00..053d9dd494d4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -423,8 +423,6 @@ struct pci_dev {
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 	struct resource driver_exclusive_resource;	 /* driver exclusive resource ranges */
 
-	bool		match_driver;		/* Skip attaching driver */
-
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
 	unsigned int	io_window:1;		/* Bridge has I/O window */
 	unsigned int	pref_window:1;		/* Bridge has pref mem window */
-- 
cgit v1.2.3


From 241b9b584d50e01f7eb50c0555aa570502bcc8c5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 14 May 2025 13:50:33 -0700
Subject: dm-zone: Use bdev_*() helper functions where applicable

Improve code readability by using bdev_is_zone_aligned() and
bdev_offset_from_zone_start() where applicable. No functionality
has been changed.

This patch is a reworked version of a patch from Pankaj Raghav.

See also https://lore.kernel.org/linux-block/20220923173618.6899-11-p.raghav@samsung.com/.

Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 include/linux/blkdev.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e39c45bc0a97..edc941ec8fdd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1419,6 +1419,13 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
 	return bdev_offset_from_zone_start(bdev, sector) == 0;
 }
 
+/* Check whether @sector is a multiple of the zone size. */
+static inline bool bdev_is_zone_aligned(struct block_device *bdev,
+					sector_t sector)
+{
+	return bdev_is_zone_start(bdev, sector);
+}
+
 /**
  * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
  * @bdev:	block device to check
-- 
cgit v1.2.3


From 18049c8cff9cc89daadc4df6975f7d9069638926 Mon Sep 17 00:00:00 2001
From: Yabin Cui <yabinc@google.com>
Date: Thu, 8 May 2025 16:26:42 -0700
Subject: perf/aux: Allocate non-contiguous AUX pages by default

perf always allocates contiguous AUX pages based on aux_watermark.
However, this contiguous allocation doesn't benefit all PMUs. For
instance, ARM SPE and TRBE operate with virtual pages, and Coresight
ETR allocates a separate buffer. For these PMUs, allocating contiguous
AUX pages unnecessarily exacerbates memory fragmentation. This
fragmentation can prevent their use on long-running devices.

This patch modifies the perf driver to be memory-friendly by default,
by allocating non-contiguous AUX pages. For PMUs requiring contiguous
pages (Intel BTS and some Intel PT), the existing
PERF_PMU_CAP_AUX_NO_SG capability can be used. For PMUs that don't
require but can benefit from contiguous pages (some Intel PT), a new
capability, PERF_PMU_CAP_AUX_PREFER_LARGE, is added to maintain their
existing behavior.

Signed-off-by: Yabin Cui <yabinc@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: James Clark <james.clark@linaro.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20250508232642.148767-1-yabinc@google.com
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 947ad12dfdbe..a96c00e2ceca 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -303,6 +303,7 @@ struct perf_event_pmu_context;
 #define PERF_PMU_CAP_AUX_OUTPUT			0x0080
 #define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0100
 #define PERF_PMU_CAP_AUX_PAUSE			0x0200
+#define PERF_PMU_CAP_AUX_PREFER_LARGE		0x0400
 
 /**
  * pmu::scope
-- 
cgit v1.2.3


From 189572bf4e005431051319def435ac4b7e4489ca Mon Sep 17 00:00:00 2001
From: "Yury Norov [NVIDIA]" <yury.norov@gmail.com>
Date: Thu, 15 May 2025 16:58:31 +0000
Subject: cpumask: Relax cpumask_any_but()

Similarly to other cpumask search functions, accept -1, and consider it as
'any CPU' hint. This helps users to avoid coding special cases.

Signed-off-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: James Morse <james.morse@arm.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Link: https://lore.kernel.org/20250515165855.31452-2-james.morse@arm.com
---
 include/linux/cpumask.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index f9a868384083..a3ee875df508 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -413,14 +413,18 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *src)
  * @cpu: the cpu to ignore.
  *
  * Often used to find any cpu but smp_processor_id() in a mask.
+ * If @cpu == -1, the function is equivalent to cpumask_any().
  * Return: >= nr_cpu_ids if no cpus set.
  */
 static __always_inline
-unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
+unsigned int cpumask_any_but(const struct cpumask *mask, int cpu)
 {
 	unsigned int i;
 
-	cpumask_check(cpu);
+	/* -1 is a legal arg here. */
+	if (cpu != -1)
+		cpumask_check(cpu);
+
 	for_each_cpu(i, mask)
 		if (i != cpu)
 			break;
@@ -433,16 +437,20 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
  * @mask2: the second input cpumask
  * @cpu: the cpu to ignore
  *
+ * If @cpu == -1, the function is equivalent to cpumask_any_and().
  * Returns >= nr_cpu_ids if no cpus set.
  */
 static __always_inline
 unsigned int cpumask_any_and_but(const struct cpumask *mask1,
 				 const struct cpumask *mask2,
-				 unsigned int cpu)
+				 int cpu)
 {
 	unsigned int i;
 
-	cpumask_check(cpu);
+	/* -1 is a legal arg here. */
+	if (cpu != -1)
+		cpumask_check(cpu);
+
 	i = cpumask_first_and(mask1, mask2);
 	if (i != cpu)
 		return i;
-- 
cgit v1.2.3


From 13f0a02bf4c1c5888c736cedef9ca50de666adb3 Mon Sep 17 00:00:00 2001
From: "Yury Norov [NVIDIA]" <yury.norov@gmail.com>
Date: Thu, 15 May 2025 16:58:32 +0000
Subject: find: Add find_first_andnot_bit()

The function helps to implement cpumask_andnot() APIs.

Signed-off-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: James Morse <james.morse@arm.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Link: https://lore.kernel.org/20250515165855.31452-3-james.morse@arm.com
---
 include/linux/find.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/find.h b/include/linux/find.h
index 68685714bc18..5a2c267ea7f9 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -29,6 +29,8 @@ unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsign
 					unsigned long n);
 extern unsigned long _find_first_and_bit(const unsigned long *addr1,
 					 const unsigned long *addr2, unsigned long size);
+unsigned long _find_first_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+				 unsigned long size);
 unsigned long _find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2,
 				      const unsigned long *addr3, unsigned long size);
 extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
@@ -347,6 +349,29 @@ unsigned long find_first_and_bit(const unsigned long *addr1,
 }
 #endif
 
+/**
+ * find_first_andnot_bit - find the first bit set in 1st memory region and unset in 2nd
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the first set bit
+ * If no bits are set, returns >= @size.
+ */
+static __always_inline
+unsigned long find_first_andnot_bit(const unsigned long *addr1,
+				 const unsigned long *addr2,
+				 unsigned long size)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0);
+
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_first_andnot_bit(addr1, addr2, size);
+}
+
 /**
  * find_first_and_and_bit - find the first set bit in 3 memory regions
  * @addr1: The first address to base the search on
-- 
cgit v1.2.3


From 5da703ef4e4a82b2f9a00f2b3a1eae5657e68a92 Mon Sep 17 00:00:00 2001
From: "Yury Norov [NVIDIA]" <yury.norov@gmail.com>
Date: Thu, 15 May 2025 16:58:33 +0000
Subject: cpumask: Add cpumask_{first,next}_andnot() API

With the lack of the functions, client code has to abuse less efficient
cpumask_nth().

Signed-off-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: James Morse <james.morse@arm.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250515165855.31452-4-james.morse@arm.com
---
 include/linux/cpumask.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index a3ee875df508..6a569c7534db 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -178,6 +178,19 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
 	return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
 }
 
+/**
+ * cpumask_first_andnot - return the first cpu from *srcp1 & ~*srcp2
+ * @srcp1: the first input
+ * @srcp2: the second input
+ *
+ * Return: >= nr_cpu_ids if no such cpu found.
+ */
+static __always_inline
+unsigned int cpumask_first_andnot(const struct cpumask *srcp1, const struct cpumask *srcp2)
+{
+	return find_first_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
+}
+
 /**
  * cpumask_first_and_and - return the first cpu from *srcp1 & *srcp2 & *srcp3
  * @srcp1: the first input
@@ -284,6 +297,25 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
 		small_cpumask_bits, n + 1);
 }
 
+/**
+ * cpumask_next_andnot - get the next cpu in *src1p & ~*src2p
+ * @n: the cpu prior to the place to search (i.e. return will be > @n)
+ * @src1p: the first cpumask pointer
+ * @src2p: the second cpumask pointer
+ *
+ * Return: >= nr_cpu_ids if no further cpus set in both.
+ */
+static __always_inline
+unsigned int cpumask_next_andnot(int n, const struct cpumask *src1p,
+				 const struct cpumask *src2p)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpumask_check(n);
+	return find_next_andnot_bit(cpumask_bits(src1p), cpumask_bits(src2p),
+		small_cpumask_bits, n + 1);
+}
+
 /**
  * cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from
  *			   @n+1. If nothing found, wrap around and start from
@@ -458,6 +490,33 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1,
 	return cpumask_next_and(cpu, mask1, mask2);
 }
 
+/**
+ * cpumask_any_andnot_but - pick an arbitrary cpu from *mask1 & ~*mask2, but not this one.
+ * @mask1: the first input cpumask
+ * @mask2: the second input cpumask
+ * @cpu: the cpu to ignore
+ *
+ * If @cpu == -1, the function returns the first matching cpu.
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+static __always_inline
+unsigned int cpumask_any_andnot_but(const struct cpumask *mask1,
+				    const struct cpumask *mask2,
+				    int cpu)
+{
+	unsigned int i;
+
+	/* -1 is a legal arg here. */
+	if (cpu != -1)
+		cpumask_check(cpu);
+
+	i = cpumask_first_andnot(mask1, mask2);
+	if (i != cpu)
+		return i;
+
+	return cpumask_next_andnot(cpu, mask1, mask2);
+}
+
 /**
  * cpumask_nth - get the Nth cpu in a cpumask
  * @srcp: the cpumask pointer
-- 
cgit v1.2.3


From ea33537d82921e71f852ea2ed985acc562125efe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 13 May 2025 19:39:12 +0000
Subject: tcp: add receive queue awareness in tcp_rcv_space_adjust()

If the application can not drain fast enough a TCP socket queue,
tcp_rcv_space_adjust() can overestimate tp->rcvq_space.space.

Then sk->sk_rcvbuf can grow and hit tcp_rmem[2] for no good reason.

Fix this by taking into acount the number of available bytes.

Keeping sk->sk_rcvbuf at the right size allows better cache efficiency.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Wei Wang <weiwan@google.com>
Link: https://patch.msgid.link/20250513193919.1089692-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a8af71623ba7..29f59d50dc73 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -340,7 +340,7 @@ struct tcp_sock {
 	} rcv_rtt_est;
 /* Receiver queue space */
 	struct {
-		u32	space;
+		int	space;
 		u32	seq;
 		u64	time;
 	} rcvq_space;
-- 
cgit v1.2.3


From bc740420d7ae7878696d5ad9f3dbcb11e2599b2e Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 15 May 2025 16:58:39 +0000
Subject: x86/resctrl: Drop __init/__exit on assorted symbols

Because ARM's MPAM controls are probed using MMIO, resctrl can't be
initialised until enough CPUs are online to have determined the system-wide
supported num_closid. Arm64 also supports 'late onlined secondaries', where
only a subset of CPUs are online during boot.

These two combine to mean the MPAM driver may not be able to initialise
resctrl until user-space has brought 'enough' CPUs online.

To allow MPAM to initialise resctrl after __init text has been free'd, remove
all the __init markings from resctrl.

The existing __exit markings cause these functions to be removed by the linker
as it has never been possible to build resctrl as a module. MPAM has an error
interrupt which causes the driver to reset and disable itself. Remove the
__exit markings to allow the MPAM driver to tear down resctrl when an error
occurs.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Amit Singh Tomar <amitsinght@marvell.com> # arm64
Tested-by: Shanker Donthineni <sdonthineni@nvidia.com> # arm64
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250515165855.31452-10-james.morse@arm.com
---
 include/linux/resctrl.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 880351ca3dfc..b8f8240050b4 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -358,7 +358,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
 u32 resctrl_arch_system_num_rmid_idx(void);
 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
 
-__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
+bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
 
 /**
  * resctrl_arch_mon_event_config_write() - Write the config for an event.
@@ -514,7 +514,7 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r);
 extern unsigned int resctrl_rmid_realloc_threshold;
 extern unsigned int resctrl_rmid_realloc_limit;
 
-int __init resctrl_init(void);
-void __exit resctrl_exit(void);
+int resctrl_init(void);
+void resctrl_exit(void);
 
 #endif /* _RESCTRL_H */
-- 
cgit v1.2.3


From eff042ddf4b9587fa7394d502b93e06ec6015177 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:36:50 -0400
Subject: sunrpc: Add a helper to derive maxpages from sv_max_mesg

This page count is to be used to allocate various arrays of pages
and bio_vecs, replacing the fixed RPCSVC_MAXPAGES value.

The documenting comment is somewhat stale -- of course NFSv4
COMPOUND procedures may have multiple payloads.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 74658cca0f38..749f8e53e8a6 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -159,6 +159,23 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
 #define RPCSVC_MAXPAGES		((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \
 				+ 2 + 1)
 
+/**
+ * svc_serv_maxpages - maximum count of pages needed for one RPC message
+ * @serv: RPC service context
+ *
+ * Returns a count of pages or vectors that can hold the maximum
+ * size RPC message for @serv.
+ *
+ * Each request/reply pair can have at most one "payload", plus two
+ * pages, one for the request, and one for the reply.
+ * nfsd_splice_actor() might need an extra page when a READ payload
+ * is not page-aligned.
+ */
+static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
+{
+	return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1;
+}
+
 /*
  * The context of a single thread, including the request currently being
  * processed.
-- 
cgit v1.2.3


From ed603bcf4feac05df937bf78bc7feb75f988a971 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:36:52 -0400
Subject: sunrpc: Replace the rq_pages array with dynamically-allocated memory

As a step towards making NFSD's maximum rsize and wsize variable at
run-time, replace the fixed-size rq_vec[] array in struct svc_rqst
with a chunk of dynamically-allocated memory.

On a system with 8-byte pointers and 4KB pages, pahole reports that
the rq_pages[] array is 2080 bytes. This patch replaces that with
a single 8-byte pointer field.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 749f8e53e8a6..57be69539888 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -205,7 +205,8 @@ struct svc_rqst {
 	struct xdr_stream	rq_res_stream;
 	struct page		*rq_scratch_page;
 	struct xdr_buf		rq_res;
-	struct page		*rq_pages[RPCSVC_MAXPAGES + 1];
+	unsigned long		rq_maxpages;	/* num of entries in rq_pages */
+	struct page *		*rq_pages;
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
 	struct page *		*rq_page_end;  /* one past the last page */
-- 
cgit v1.2.3


From 59cf7346542babdaae99e72365174eab4fa276ac Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:36:54 -0400
Subject: sunrpc: Replace the rq_bvec array with dynamically-allocated memory

As a step towards making NFSD's maximum rsize and wsize variable at
run-time, replace the fixed-size rq_bvec[] array in struct svc_rqst
with a chunk of dynamically-allocated memory.

The rq_bvec[] array contains enough bio_vecs to handle each page in
a maximum size RPC message.

On a system with 8-byte pointers and 4KB pages, pahole reports that
the rq_bvec[] array is 4144 bytes. This patch replaces that array
with a single 8-byte pointer field.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 57be69539888..538bea716c6b 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -213,7 +213,7 @@ struct svc_rqst {
 
 	struct folio_batch	rq_fbatch;
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
-	struct bio_vec		rq_bvec[RPCSVC_MAXPAGES];
+	struct bio_vec		*rq_bvec;
 
 	__be32			rq_xid;		/* transmission id */
 	u32			rq_prog;	/* program number */
-- 
cgit v1.2.3


From f2e597353d50f05a600dddbf84a362839cf1c224 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 9 May 2025 13:39:23 -0400
Subject: NFSD: De-duplicate the svc_fill_write_vector() call sites

All three call sites do the same thing.

I'm struggling with this a bit, however. struct xdr_buf is an XDR
layer object and unmarshaling a WRITE payload is clearly a task
intended to be done by the proc and xdr functions, not by VFS. This
feels vaguely like a layering violation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 538bea716c6b..dec636345ee2 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -471,7 +471,7 @@ int		   svc_encode_result_payload(struct svc_rqst *rqstp,
 					     unsigned int offset,
 					     unsigned int length);
 unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
-					 struct xdr_buf *payload);
+					 const struct xdr_buf *payload);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, void *p,
 					     size_t total);
-- 
cgit v1.2.3


From b406c6b78198f123e08061c45d56803459785975 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 May 2025 11:56:40 -0400
Subject: SUNRPC: Remove svc_fill_write_vector()

Clean up: This API is no longer used.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index dec636345ee2..510ee1927977 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -470,8 +470,6 @@ const char *	   svc_proc_name(const struct svc_rqst *rqstp);
 int		   svc_encode_result_payload(struct svc_rqst *rqstp,
 					     unsigned int offset,
 					     unsigned int length);
-unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
-					 const struct xdr_buf *payload);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, void *p,
 					     size_t total);
-- 
cgit v1.2.3


From 1259560b988c959abf9b84da0b9e56c2863b6837 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 6 May 2025 20:15:08 -0400
Subject: SUNRPC: Remove svc_rqst :: rq_vec

Clean up: This array is no longer used.

On a system with 8-byte pointers and 4KB pages, pahole reports that
the rq_vec[] array accounts for 4144 bytes.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 510ee1927977..6540af4b9bdb 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -212,7 +212,6 @@ struct svc_rqst {
 	struct page *		*rq_page_end;  /* one past the last page */
 
 	struct folio_batch	rq_fbatch;
-	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 	struct bio_vec		*rq_bvec;
 
 	__be32			rq_xid;		/* transmission id */
-- 
cgit v1.2.3


From f4126823c1fd677d9811a0252830c6c73ddb7e99 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:36:55 -0400
Subject: sunrpc: Adjust size of socket's receive page array dynamically

As a step towards making NFSD's maximum rsize and wsize variable at
run-time, make sk_pages a flexible array.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svcsock.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index bf45d9e8492a..963bbe251e52 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -40,7 +40,9 @@ struct svc_sock {
 
 	struct completion	sk_handshake_done;
 
-	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
+	/* received data */
+	unsigned long		sk_maxpages;
+	struct page *		sk_pages[] __counted_by(sk_maxpages);
 };
 
 static inline u32 svc_sock_reclen(struct svc_sock *svsk)
-- 
cgit v1.2.3


From 81381d1a90dea186ce4585d9bd3cdc40b50e9c48 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:36:56 -0400
Subject: svcrdma: Adjust the number of entries in svc_rdma_recv_ctxt::rc_pages

Allow allocation of more entries in the rc_pages[] array when the
maximum size of an RPC message is increased.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 619fc0bd837a..1016f2feddc4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -202,7 +202,8 @@ struct svc_rdma_recv_ctxt {
 	struct svc_rdma_pcl	rc_reply_pcl;
 
 	unsigned int		rc_page_count;
-	struct page		*rc_pages[RPCSVC_MAXPAGES];
+	unsigned long		rc_maxpages;
+	struct page		*rc_pages[] __counted_by(rc_maxpages);
 };
 
 /*
-- 
cgit v1.2.3


From 56ab43f50d708e155f013ef60cc1b4fe39bed42e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:36:57 -0400
Subject: svcrdma: Adjust the number of entries in svc_rdma_send_ctxt::sc_pages

Allow allocation of more entries in the sc_pages[] array when the
maximum size of an RPC message is increased.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 1016f2feddc4..22704c2e5b9b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -245,7 +245,8 @@ struct svc_rdma_send_ctxt {
 	void			*sc_xprt_buf;
 	int			sc_page_count;
 	int			sc_cur_sge_no;
-	struct page		*sc_pages[RPCSVC_MAXPAGES];
+	unsigned long		sc_maxpages;
+	struct page		**sc_pages;
 	struct ib_sge		sc_sges[];
 };
 
-- 
cgit v1.2.3


From 0af165bb903cdc005066494cc931076dafd76894 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:36:58 -0400
Subject: sunrpc: Remove the RPCSVC_MAXPAGES macro

It is no longer used.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6540af4b9bdb..d57df042e24a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -150,14 +150,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * list.  xdr_buf.tail points to the end of the first page.
  * This assumes that the non-page part of an rpc reply will fit
  * in a page - NFSd ensures this.  lockd also has no trouble.
- *
- * Each request/reply pair can have at most one "payload", plus two pages,
- * one for the request, and one for the reply.
- * We using ->sendfile to return read data, we might need one extra page
- * if the request is not page-aligned.  So add another '1'.
  */
-#define RPCSVC_MAXPAGES		((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \
-				+ 2 + 1)
 
 /**
  * svc_serv_maxpages - maximum count of pages needed for one RPC message
-- 
cgit v1.2.3


From 1e7dbad6d1fe4b35ccd2aa8fea9496d837af4430 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Apr 2025 15:37:02 -0400
Subject: SUNRPC: Bump the maximum payload size for the server

Increase the maximum server-side RPC payload to 4MB. The default
remains at 1MB.

An API to adjust the operational maximum was added in 2006 by commit
596bbe53eb3a ("[PATCH] knfsd: Allow max size of NFSd payload to be
configured"). To adjust the operational maximum using this API, shut
down the NFS server. Then echo a new value into:

  /proc/fs/nfsd/max_block_size

And restart the NFS server.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d57df042e24a..48666b83fe68 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -119,14 +119,14 @@ void svc_destroy(struct svc_serv **svcp);
  * Linux limit; someone who cares more about NFS/UDP performance
  * can test a larger number.
  *
- * For TCP transports we have more freedom.  A size of 1MB is
- * chosen to match the client limit.  Other OSes are known to
- * have larger limits, but those numbers are probably beyond
- * the point of diminishing returns.
+ * For non-UDP transports we have more freedom.  A size of 4MB is
+ * chosen to accommodate clients that support larger I/O sizes.
  */
-#define RPCSVC_MAXPAYLOAD	(1*1024*1024u)
-#define RPCSVC_MAXPAYLOAD_TCP	RPCSVC_MAXPAYLOAD
-#define RPCSVC_MAXPAYLOAD_UDP	(32*1024u)
+enum {
+	RPCSVC_MAXPAYLOAD	= 4 * 1024 * 1024,
+	RPCSVC_MAXPAYLOAD_TCP	= RPCSVC_MAXPAYLOAD,
+	RPCSVC_MAXPAYLOAD_UDP	= 32 * 1024,
+};
 
 extern u32 svc_max_payload(const struct svc_rqst *rqstp);
 
-- 
cgit v1.2.3


From d4fb6b8e4640adeff9673ae398525be7382d3197 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 15 May 2025 16:58:41 +0000
Subject: x86/resctrl: Add end-marker to the resctrl_event_id enum

The resctrl_event_id enum gives names to the counter event numbers on x86.
These are used directly by resctrl.

To allow the MPAM driver to keep an array of these the size of the enum
needs to be known.

Add a 'num_events' enum entry which can be used to size an array.  This is
added to the enum to reduce conflicts with another series, which in turn
requires get_arch_mbm_state() to have a default case.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250515165855.31452-12-james.morse@arm.com
---
 include/linux/resctrl_types.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index f26450b3326b..69bf740130ac 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -49,6 +49,9 @@ enum resctrl_event_id {
 	QOS_L3_OCCUP_EVENT_ID		= 0x01,
 	QOS_L3_MBM_TOTAL_EVENT_ID	= 0x02,
 	QOS_L3_MBM_LOCAL_EVENT_ID	= 0x03,
+
+	/* Must be the last */
+	QOS_NUM_EVENTS,
 };
 
 #endif /* __LINUX_RESCTRL_TYPES_H */
-- 
cgit v1.2.3


From bff70402d6d67843fe319338e4c56e1cba13fbd8 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 15 May 2025 16:58:45 +0000
Subject: fs/resctrl: Add boiler plate for external resctrl code

Add Makefile and Kconfig for fs/resctrl. Add ARCH_HAS_CPU_RESCTRL
for the common parts of the resctrl interface and make X86_CPU_RESCTRL
select this.

Adding an include of asm/resctrl.h to linux/resctrl.h allows the
/fs/resctrl files to switch over to using this header instead.

Co-developed-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Amit Singh Tomar <amitsinght@marvell.com> # arm64
Tested-by: Shanker Donthineni <sdonthineni@nvidia.com> # arm64
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250515165855.31452-16-james.morse@arm.com
---
 include/linux/resctrl.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b8f8240050b4..5c7c8bf2c47f 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -8,6 +8,10 @@
 #include <linux/pid.h>
 #include <linux/resctrl_types.h>
 
+#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL
+#include <asm/resctrl.h>
+#endif
+
 /* CLOSID, RMID value used by the default control group */
 #define RESCTRL_RESERVED_CLOSID		0
 #define RESCTRL_RESERVED_RMID		0
-- 
cgit v1.2.3


From 3d95a49b365e06d1b4c4e5a426fdc70449a334f3 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 15 May 2025 16:58:46 +0000
Subject: x86/resctrl: Move the filesystem bits to headers visible to
 fs/resctrl

Once the filesystem parts of resctrl move to fs/resctrl, it cannot rely
on definitions in x86's internal.h.

Move definitions in internal.h that need to be shared between the
filesystem and architecture code to header files that fs/resctrl can
include.

Doing this separately means the filesystem code only moves between files
of the same name, instead of having these changes mixed in too.

Co-developed-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Amit Singh Tomar <amitsinght@marvell.com> # arm64
Tested-by: Shanker Donthineni <sdonthineni@nvidia.com> # arm64
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250515165855.31452-17-james.morse@arm.com
---
 include/linux/resctrl.h       | 3 +++
 include/linux/resctrl_types.h | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 5c7c8bf2c47f..b9d1f2916e9c 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -403,6 +403,9 @@ static inline u32 resctrl_get_config_index(u32 closid,
 	}
 }
 
+bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l);
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+
 /*
  * Update the ctrl_val and apply this config right now.
  * Must be called on one of the domain's CPUs.
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index 69bf740130ac..a66e7936943e 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -7,6 +7,9 @@
 #ifndef __LINUX_RESCTRL_TYPES_H
 #define __LINUX_RESCTRL_TYPES_H
 
+#define MAX_MBA_BW			100u
+#define MBM_OVERFLOW_INTERVAL		1000
+
 /* Reads to Local DRAM Memory */
 #define READS_TO_LOCAL_MEM		BIT(0)
 
-- 
cgit v1.2.3


From 7bdb619c7f9f0a7264b2d69ad0472bf2c785e52a Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 15 May 2025 16:58:47 +0000
Subject: x86/resctrl: Move enum resctrl_event_id to resctrl.h

resctrl_types.h contains common types and constants to enable architectures
to use these types in their definitions within asm/resctrl.h.

enum resctrl_event_id was placed in resctrl_types.h for
resctrl_arch_get_cdp_enabled() and resctrl_arch_set_cdp_enabled(), but
these two functions are no longer inlined by any architecture.

Move enum resctrl_event_id to resctrl.h.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250515165855.31452-18-james.morse@arm.com
---
 include/linux/resctrl.h       | 10 ++++++++++
 include/linux/resctrl_types.h | 10 ----------
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b9d1f2916e9c..5ef972cbf56b 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -48,6 +48,16 @@ int proc_resctrl_show(struct seq_file *m,
 	for_each_rdt_resource((r))					      \
 		if ((r)->mon_capable)
 
+enum resctrl_res_level {
+	RDT_RESOURCE_L3,
+	RDT_RESOURCE_L2,
+	RDT_RESOURCE_MBA,
+	RDT_RESOURCE_SMBA,
+
+	/* Must be the last */
+	RDT_NUM_RESOURCES,
+};
+
 /**
  * enum resctrl_conf_type - The type of configuration.
  * @CDP_NONE:	No prioritisation, both code and data are controlled or monitored.
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index a66e7936943e..a25fb9c4070d 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -34,16 +34,6 @@
 /* Max event bits supported */
 #define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
 
-enum resctrl_res_level {
-	RDT_RESOURCE_L3,
-	RDT_RESOURCE_L2,
-	RDT_RESOURCE_MBA,
-	RDT_RESOURCE_SMBA,
-
-	/* Must be the last */
-	RDT_NUM_RESOURCES,
-};
-
 /*
  * Event IDs, the values match those used to program IA32_QM_EVTSEL before
  * reading IA32_QM_CTR on RDT systems.
-- 
cgit v1.2.3


From 279f225951e3c77a1708d77f557b4cc7bdbd76ed Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 15 May 2025 16:58:49 +0000
Subject: x86/resctrl: Move pseudo lock prototypes to include/linux/resctrl.h

The resctrl pseudo-lock feature allows an architecture to allocate data
into particular cache portions, which are then treated as reserved to
avoid that data ever being evicted. Setting this up is deeply architecture
specific as it involves disabling prefetchers etc. It is not possible
to support this kind of feature on arm64. Risc-V is assumed to be the
same.

The prototypes for the architecture code were added to x86's asm/resctrl.h,
with other architectures able to provide stubs for their architecture. This
forces other architectures to provide identical stubs.

Move the prototypes and stubs to linux/resctrl.h, and switch between them
using the existing Kconfig symbol.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250515165855.31452-20-james.morse@arm.com
---
 include/linux/resctrl.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 5ef972cbf56b..9ba771f2ddea 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -534,4 +534,17 @@ extern unsigned int resctrl_rmid_realloc_limit;
 int resctrl_init(void);
 void resctrl_exit(void);
 
+#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
+u64 resctrl_arch_get_prefetch_disable_bits(void);
+int resctrl_arch_pseudo_lock_fn(void *_plr);
+int resctrl_arch_measure_cycles_lat_fn(void *_plr);
+int resctrl_arch_measure_l2_residency(void *_plr);
+int resctrl_arch_measure_l3_residency(void *_plr);
+#else
+static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; }
+static inline int resctrl_arch_pseudo_lock_fn(void *_plr) { return 0; }
+static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; }
+static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; }
+static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; }
+#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */
 #endif /* _RESCTRL_H */
-- 
cgit v1.2.3


From e847a847aea5728dfcd13b558b9d82b79f9a85c7 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:04 +0100
Subject: irqdomain: Drop of_node_to_fwnode()

All uses of of_node_to_fwnode() in non-irqdomain code were changed to
"officially" defined of_fwnode_handle(). Therefore, the former can be
dropped along with the last uses in the irqdomain code.

Due to merge logistics the inline cannot be dropped immediately. Move it to
a deprecated section, which will be removed during the merge window.

[ tglx: Handle merge logistics ]

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-12-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index df7e9278c8ac..91ed86feac07 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -358,11 +358,6 @@ int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
 			   irq_hw_number_t hwirq, int node,
 			   const struct irq_affinity_desc *affinity);
 
-static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
-{
-	return node ? &node->fwnode : NULL;
-}
-
 extern const struct fwnode_operations irqchip_fwnode_ops;
 
 static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode)
@@ -387,7 +382,7 @@ struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
 static inline struct irq_domain *irq_find_matching_host(struct device_node *node,
 							enum irq_domain_bus_token bus_token)
 {
-	return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token);
+	return irq_find_matching_fwnode(of_fwnode_handle(node), bus_token);
 }
 
 static inline struct irq_domain *irq_find_host(struct device_node *node)
@@ -407,7 +402,7 @@ static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_no
 						       const struct irq_domain_ops *ops,
 						       void *host_data)
 {
-	return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data);
+	return irq_domain_create_simple(of_fwnode_handle(of_node), size, first_irq, ops, host_data);
 }
 
 /**
@@ -423,7 +418,7 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no
 					 void *host_data)
 {
 	struct irq_domain_info info = {
-		.fwnode		= of_node_to_fwnode(of_node),
+		.fwnode		= of_fwnode_handle(of_node),
 		.size		= size,
 		.hwirq_max	= size,
 		.ops		= ops,
@@ -442,7 +437,7 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
 					 void *host_data)
 {
 	struct irq_domain_info info = {
-		.fwnode		= of_node_to_fwnode(of_node),
+		.fwnode		= of_fwnode_handle(of_node),
 		.hwirq_max	= max_irq,
 		.direct_max	= max_irq,
 		.ops		= ops,
@@ -462,7 +457,7 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node
 					 void *host_data)
 {
 	struct irq_domain_info info = {
-		.fwnode		= of_node_to_fwnode(of_node),
+		.fwnode		= of_fwnode_handle(of_node),
 		.hwirq_max	= ~0U,
 		.ops		= ops,
 		.host_data	= host_data,
@@ -611,7 +606,7 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par
 					    void *host_data)
 {
 	return irq_domain_create_hierarchy(parent, flags, size,
-					   of_node_to_fwnode(node),
+					   of_fwnode_handle(node),
 					   ops, host_data);
 }
 
@@ -755,6 +750,12 @@ static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsig
 }
 #endif
 
+/* Deprecated functions. Will be removed in the merge window */
+static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
+{
+	return node ? &node->fwnode : NULL;
+}
+
 #else /* CONFIG_IRQ_DOMAIN */
 static inline void irq_dispose_mapping(unsigned int virq) { }
 static inline struct irq_domain *irq_find_matching_fwnode(
-- 
cgit v1.2.3


From c7131b12080ad9c74eadd4f62386c8642168bec7 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:05 +0100
Subject: irqdomain: Make irq_domain_create_hierarchy() an inline

There is no reason to export the function as an extra symbol. It is
simple enough and is just a wrapper to already exported functions.

Therefore, switch the exported function to an inline.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-13-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 91ed86feac07..6e9a5ec2986f 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -591,12 +591,45 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
 			 void *handler_data, const char *handler_name);
 void irq_domain_reset_irq_data(struct irq_data *irq_data);
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
-struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
-					       unsigned int flags,
-					       unsigned int size,
-					       struct fwnode_handle *fwnode,
-					       const struct irq_domain_ops *ops,
-					       void *host_data);
+/**
+ * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy
+ * @parent:	Parent irq domain to associate with the new domain
+ * @flags:	Irq domain flags associated to the domain
+ * @size:	Size of the domain. See below
+ * @fwnode:	Optional fwnode of the interrupt controller
+ * @ops:	Pointer to the interrupt domain callbacks
+ * @host_data:	Controller private data pointer
+ *
+ * If @size is 0 a tree domain is created, otherwise a linear domain.
+ *
+ * If successful the parent is associated to the new domain and the
+ * domain flags are set.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
+					    unsigned int flags,
+					    unsigned int size,
+					    struct fwnode_handle *fwnode,
+					    const struct irq_domain_ops *ops,
+					    void *host_data)
+{
+	struct irq_domain_info info = {
+		.fwnode		= fwnode,
+		.size		= size,
+		.hwirq_max	= size,
+		.ops		= ops,
+		.host_data	= host_data,
+		.domain_flags	= flags,
+		.parent		= parent,
+	};
+	struct irq_domain *d;
+
+	if (!info.size)
+		info.hwirq_max = ~0U;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
+}
 
 static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent,
 					    unsigned int flags,
-- 
cgit v1.2.3


From 9cf19f061ccc98b63367b16551c290016b7d6b13 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:10 +0100
Subject: gpio: Switch to irq_domain_create_*()

irq_domain_add_*() interfaces are going away as being obsolete now.
Switch to the preferred irq_domain_create_*() ones. Those differ in the
node parameter: They take more generic struct fwnode_handle instead of
struct device_node. Therefore, of_fwnode_handle() is added around the
original parameter.

Note some of the users can likely use dev->fwnode directly instead of
indirect of_fwnode_handle(dev->of_node). But dev->fwnode is not
guaranteed to be set for all, so this has to be investigated on case to
case basis (by people who can actually test with the HW).

[ tglx: Fix up subject prefix ]

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-18-jirislaby@kernel.org
---
 include/linux/gpio/driver.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 4c0294a9104d..b53233051bee 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -287,8 +287,9 @@ struct gpio_irq_chip {
 	/**
 	 * @first:
 	 *
-	 * Required for static IRQ allocation. If set, irq_domain_add_simple()
-	 * will allocate and map all IRQs during initialization.
+	 * Required for static IRQ allocation. If set,
+	 * irq_domain_create_simple() will allocate and map all IRQs
+	 * during initialization.
 	 */
 	unsigned int first;
 
-- 
cgit v1.2.3


From 813da4f379e789c428d2e0a44730f852e090d47d Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:32 +0100
Subject: powerpc: Switch irq_domain_add_nomap() to use fwnode

All irq_domain_add_*() functions are going away. PowerPC is the only
user of irq_domain_add_nomap() and there is no irq_domain_create_nomap()
complement.

Therefore, to align with the rest of the kernel, rename
irq_domain_add_nomap() to irq_domain_create_nomap() and accept a
fwnode_handle instead of a device_node.

[ tglx: Fix up subject prefix ]

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-40-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 6e9a5ec2986f..f3c79f908a28 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -431,13 +431,13 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no
 }
 
 #ifdef CONFIG_IRQ_DOMAIN_NOMAP
-static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
+static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode,
 					 unsigned int max_irq,
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
 	struct irq_domain_info info = {
-		.fwnode		= of_fwnode_handle(of_node),
+		.fwnode		= fwnode,
 		.hwirq_max	= max_irq,
 		.direct_max	= max_irq,
 		.ops		= ops,
-- 
cgit v1.2.3


From 42b8b16fe56c206fde7fbae0116769d0addef4b7 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:33 +0100
Subject: irqdomain: Drop irq_domain_add_*() functions

Most irq_domain_add_*() functions are unused now, so drop them. The
remaining ones are moved to the deprecated section and will be removed
during the merge window after the patches in various trees have been
merged.

Note: The Chinese docs are touched but unfinished. I cannot parse those.

[ tglx: Remove the leftover in irq-domain.rst and handle merge logistics ]

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-41-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 102 ++++++++++++++++------------------------------
 1 file changed, 34 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index f3c79f908a28..712c662de981 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -338,12 +338,6 @@ struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
 					    unsigned int first_irq,
 					    const struct irq_domain_ops *ops,
 					    void *host_data);
-struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
-					 unsigned int size,
-					 unsigned int first_irq,
-					 irq_hw_number_t first_hwirq,
-					 const struct irq_domain_ops *ops,
-					 void *host_data);
 struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
 					    unsigned int size,
 					    unsigned int first_irq,
@@ -396,40 +390,6 @@ static inline struct irq_domain *irq_find_host(struct device_node *node)
 	return d;
 }
 
-static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
-						       unsigned int size,
-						       unsigned int first_irq,
-						       const struct irq_domain_ops *ops,
-						       void *host_data)
-{
-	return irq_domain_create_simple(of_fwnode_handle(of_node), size, first_irq, ops, host_data);
-}
-
-/**
- * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
- * @of_node: pointer to interrupt controller's device tree node.
- * @size: Number of interrupts in the domain.
- * @ops: map/unmap domain callbacks
- * @host_data: Controller private data pointer
- */
-static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
-					 unsigned int size,
-					 const struct irq_domain_ops *ops,
-					 void *host_data)
-{
-	struct irq_domain_info info = {
-		.fwnode		= of_fwnode_handle(of_node),
-		.size		= size,
-		.hwirq_max	= size,
-		.ops		= ops,
-		.host_data	= host_data,
-	};
-	struct irq_domain *d;
-
-	d = irq_domain_instantiate(&info);
-	return IS_ERR(d) ? NULL : d;
-}
-
 #ifdef CONFIG_IRQ_DOMAIN_NOMAP
 static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode,
 					 unsigned int max_irq,
@@ -452,22 +412,6 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f
 unsigned int irq_create_direct_mapping(struct irq_domain *domain);
 #endif
 
-static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
-					 const struct irq_domain_ops *ops,
-					 void *host_data)
-{
-	struct irq_domain_info info = {
-		.fwnode		= of_fwnode_handle(of_node),
-		.hwirq_max	= ~0U,
-		.ops		= ops,
-		.host_data	= host_data,
-	};
-	struct irq_domain *d;
-
-	d = irq_domain_instantiate(&info);
-	return IS_ERR(d) ? NULL : d;
-}
-
 static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode,
 					 unsigned int size,
 					 const struct irq_domain_ops *ops,
@@ -631,18 +575,6 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *
 	return IS_ERR(d) ? NULL : d;
 }
 
-static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent,
-					    unsigned int flags,
-					    unsigned int size,
-					    struct device_node *node,
-					    const struct irq_domain_ops *ops,
-					    void *host_data)
-{
-	return irq_domain_create_hierarchy(parent, flags, size,
-					   of_fwnode_handle(node),
-					   ops, host_data);
-}
-
 int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 			    unsigned int nr_irqs, int node, void *arg,
 			    bool realloc,
@@ -789,6 +721,40 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
 	return node ? &node->fwnode : NULL;
 }
 
+static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
+					 const struct irq_domain_ops *ops,
+					 void *host_data)
+{
+	struct irq_domain_info info = {
+		.fwnode		= of_fwnode_handle(of_node),
+		.hwirq_max	= ~0U,
+		.ops		= ops,
+		.host_data	= host_data,
+	};
+	struct irq_domain *d;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
+}
+
+static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
+					 unsigned int size,
+					 const struct irq_domain_ops *ops,
+					 void *host_data)
+{
+	struct irq_domain_info info = {
+		.fwnode		= of_fwnode_handle(of_node),
+		.size		= size,
+		.hwirq_max	= size,
+		.ops		= ops,
+		.host_data	= host_data,
+	};
+	struct irq_domain *d;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
+}
+
 #else /* CONFIG_IRQ_DOMAIN */
 static inline void irq_dispose_mapping(unsigned int virq) { }
 static inline struct irq_domain *irq_find_matching_fwnode(
-- 
cgit v1.2.3


From 14ebb11ba895c9223d9a453a17df2fd81410c96c Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:42 +0100
Subject: irqdomain: Drop irq_linear_revmap()

irq_linear_revmap() is deprecated and unused now. So remove it.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-50-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 712c662de981..c8e55cd1e352 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -492,12 +492,6 @@ static inline unsigned int irq_find_mapping(struct irq_domain *domain,
 	return 0;
 }
 
-static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
-					     irq_hw_number_t hwirq)
-{
-	return irq_find_mapping(domain, hwirq);
-}
-
 extern const struct irq_domain_ops irq_domain_simple_ops;
 
 /* stock xlate functions */
-- 
cgit v1.2.3


From 18e743e911020eb74cc4eaf549c18ed12a5acb9a Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:43 +0100
Subject: irqdomain: Use irq_domain_instantiate()'s return value as
 initializers

This makes the code more compact.

Note that irq_domain_create_hierarchy()'s handling of size is now part of
info's initializer (using the ternary operator). That makes more sense
while reading it.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-51-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index c8e55cd1e352..1e3858438e69 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -403,9 +403,8 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f
 		.ops		= ops,
 		.host_data	= host_data,
 	};
-	struct irq_domain *d;
+	struct irq_domain *d = irq_domain_instantiate(&info);
 
-	d = irq_domain_instantiate(&info);
 	return IS_ERR(d) ? NULL : d;
 }
 
@@ -424,9 +423,8 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *
 		.ops		= ops,
 		.host_data	= host_data,
 	};
-	struct irq_domain *d;
+	struct irq_domain *d = irq_domain_instantiate(&info);
 
-	d = irq_domain_instantiate(&info);
 	return IS_ERR(d) ? NULL : d;
 }
 
@@ -440,9 +438,8 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw
 		.ops		= ops,
 		.host_data	= host_data,
 	};
-	struct irq_domain *d;
+	struct irq_domain *d = irq_domain_instantiate(&info);
 
-	d = irq_domain_instantiate(&info);
 	return IS_ERR(d) ? NULL : d;
 }
 
@@ -554,18 +551,14 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *
 	struct irq_domain_info info = {
 		.fwnode		= fwnode,
 		.size		= size,
-		.hwirq_max	= size,
+		.hwirq_max	= size ? : ~0U,
 		.ops		= ops,
 		.host_data	= host_data,
 		.domain_flags	= flags,
 		.parent		= parent,
 	};
-	struct irq_domain *d;
+	struct irq_domain *d = irq_domain_instantiate(&info);
 
-	if (!info.size)
-		info.hwirq_max = ~0U;
-
-	d = irq_domain_instantiate(&info);
 	return IS_ERR(d) ? NULL : d;
 }
 
-- 
cgit v1.2.3


From 66cbf17fe67156608421e717975c415a85c08466 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:44 +0100
Subject: irqdomain: Make struct irq_domain_info variables const

This is just expressing it explicitly as irq_domain_instantiate() takes
const already. No functional change.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-52-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 1e3858438e69..66a26dfd254b 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -396,7 +396,7 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	struct irq_domain_info info = {
+	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
 		.hwirq_max	= max_irq,
 		.direct_max	= max_irq,
@@ -416,7 +416,7 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	struct irq_domain_info info = {
+	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
 		.size		= size,
 		.hwirq_max	= size,
@@ -432,7 +432,7 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	struct irq_domain_info info = {
+	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
 		.hwirq_max	= ~0,
 		.ops		= ops,
@@ -548,7 +548,7 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *
 					    const struct irq_domain_ops *ops,
 					    void *host_data)
 {
-	struct irq_domain_info info = {
+	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
 		.size		= size,
 		.hwirq_max	= size ? : ~0U,
-- 
cgit v1.2.3


From 2272a78b3f4a7a1621f00ac6e9c9232d12b7ff01 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:45 +0100
Subject: irqdomain: Improve kernel-docs of functions

Many of irqdomain.h's functions are referenced in Documentation/ but are
not properly documented. Therefore, document these.

And use "Returns:" tag consistently, so that it is properly generated in
the resulting docs.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-53-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 66a26dfd254b..a70e2ba0b91f 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -411,6 +411,15 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f
 unsigned int irq_create_direct_mapping(struct irq_domain *domain);
 #endif
 
+/**
+ * irq_domain_create_linear - Allocate and register a linear revmap irq_domain.
+ * @fwnode: pointer to interrupt controller's FW node.
+ * @size: Number of interrupts in the domain.
+ * @ops: map/unmap domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Returns: Newly created irq_domain
+ */
 static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode,
 					 unsigned int size,
 					 const struct irq_domain_ops *ops,
@@ -457,6 +466,18 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
 unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec);
 void irq_dispose_mapping(unsigned int virq);
 
+/**
+ * irq_create_mapping - Map a hardware interrupt into linux irq space
+ * @domain: domain owning this hardware interrupt or NULL for default domain
+ * @hwirq: hardware irq number in that domain space
+ *
+ * Only one mapping per hardware interrupt is permitted.
+ *
+ * If the sense/trigger is to be specified, set_irq_type() should be called
+ * on the number returned from that call.
+ *
+ * Returns: Linux irq number or 0 on error
+ */
 static inline unsigned int irq_create_mapping(struct irq_domain *domain,
 					      irq_hw_number_t hwirq)
 {
@@ -467,6 +488,13 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
 				       irq_hw_number_t hwirq,
 				       unsigned int *irq);
 
+/**
+ * irq_resolve_mapping - Find a linux irq from a hw irq number.
+ * @domain: domain owning this hardware interrupt
+ * @hwirq: hardware irq number in that domain space
+ *
+ * Returns: Interrupt descriptor
+ */
 static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
 						   irq_hw_number_t hwirq)
 {
@@ -477,6 +505,8 @@ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
  * irq_find_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
+ *
+ * Returns: Linux irq number or 0 if not found
  */
 static inline unsigned int irq_find_mapping(struct irq_domain *domain,
 					    irq_hw_number_t hwirq)
@@ -539,7 +569,8 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data);
  *
  * If successful the parent is associated to the new domain and the
  * domain flags are set.
- * Returns pointer to IRQ domain, or NULL on failure.
+ *
+ * Returns: A pointer to IRQ domain, or %NULL on failure.
  */
 static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
 					    unsigned int flags,
@@ -570,6 +601,15 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
 int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
 void irq_domain_deactivate_irq(struct irq_data *irq_data);
 
+/**
+ * irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain:	domain to allocate from
+ * @nr_irqs:	number of IRQs to allocate
+ * @node:	NUMA node id for memory allocation
+ * @arg:	domain specific argument
+ *
+ * See __irq_domain_alloc_irqs()' documentation.
+ */
 static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
 			unsigned int nr_irqs, int node, void *arg)
 {
-- 
cgit v1.2.3


From a4efe303e50e3222591ab6ec5eb0ea92b7260d96 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 19 Mar 2025 10:29:49 +0100
Subject: Documentation: irqdomain: Update it

The irqdomain documentaion became obsolete over time. Update and extend
it a bit with respect to the current code and HW.

Most notably the doubled documentation of irq_domain (from .rst and .h)
was unified and let only in .rst. A reference link was added to .h.

Furthermore:
 * Add some 'struct' keywords, so that the respective structs are
   hyperlinked
 * :c:member: use where appropriate to mark a member of a struct
 * Rephrase some wording to improve readability/understanding

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250319092951.37667-57-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a70e2ba0b91f..1a1786dc19d2 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -1,30 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * irq_domain - IRQ translation domains
+ * irq_domain - IRQ Translation Domains
  *
- * Translation infrastructure between hw and linux irq numbers.  This is
- * helpful for interrupt controllers to implement mapping between hardware
- * irq numbers and the Linux irq number space.
- *
- * irq_domains also have hooks for translating device tree or other
- * firmware interrupt representations into a hardware irq number that
- * can be mapped back to a Linux irq number without any extra platform
- * support code.
- *
- * Interrupt controller "domain" data structure. This could be defined as a
- * irq domain controller. That is, it handles the mapping between hardware
- * and virtual interrupt numbers for a given interrupt domain. The domain
- * structure is generally created by the PIC code for a given PIC instance
- * (though a domain can cover more than one PIC if they have a flat number
- * model). It's the domain callbacks that are responsible for setting the
- * irq_chip on a given irq_desc after it's been mapped.
- *
- * The host code and data structures use a fwnode_handle pointer to
- * identify the domain. In some cases, and in order to preserve source
- * code compatibility, this fwnode pointer is "upgraded" to a DT
- * device_node. For those firmware infrastructures that do not provide
- * a unique identifier for an interrupt controller, the irq_domain
- * code offers a fwnode allocator.
+ * See Documentation/core-api/irq/irq-domain.rst for the details.
  */
 
 #ifndef _LINUX_IRQDOMAIN_H
-- 
cgit v1.2.3


From 38c1e73fdeb37962324a3265ef95618dfa4552ab Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 6 May 2025 14:22:59 +0200
Subject: irqdomain: Consolidate coding style

Now that the file has been thrown through the mincer, finish the job and
consolidate the coding style.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h | 257 ++++++++++++++++++++--------------------------
 1 file changed, 113 insertions(+), 144 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 1a1786dc19d2..2f6e4c9dd743 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -39,9 +39,9 @@ struct msi_parent_ops;
  * pass a device-specific description of an interrupt.
  */
 struct irq_fwspec {
-	struct fwnode_handle *fwnode;
-	int param_count;
-	u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS];
+	struct fwnode_handle	*fwnode;
+	int			param_count;
+	u32			param[IRQ_DOMAIN_IRQ_SPEC_PARAMS];
 };
 
 /* Conversion function from of_phandle_args fields to fwspec  */
@@ -50,26 +50,26 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
 
 /**
  * struct irq_domain_ops - Methods for irq_domain objects
- * @match: Match an interrupt controller device node to a domain, returns
- *         1 on a match
- * @select: Match an interrupt controller fw specification. It is more generic
- *	    than @match as it receives a complete struct irq_fwspec. Therefore,
- *	    @select is preferred if provided. Returns 1 on a match.
- * @map: Create or update a mapping between a virtual irq number and a hw
- *       irq number. This is called only once for a given mapping.
- * @unmap: Dispose of such a mapping
- * @xlate: Given a device tree node and interrupt specifier, decode
- *         the hardware irq number and linux irq type value.
- * @alloc: Allocate @nr_irqs interrupts starting from @virq.
- * @free: Free @nr_irqs interrupts starting from @virq.
- * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only
- *	      reserve the vector. If unset, assign the vector (called from
- *	      request_irq()).
- * @deactivate: Disarm one interrupt (@irqd).
- * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and
- *	       linux irq type value (@out_type). This is a generalised @xlate
- *	       (over struct irq_fwspec) and is preferred if provided.
- * @debug_show: For domains to show specific data for an interrupt in debugfs.
+ * @match:	Match an interrupt controller device node to a domain, returns
+ *		1 on a match
+ * @select:	Match an interrupt controller fw specification. It is more generic
+ *		than @match as it receives a complete struct irq_fwspec. Therefore,
+ *		@select is preferred if provided. Returns 1 on a match.
+ * @map:	Create or update a mapping between a virtual irq number and a hw
+ *		irq number. This is called only once for a given mapping.
+ * @unmap:	Dispose of such a mapping
+ * @xlate:	Given a device tree node and interrupt specifier, decode
+ *		the hardware irq number and linux irq type value.
+ * @alloc:	Allocate @nr_irqs interrupts starting from @virq.
+ * @free:	Free @nr_irqs interrupts starting from @virq.
+ * @activate:	Activate one interrupt in HW (@irqd). If @reserve is set, only
+ *		reserve the vector. If unset, assign the vector (called from
+ *		request_irq()).
+ * @deactivate:	Disarm one interrupt (@irqd).
+ * @translate:	Given @fwspec, decode the hardware irq number (@out_hwirq) and
+ *		linux irq type value (@out_type). This is a generalised @xlate
+ *		(over struct irq_fwspec) and is preferred if provided.
+ * @debug_show:	For domains to show specific data for an interrupt in debugfs.
  *
  * Functions below are provided by the driver and called whenever a new mapping
  * is created or an old mapping is disposed. The driver can then proceed to
@@ -77,29 +77,29 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
  * to setup the irq_desc when returning from map().
  */
 struct irq_domain_ops {
-	int (*match)(struct irq_domain *d, struct device_node *node,
-		     enum irq_domain_bus_token bus_token);
-	int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec,
-		      enum irq_domain_bus_token bus_token);
-	int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw);
-	void (*unmap)(struct irq_domain *d, unsigned int virq);
-	int (*xlate)(struct irq_domain *d, struct device_node *node,
-		     const u32 *intspec, unsigned int intsize,
-		     unsigned long *out_hwirq, unsigned int *out_type);
+	int	(*match)(struct irq_domain *d, struct device_node *node,
+			 enum irq_domain_bus_token bus_token);
+	int	(*select)(struct irq_domain *d, struct irq_fwspec *fwspec,
+			  enum irq_domain_bus_token bus_token);
+	int	(*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw);
+	void	(*unmap)(struct irq_domain *d, unsigned int virq);
+	int	(*xlate)(struct irq_domain *d, struct device_node *node,
+			 const u32 *intspec, unsigned int intsize,
+			 unsigned long *out_hwirq, unsigned int *out_type);
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 	/* extended V2 interfaces to support hierarchy irq_domains */
-	int (*alloc)(struct irq_domain *d, unsigned int virq,
-		     unsigned int nr_irqs, void *arg);
-	void (*free)(struct irq_domain *d, unsigned int virq,
-		     unsigned int nr_irqs);
-	int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
-	void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
-	int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
-			 unsigned long *out_hwirq, unsigned int *out_type);
+	int	(*alloc)(struct irq_domain *d, unsigned int virq,
+			 unsigned int nr_irqs, void *arg);
+	void	(*free)(struct irq_domain *d, unsigned int virq,
+			unsigned int nr_irqs);
+	int	(*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
+	void	(*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
+	int	(*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
+			     unsigned long *out_hwirq, unsigned int *out_type);
 #endif
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-	void (*debug_show)(struct seq_file *m, struct irq_domain *d,
-			   struct irq_data *irqd, int ind);
+	void	(*debug_show)(struct seq_file *m, struct irq_domain *d,
+			      struct irq_data *irqd, int ind);
 #endif
 };
 
@@ -222,8 +222,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
 	return to_of_node(d->fwnode);
 }
 
-static inline void irq_domain_set_pm_device(struct irq_domain *d,
-					    struct device *dev)
+static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev)
 {
 	if (d)
 		d->pm_dev = dev;
@@ -239,14 +238,12 @@ enum {
 	IRQCHIP_FWNODE_NAMED_ID,
 };
 
-static inline
-struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name)
+static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name)
 {
 	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL);
 }
 
-static inline
-struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id)
+static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id)
 {
 	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name,
 					 NULL);
@@ -311,23 +308,17 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
 struct irq_domain *devm_irq_domain_instantiate(struct device *dev,
 					       const struct irq_domain_info *info);
 
-struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
-					    unsigned int size,
+struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size,
 					    unsigned int first_irq,
-					    const struct irq_domain_ops *ops,
-					    void *host_data);
-struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
-					    unsigned int size,
-					    unsigned int first_irq,
-					    irq_hw_number_t first_hwirq,
-					    const struct irq_domain_ops *ops,
-					    void *host_data);
+					    const struct irq_domain_ops *ops, void *host_data);
+struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size,
+					    unsigned int first_irq, irq_hw_number_t first_hwirq,
+					    const struct irq_domain_ops *ops, void *host_data);
 struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
 					    enum irq_domain_bus_token bus_token);
 void irq_set_default_domain(struct irq_domain *domain);
 struct irq_domain *irq_get_default_domain(void);
-int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
-			   irq_hw_number_t hwirq, int node,
+int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node,
 			   const struct irq_affinity_desc *affinity);
 
 extern const struct fwnode_operations irqchip_fwnode_ops;
@@ -337,12 +328,10 @@ static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode)
 	return fwnode && fwnode->ops == &irqchip_fwnode_ops;
 }
 
-void irq_domain_update_bus_token(struct irq_domain *domain,
-				 enum irq_domain_bus_token bus_token);
+void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token);
 
-static inline
-struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
-					    enum irq_domain_bus_token bus_token)
+static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
+							  enum irq_domain_bus_token bus_token)
 {
 	struct irq_fwspec fwspec = {
 		.fwnode = fwnode,
@@ -370,9 +359,9 @@ static inline struct irq_domain *irq_find_host(struct device_node *node)
 
 #ifdef CONFIG_IRQ_DOMAIN_NOMAP
 static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode,
-					 unsigned int max_irq,
-					 const struct irq_domain_ops *ops,
-					 void *host_data)
+							 unsigned int max_irq,
+							 const struct irq_domain_ops *ops,
+							 void *host_data)
 {
 	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
@@ -391,17 +380,17 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain);
 
 /**
  * irq_domain_create_linear - Allocate and register a linear revmap irq_domain.
- * @fwnode: pointer to interrupt controller's FW node.
- * @size: Number of interrupts in the domain.
- * @ops: map/unmap domain callbacks
- * @host_data: Controller private data pointer
+ * @fwnode:	pointer to interrupt controller's FW node.
+ * @size:	Number of interrupts in the domain.
+ * @ops:	map/unmap domain callbacks
+ * @host_data:	Controller private data pointer
  *
  * Returns: Newly created irq_domain
  */
 static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode,
-					 unsigned int size,
-					 const struct irq_domain_ops *ops,
-					 void *host_data)
+							  unsigned int size,
+							  const struct irq_domain_ops *ops,
+							  void *host_data)
 {
 	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
@@ -416,8 +405,8 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *
 }
 
 static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode,
-					 const struct irq_domain_ops *ops,
-					 void *host_data)
+							const struct irq_domain_ops *ops,
+							void *host_data)
 {
 	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
@@ -432,22 +421,19 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw
 
 void irq_domain_remove(struct irq_domain *domain);
 
-int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
-			 irq_hw_number_t hwirq);
-void irq_domain_associate_many(struct irq_domain *domain,
-			       unsigned int irq_base,
+int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq);
+void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
 			       irq_hw_number_t hwirq_base, int count);
 
-unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
-					 irq_hw_number_t hwirq,
+unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq,
 					 const struct irq_affinity_desc *affinity);
 unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec);
 void irq_dispose_mapping(unsigned int virq);
 
 /**
  * irq_create_mapping - Map a hardware interrupt into linux irq space
- * @domain: domain owning this hardware interrupt or NULL for default domain
- * @hwirq: hardware irq number in that domain space
+ * @domain:	domain owning this hardware interrupt or NULL for default domain
+ * @hwirq:	hardware irq number in that domain space
  *
  * Only one mapping per hardware interrupt is permitted.
  *
@@ -456,8 +442,7 @@ void irq_dispose_mapping(unsigned int virq);
  *
  * Returns: Linux irq number or 0 on error
  */
-static inline unsigned int irq_create_mapping(struct irq_domain *domain,
-					      irq_hw_number_t hwirq)
+static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq)
 {
 	return irq_create_mapping_affinity(domain, hwirq, NULL);
 }
@@ -468,8 +453,8 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
 
 /**
  * irq_resolve_mapping - Find a linux irq from a hw irq number.
- * @domain: domain owning this hardware interrupt
- * @hwirq: hardware irq number in that domain space
+ * @domain:	domain owning this hardware interrupt
+ * @hwirq:	hardware irq number in that domain space
  *
  * Returns: Interrupt descriptor
  */
@@ -481,8 +466,8 @@ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
 
 /**
  * irq_find_mapping() - Find a linux irq from a hw irq number.
- * @domain: domain owning this hardware interrupt
- * @hwirq: hardware irq number in that domain space
+ * @domain:	domain owning this hardware interrupt
+ * @hwirq:	hardware irq number in that domain space
  *
  * Returns: Linux irq number or 0 if not found
  */
@@ -501,14 +486,14 @@ extern const struct irq_domain_ops irq_domain_simple_ops;
 
 /* stock xlate functions */
 int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr,
-			const u32 *intspec, unsigned int intsize,
-			irq_hw_number_t *out_hwirq, unsigned int *out_type);
+			     const u32 *intspec, unsigned int intsize,
+			     irq_hw_number_t *out_hwirq, unsigned int *out_type);
 int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
-			const u32 *intspec, unsigned int intsize,
-			irq_hw_number_t *out_hwirq, unsigned int *out_type);
+			     const u32 *intspec, unsigned int intsize,
+			     irq_hw_number_t *out_hwirq, unsigned int *out_type);
 int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr,
-			const u32 *intspec, unsigned int intsize,
-			irq_hw_number_t *out_hwirq, unsigned int *out_type);
+				const u32 *intspec, unsigned int intsize,
+				irq_hw_number_t *out_hwirq, unsigned int *out_type);
 int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr,
 				  const u32 *intspec, unsigned int intsize,
 				  irq_hw_number_t *out_hwirq, unsigned int *out_type);
@@ -525,12 +510,9 @@ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest);
 int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
 
 /* V2 interfaces to support hierarchy IRQ domains. */
-struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
-					 unsigned int virq);
-void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-			 irq_hw_number_t hwirq,
-			 const struct irq_chip *chip,
-			 void *chip_data, irq_flow_handler_t handler,
+struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq);
+void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq,
+			 const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler,
 			 void *handler_data, const char *handler_name);
 void irq_domain_reset_irq_data(struct irq_data *irq_data);
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
@@ -551,11 +533,10 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data);
  * Returns: A pointer to IRQ domain, or %NULL on failure.
  */
 static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
-					    unsigned int flags,
-					    unsigned int size,
-					    struct fwnode_handle *fwnode,
-					    const struct irq_domain_ops *ops,
-					    void *host_data)
+							     unsigned int flags, unsigned int size,
+							     struct fwnode_handle *fwnode,
+							     const struct irq_domain_ops *ops,
+							     void *host_data)
 {
 	const struct irq_domain_info info = {
 		.fwnode		= fwnode,
@@ -571,9 +552,8 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *
 	return IS_ERR(d) ? NULL : d;
 }
 
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
-			    unsigned int nr_irqs, int node, void *arg,
-			    bool realloc,
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs,
+			    int node, void *arg, bool realloc,
 			    const struct irq_affinity_desc *affinity);
 void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
 int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
@@ -588,37 +568,29 @@ void irq_domain_deactivate_irq(struct irq_data *irq_data);
  *
  * See __irq_domain_alloc_irqs()' documentation.
  */
-static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
-			unsigned int nr_irqs, int node, void *arg)
+static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs,
+					int node, void *arg)
 {
-	return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false,
-				       NULL);
+	return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, NULL);
 }
 
-int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
-				  unsigned int virq,
-				  irq_hw_number_t hwirq,
-				  const struct irq_chip *chip,
+int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
+				  irq_hw_number_t hwirq, const struct irq_chip *chip,
 				  void *chip_data);
-void irq_domain_free_irqs_common(struct irq_domain *domain,
-				 unsigned int virq,
+void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq,
 				 unsigned int nr_irqs);
-void irq_domain_free_irqs_top(struct irq_domain *domain,
-			      unsigned int virq, unsigned int nr_irqs);
+void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs);
 
 int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg);
 int irq_domain_pop_irq(struct irq_domain *domain, int virq);
 
-int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
-				 unsigned int irq_base,
+int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base,
 				 unsigned int nr_irqs, void *arg);
 
-void irq_domain_free_irqs_parent(struct irq_domain *domain,
-				 unsigned int irq_base,
+void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base,
 				 unsigned int nr_irqs);
 
-int irq_domain_disconnect_hierarchy(struct irq_domain *domain,
-					   unsigned int virq);
+int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq);
 
 static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
 {
@@ -627,8 +599,7 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
 
 static inline bool irq_domain_is_ipi(struct irq_domain *domain)
 {
-	return domain->flags &
-		(IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE);
+	return domain->flags & (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE);
 }
 
 static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain)
@@ -657,14 +628,13 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
 }
 
 #else	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
-static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
-			unsigned int nr_irqs, int node, void *arg)
+static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs,
+					int node, void *arg)
 {
 	return -1;
 }
 
-static inline void irq_domain_free_irqs(unsigned int virq,
-					unsigned int nr_irqs) { }
+static inline void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { }
 
 static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
 {
@@ -704,8 +674,7 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
 #ifdef CONFIG_GENERIC_MSI_IRQ
-int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
-				  unsigned int type);
+int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type);
 void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq);
 #else
 static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
@@ -727,8 +696,8 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
 }
 
 static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
-					 const struct irq_domain_ops *ops,
-					 void *host_data)
+						     const struct irq_domain_ops *ops,
+						     void *host_data)
 {
 	struct irq_domain_info info = {
 		.fwnode		= of_fwnode_handle(of_node),
@@ -743,9 +712,9 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node
 }
 
 static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
-					 unsigned int size,
-					 const struct irq_domain_ops *ops,
-					 void *host_data)
+						       unsigned int size,
+						       const struct irq_domain_ops *ops,
+						       void *host_data)
 {
 	struct irq_domain_info info = {
 		.fwnode		= of_fwnode_handle(of_node),
@@ -762,8 +731,8 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no
 
 #else /* CONFIG_IRQ_DOMAIN */
 static inline void irq_dispose_mapping(unsigned int virq) { }
-static inline struct irq_domain *irq_find_matching_fwnode(
-	struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token)
+static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
+							  enum irq_domain_bus_token bus_token)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From e51b27438a10391fdc94dd2046d9ffa9c2679c74 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 13 May 2025 18:28:11 +0100
Subject: irqchip: Make irq-msi-lib.h globally available

Move irq-msi-lib.h into include/linux/irqchip, making it available
to compilation units outside of drivers/irqchip.

This requires some churn in drivers to fetch it from the new location,
generated using this script:

	git grep -l -w \"irq-msi-lib.h\" | \
	xargs sed -i -e 's:"irq-msi-lib.h":\<linux/irqchip/irq-msi-lib.h\>:'

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250513172819.2216709-2-maz@kernel.org
---
 include/linux/irqchip/irq-msi-lib.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 include/linux/irqchip/irq-msi-lib.h

(limited to 'include/linux')

diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h
new file mode 100644
index 000000000000..dd8d1d138544
--- /dev/null
+++ b/include/linux/irqchip/irq-msi-lib.h
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2022 Linutronix GmbH
+// Copyright (C) 2022 Intel
+
+#ifndef _IRQCHIP_IRQ_MSI_LIB_H
+#define _IRQCHIP_IRQ_MSI_LIB_H
+
+#include <linux/bits.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+
+#ifdef CONFIG_PCI_MSI
+#define MATCH_PCI_MSI		BIT(DOMAIN_BUS_PCI_MSI)
+#else
+#define MATCH_PCI_MSI		(0)
+#endif
+
+#define MATCH_PLATFORM_MSI	BIT(DOMAIN_BUS_PLATFORM_MSI)
+
+int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+			      enum irq_domain_bus_token bus_token);
+
+bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+			       struct irq_domain *real_parent,
+			       struct msi_domain_info *info);
+
+#endif /* _IRQCHIP_IRQ_MSI_LIB_H */
-- 
cgit v1.2.3


From e4d001b54f78769ba1a1404c2801ae95e19fd893 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 13 May 2025 18:28:12 +0100
Subject: genirq/msi: Add helper for creating MSI-parent irq domains

Creating an irq domain that serves as an MSI parent requires
a substantial amount of esoteric boiler-plate code, some of
which is often provided twice (such as the bus token).

To make things a bit simpler for the unsuspecting MSI tinkerer,
provide a helper that does it for them, and serves as documentation
of what needs to be provided.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250513172819.2216709-3-maz@kernel.org
---
 include/linux/msi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index f4b94ccaf0c1..6863540f4b71 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -636,6 +636,10 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
 					 struct msi_domain_info *info,
 					 struct irq_domain *parent);
 
+struct irq_domain_info;
+struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info,
+						const struct msi_parent_ops *msi_parent_ops);
+
 bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
 				  const struct msi_domain_template *template,
 				  unsigned int hwsize, void *domain_data,
-- 
cgit v1.2.3


From dcd21b609d4abc7303f8683bce4f35d78d7d6830 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 27 Apr 2025 18:21:06 -0400
Subject: NFS: Avoid flushing data while holding directory locks in
 nfs_rename()

The Linux client assumes that all filehandles are non-volatile for
renames within the same directory (otherwise sillyrename cannot work).
However, the existence of the Linux 'subtree_check' export option has
meant that nfs_rename() has always assumed it needs to flush writes
before attempting to rename.

Since NFSv4 does allow the client to query whether or not the server
exhibits this behaviour, and since knfsd does actually set the
appropriate flag when 'subtree_check' is enabled on an export, it
should be OK to optimise away the write flushing behaviour in the cases
where it is clearly not needed.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
---
 include/linux/nfs_fs_sb.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 71319637a84e..ee03f3cef30c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -213,6 +213,15 @@ struct nfs_server {
 	char			*fscache_uniq;	/* Uniquifier (or NULL) */
 #endif
 
+	/* The following #defines numerically match the NFSv4 equivalents */
+#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1)
+#define NFS_FH_VOLATILE_ANY (0x2)
+#define NFS_FH_VOL_MIGRATION (0x4)
+#define NFS_FH_VOL_RENAME (0x8)
+#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME)
+	u32			fh_expire_type;	/* V4 bitmask representing file
+						   handle volatility type for
+						   this filesystem */
 	u32			pnfs_blksize;	/* layout_blksize attr */
 #if IS_ENABLED(CONFIG_NFS_V4)
 	u32			attr_bitmask[3];/* V4 bitmask representing the set
@@ -236,9 +245,6 @@ struct nfs_server {
 	u32			acl_bitmask;	/* V4 bitmask representing the ACEs
 						   that are supported on this
 						   filesystem */
-	u32			fh_expire_type;	/* V4 bitmask representing file
-						   handle volatility type for
-						   this filesystem */
 	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
 	struct rpc_wait_queue	roc_rpcwaitq;
 	void			*pnfs_ld_data;	/* per mount point data */
-- 
cgit v1.2.3


From 7b151e4efdde7cc7cfaae66e497d12487a70c6e9 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 14 May 2025 20:54:29 +0200
Subject: net: phy: fixed_phy: remove fixed_phy_register_with_gpiod

Since its introduction 6 yrs ago this functions has never had a user.
So remove it.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/ccbeef28-65ae-4e28-b1db-816c44338dee@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 1acafd86ab13..3392c09b5d24 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -13,7 +13,6 @@ struct fixed_phy_status {
 };
 
 struct device_node;
-struct gpio_desc;
 struct net_device;
 
 #if IS_ENABLED(CONFIG_FIXED_PHY)
@@ -24,11 +23,6 @@ extern struct phy_device *fixed_phy_register(unsigned int irq,
 					     struct fixed_phy_status *status,
 					     struct device_node *np);
 
-extern struct phy_device *
-fixed_phy_register_with_gpiod(unsigned int irq,
-			      struct fixed_phy_status *status,
-			      struct gpio_desc *gpiod);
-
 extern void fixed_phy_unregister(struct phy_device *phydev);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
@@ -46,14 +40,6 @@ static inline struct phy_device *fixed_phy_register(unsigned int irq,
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct phy_device *
-fixed_phy_register_with_gpiod(unsigned int irq,
-			      struct fixed_phy_status *status,
-			      struct gpio_desc *gpiod)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline void fixed_phy_unregister(struct phy_device *phydev)
 {
 }
-- 
cgit v1.2.3


From c46286fdd6aa1d0e33c245bcffe9ff2428a777bd Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 15 May 2025 18:49:26 +0200
Subject: mr: consolidate the ipmr_can_free_table() checks.

Guoyu Yin reported a splat in the ipmr netns cleanup path:

WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_free_table net/ipv4/ipmr.c:440 [inline]
WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361
Modules linked in:
CPU: 2 UID: 0 PID: 14564 Comm: syz.4.838 Not tainted 6.14.0 #1
Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:ipmr_free_table net/ipv4/ipmr.c:440 [inline]
RIP: 0010:ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361
Code: ff df 48 c1 ea 03 80 3c 02 00 75 7d 48 c7 83 60 05 00 00 00 00 00 00 5b 5d 41 5c 41 5d 41 5e e9 71 67 7f 00 e8 4c 2d 8a fd 90 <0f> 0b 90 eb 93 e8 41 2d 8a fd 0f b6 2d 80 54 ea 01 31 ff 89 ee e8
RSP: 0018:ffff888109547c58 EFLAGS: 00010293
RAX: 0000000000000000 RBX: ffff888108c12dc0 RCX: ffffffff83e09868
RDX: ffff8881022b3300 RSI: ffffffff83e098d4 RDI: 0000000000000005
RBP: ffff888104288000 R08: 0000000000000000 R09: ffffed10211825c9
R10: 0000000000000001 R11: ffff88801816c4a0 R12: 0000000000000001
R13: ffff888108c13320 R14: ffff888108c12dc0 R15: fffffbfff0b74058
FS:  00007f84f39316c0(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f84f3930f98 CR3: 0000000113b56000 CR4: 0000000000350ef0
Call Trace:
 <TASK>
 ipmr_net_exit_batch+0x50/0x90 net/ipv4/ipmr.c:3160
 ops_exit_list+0x10c/0x160 net/core/net_namespace.c:177
 setup_net+0x47d/0x8e0 net/core/net_namespace.c:394
 copy_net_ns+0x25d/0x410 net/core/net_namespace.c:516
 create_new_namespaces+0x3f6/0xaf0 kernel/nsproxy.c:110
 unshare_nsproxy_namespaces+0xc3/0x180 kernel/nsproxy.c:228
 ksys_unshare+0x78d/0x9a0 kernel/fork.c:3342
 __do_sys_unshare kernel/fork.c:3413 [inline]
 __se_sys_unshare kernel/fork.c:3411 [inline]
 __x64_sys_unshare+0x31/0x40 kernel/fork.c:3411
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xa6/0x1a0 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f84f532cc29
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f84f3931038 EFLAGS: 00000246 ORIG_RAX: 0000000000000110
RAX: ffffffffffffffda RBX: 00007f84f5615fa0 RCX: 00007f84f532cc29
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000040000400
RBP: 00007f84f53fba18 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000000 R14: 00007f84f5615fa0 R15: 00007fff51c5f328
 </TASK>

The running kernel has CONFIG_IP_MROUTE_MULTIPLE_TABLES disabled, and
the sanity check for such build is still too loose.

Address the issue consolidating the relevant sanity check in a single
helper regardless of the kernel configuration. Also share it between
the ipv4 and ipv6 code.

Reported-by: Guoyu Yin <y04609127@gmail.com>
Fixes: 50b94204446e ("ipmr: tune the ipmr_can_free_table() checks.")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/372dc261e1bf12742276e1b984fc5a071b7fc5a8.1747321903.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mroute_base.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 58a2401e4b55..0075f6e5c3da 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -262,6 +262,11 @@ struct mr_table {
 	int			mroute_reg_vif_num;
 };
 
+static inline bool mr_can_free_table(struct net *net)
+{
+	return !check_net(net) || !net_initialized(net);
+}
+
 #ifdef CONFIG_IP_MROUTE_COMMON
 void vif_device_init(struct vif_device *v,
 		     struct net_device *dev,
-- 
cgit v1.2.3


From c80bdb1c55719cd6308d648a7920272a3be09e34 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 16 May 2025 13:16:44 -0600
Subject: io_uring: pass in struct io_big_cqe to io_alloc_ocqe()

Rather than pass extra1/extra2 separately, just pass in the (now) named
io_big_cqe struct instead. The callers that don't use/support CQE32 will
now just pass a single NULL, rather than two seperate mystery zero
values.

Move the clearing of the big_cqe elements into io_alloc_ocqe() as well,
so it can get moved out of the generic code.

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 00dbd7cd0e7d..2922635986f5 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -710,7 +710,7 @@ struct io_kiocb {
 	const struct cred		*creds;
 	struct io_wq_work		work;
 
-	struct {
+	struct io_big_cqe {
 		u64			extra1;
 		u64			extra2;
 	} big_cqe;
-- 
cgit v1.2.3


From c451e2da54bce183fbb270ec01ab3ca725ddf943 Mon Sep 17 00:00:00 2001
From: Sumanth Gavini <sumanth.gavini@yahoo.com>
Date: Sun, 18 May 2025 01:57:32 -0700
Subject: regulator: max8952: Correct Samsung "Electronics" spelling in
 copyright headers

Fix the misspelling of 'Electronics' in max8952 driver copyright headers.

Signed-off-by: Sumanth Gavini <sumanth.gavini@yahoo.com>
Link: https://patch.msgid.link/20250518085734.88890-7-sumanth.gavini@yahoo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/max8952.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h
index 8712c091abf0..61dcd8e00a2f 100644
--- a/include/linux/regulator/max8952.h
+++ b/include/linux/regulator/max8952.h
@@ -2,7 +2,7 @@
 /*
  * max8952.h - Voltage regulation for the Maxim 8952
  *
- *  Copyright (C) 2010 Samsung Electrnoics
+ *  Copyright (C) 2010 Samsung Electronics
  *  MyungJoo Ham <myungjoo.ham@samsung.com>
  */
 
-- 
cgit v1.2.3


From ec23a899d96f9ee3389abe6c516d09cae2fde5e3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 16 May 2025 15:32:24 +0200
Subject: spi: sh-msiof: Move register definitions to <linux/spi/sh_msiof.h>

Move the MSIOF register and register bit definitions from the MSIOF SPI
driver to the existing header file <linux/spi/sh_msiof.h>, so they can
be shared with the MSIOF I2S driver.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/066d1086973eb309006258484e9fe8138807e565.1747401908.git.geert+renesas@glider.be
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/sh_msiof.h | 125 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h
index f950d280461b..9fbef3fd4056 100644
--- a/include/linux/spi/sh_msiof.h
+++ b/include/linux/spi/sh_msiof.h
@@ -2,6 +2,131 @@
 #ifndef __SPI_SH_MSIOF_H__
 #define __SPI_SH_MSIOF_H__
 
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+
+#define SITMDR1	0x00	/* Transmit Mode Register 1 */
+#define SITMDR2	0x04	/* Transmit Mode Register 2 */
+#define SITMDR3	0x08	/* Transmit Mode Register 3 */
+#define SIRMDR1	0x10	/* Receive Mode Register 1 */
+#define SIRMDR2	0x14	/* Receive Mode Register 2 */
+#define SIRMDR3	0x18	/* Receive Mode Register 3 */
+#define SITSCR	0x20	/* Transmit Clock Select Register */
+#define SIRSCR	0x22	/* Receive Clock Select Register (SH, A1, APE6) */
+#define SICTR	0x28	/* Control Register */
+#define SIFCTR	0x30	/* FIFO Control Register */
+#define SISTR	0x40	/* Status Register */
+#define SIIER	0x44	/* Interrupt Enable Register */
+#define SITDR1	0x48	/* Transmit Control Data Register 1 (SH, A1) */
+#define SITDR2	0x4c	/* Transmit Control Data Register 2 (SH, A1) */
+#define SITFDR	0x50	/* Transmit FIFO Data Register */
+#define SIRDR1	0x58	/* Receive Control Data Register 1 (SH, A1) */
+#define SIRDR2	0x5c	/* Receive Control Data Register 2 (SH, A1) */
+#define SIRFDR	0x60	/* Receive FIFO Data Register */
+
+/* SITMDR1 and SIRMDR1 */
+#define SIMDR1_TRMD		BIT(31)		/* Transfer Mode (1 = Master mode) */
+#define SIMDR1_SYNCMD		GENMASK(29, 28)	/* SYNC Mode */
+#define SIMDR1_SYNCMD_PULSE	0U		/*   Frame start sync pulse */
+#define SIMDR1_SYNCMD_SPI	2U		/*   Level mode/SPI */
+#define SIMDR1_SYNCMD_LR	3U		/*   L/R mode */
+#define SIMDR1_SYNCAC		BIT(25)		/* Sync Polarity (1 = Active-low) */
+#define SIMDR1_BITLSB		BIT(24)		/* MSB/LSB First (1 = LSB first) */
+#define SIMDR1_DTDL		GENMASK(22, 20)	/* Data Pin Bit Delay for MSIOF_SYNC */
+#define SIMDR1_SYNCDL		GENMASK(18, 16)	/* Frame Sync Signal Timing Delay */
+#define SIMDR1_FLD		GENMASK(3, 2)	/* Frame Sync Signal Interval (0-3) */
+#define SIMDR1_XXSTP		BIT(0)		/* Transmission/Reception Stop on FIFO */
+/* SITMDR1 */
+#define SITMDR1_PCON		BIT(30)		/* Transfer Signal Connection */
+#define SITMDR1_SYNCCH		GENMASK(27, 26)	/* Sync Signal Channel Select */
+						/* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */
+
+/* SITMDR2 and SIRMDR2 */
+#define SIMDR2_GRP		GENMASK(31, 30)	/* Group Count */
+#define SIMDR2_BITLEN1		GENMASK(28, 24)	/* Data Size (8-32 bits) */
+#define SIMDR2_WDLEN1		GENMASK(23, 16)	/* Word Count (1-64/256 (SH, A1))) */
+#define SIMDR2_GRPMASK		GENMASK(3, 0)	/* Group Output Mask 1-4 (SH, A1) */
+
+/* SITMDR3 and SIRMDR3 */
+#define SIMDR3_BITLEN2		GENMASK(28, 24)	/* Data Size (8-32 bits) */
+#define SIMDR3_WDLEN2		GENMASK(23, 16)	/* Word Count (1-64/256 (SH, A1))) */
+
+/* SITSCR and SIRSCR */
+#define SISCR_BRPS		GENMASK(12, 8)	/* Prescaler Setting (1-32) */
+#define SISCR_BRDV		GENMASK(2, 0)	/* Baud Rate Generator's Division Ratio */
+
+/* SICTR */
+#define SICTR_TSCKIZ		GENMASK(31, 30)	/* Transmit Clock I/O Polarity Select */
+#define SICTR_TSCKIZ_SCK	BIT(31)		/*   Disable SCK when TX disabled */
+#define SICTR_TSCKIZ_POL	BIT(30)		/*   Transmit Clock Polarity */
+#define SICTR_RSCKIZ		GENMASK(29, 28) /* Receive Clock Polarity Select */
+#define SICTR_RSCKIZ_SCK	BIT(29)		/*   Must match CTR_TSCKIZ_SCK */
+#define SICTR_RSCKIZ_POL	BIT(28)		/*   Receive Clock Polarity */
+#define SICTR_TEDG		BIT(27)		/* Transmit Timing (1 = falling edge) */
+#define SICTR_REDG		BIT(26)		/* Receive Timing (1 = falling edge) */
+#define SICTR_TXDIZ		GENMASK(23, 22)	/* Pin Output When TX is Disabled */
+#define SICTR_TXDIZ_LOW		0U		/*   0 */
+#define SICTR_TXDIZ_HIGH	1U		/*   1 */
+#define SICTR_TXDIZ_HIZ		2U		/*   High-impedance */
+#define SICTR_TSCKE		BIT(15)		/* Transmit Serial Clock Output Enable */
+#define SICTR_TFSE		BIT(14)		/* Transmit Frame Sync Signal Output Enable */
+#define SICTR_TXE		BIT(9)		/* Transmit Enable */
+#define SICTR_RXE		BIT(8)		/* Receive Enable */
+#define SICTR_TXRST		BIT(1)		/* Transmit Reset */
+#define SICTR_RXRST		BIT(0)		/* Receive Reset */
+
+/* SIFCTR */
+#define SIFCTR_TFWM		GENMASK(31, 29)	/* Transmit FIFO Watermark */
+#define SIFCTR_TFWM_64		0U		/*  Transfer Request when 64 empty stages */
+#define SIFCTR_TFWM_32		1U		/*  Transfer Request when 32 empty stages */
+#define SIFCTR_TFWM_24		2U		/*  Transfer Request when 24 empty stages */
+#define SIFCTR_TFWM_16		3U		/*  Transfer Request when 16 empty stages */
+#define SIFCTR_TFWM_12		4U		/*  Transfer Request when 12 empty stages */
+#define SIFCTR_TFWM_8		5U		/*  Transfer Request when 8 empty stages */
+#define SIFCTR_TFWM_4		6U		/*  Transfer Request when 4 empty stages */
+#define SIFCTR_TFWM_1		7U		/*  Transfer Request when 1 empty stage */
+#define SIFCTR_TFUA		GENMASK(28, 20) /* Transmit FIFO Usable Area */
+#define SIFCTR_RFWM		GENMASK(15, 13)	/* Receive FIFO Watermark */
+#define SIFCTR_RFWM_1		0U		/*  Transfer Request when 1 valid stages */
+#define SIFCTR_RFWM_4		1U		/*  Transfer Request when 4 valid stages */
+#define SIFCTR_RFWM_8		2U		/*  Transfer Request when 8 valid stages */
+#define SIFCTR_RFWM_16		3U		/*  Transfer Request when 16 valid stages */
+#define SIFCTR_RFWM_32		4U		/*  Transfer Request when 32 valid stages */
+#define SIFCTR_RFWM_64		5U		/*  Transfer Request when 64 valid stages */
+#define SIFCTR_RFWM_128		6U		/*  Transfer Request when 128 valid stages */
+#define SIFCTR_RFWM_256		7U		/*  Transfer Request when 256 valid stages */
+#define SIFCTR_RFUA		GENMASK(12, 4)	/* Receive FIFO Usable Area (0x40 = full) */
+
+/* SISTR */
+#define SISTR_TFEMP		BIT(29) /* Transmit FIFO Empty */
+#define SISTR_TDREQ		BIT(28) /* Transmit Data Transfer Request */
+#define SISTR_TEOF		BIT(23) /* Frame Transmission End */
+#define SISTR_TFSERR		BIT(21) /* Transmit Frame Synchronization Error */
+#define SISTR_TFOVF		BIT(20) /* Transmit FIFO Overflow */
+#define SISTR_TFUDF		BIT(19) /* Transmit FIFO Underflow */
+#define SISTR_RFFUL		BIT(13) /* Receive FIFO Full */
+#define SISTR_RDREQ		BIT(12) /* Receive Data Transfer Request */
+#define SISTR_REOF		BIT(7)  /* Frame Reception End */
+#define SISTR_RFSERR		BIT(5)  /* Receive Frame Synchronization Error */
+#define SISTR_RFUDF		BIT(4)  /* Receive FIFO Underflow */
+#define SISTR_RFOVF		BIT(3)  /* Receive FIFO Overflow */
+
+/* SIIER */
+#define SIIER_TDMAE		BIT(31) /* Transmit Data DMA Transfer Req. Enable */
+#define SIIER_TFEMPE		BIT(29) /* Transmit FIFO Empty Enable */
+#define SIIER_TDREQE		BIT(28) /* Transmit Data Transfer Request Enable */
+#define SIIER_TEOFE		BIT(23) /* Frame Transmission End Enable */
+#define SIIER_TFSERRE		BIT(21) /* Transmit Frame Sync Error Enable */
+#define SIIER_TFOVFE		BIT(20) /* Transmit FIFO Overflow Enable */
+#define SIIER_TFUDFE		BIT(19) /* Transmit FIFO Underflow Enable */
+#define SIIER_RDMAE		BIT(15) /* Receive Data DMA Transfer Req. Enable */
+#define SIIER_RFFULE		BIT(13) /* Receive FIFO Full Enable */
+#define SIIER_RDREQE		BIT(12) /* Receive Data Transfer Request Enable */
+#define SIIER_REOFE		BIT(7)  /* Frame Reception End Enable */
+#define SIIER_RFSERRE		BIT(5)  /* Receive Frame Sync Error Enable */
+#define SIIER_RFUDFE		BIT(4)  /* Receive FIFO Underflow Enable */
+#define SIIER_RFOVFE		BIT(3)  /* Receive FIFO Overflow Enable */
+
 enum {
 	MSIOF_SPI_HOST,
 	MSIOF_SPI_TARGET,
-- 
cgit v1.2.3


From 08d6ee6d8a10aef958c2af16bb121070290ed589 Mon Sep 17 00:00:00 2001
From: Nikhil Jha <njha@janestreet.com>
Date: Wed, 19 Mar 2025 13:02:39 -0400
Subject: sunrpc: implement rfc2203 rpcsec_gss seqnum cache

This implements a sequence number cache of the last three (right now
hardcoded) sent sequence numbers for a given XID, as suggested by the
RFC.

From RFC2203 5.3.3.1:

"Note that the sequence number algorithm requires that the client
increment the sequence number even if it is retrying a request with
the same RPC transaction identifier.  It is not infrequent for
clients to get into a situation where they send two or more attempts
and a slow server sends the reply for the first attempt. With
RPCSEC_GSS, each request and reply will have a unique sequence
number. If the client wishes to improve turn around time on the RPC
call, it can cache the RPCSEC_GSS sequence number of each request it
sends. Then when it receives a response with a matching RPC
transaction identifier, it can compute the checksum of each sequence
number in the cache to try to match the checksum in the reply's
verifier."

Signed-off-by: Nikhil Jha <njha@janestreet.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
---
 include/linux/sunrpc/xprt.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 81b952649d35..f46d1fb8f71a 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -30,6 +30,8 @@
 #define RPC_MAXCWND(xprt)	((xprt)->max_reqs << RPC_CWNDSHIFT)
 #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
 
+#define RPC_GSS_SEQNO_ARRAY_SIZE 3U
+
 enum rpc_display_format_t {
 	RPC_DISPLAY_ADDR = 0,
 	RPC_DISPLAY_PORT,
@@ -66,7 +68,8 @@ struct rpc_rqst {
 	struct rpc_cred *	rq_cred;	/* Bound cred */
 	__be32			rq_xid;		/* request XID */
 	int			rq_cong;	/* has incremented xprt->cong */
-	u32			rq_seqno;	/* gss seq no. used on req. */
+	u32			rq_seqnos[RPC_GSS_SEQNO_ARRAY_SIZE];	/* past gss req seq nos. */
+	unsigned int		rq_seqno_count;	/* number of entries in rq_seqnos */
 	int			rq_enc_pages_num;
 	struct page		**rq_enc_pages;	/* scratch pages for use by
 						   gss privacy code */
@@ -119,6 +122,18 @@ struct rpc_rqst {
 #define rq_svec			rq_snd_buf.head
 #define rq_slen			rq_snd_buf.len
 
+static inline int xprt_rqst_add_seqno(struct rpc_rqst *req, u32 seqno)
+{
+	if (likely(req->rq_seqno_count < RPC_GSS_SEQNO_ARRAY_SIZE))
+		req->rq_seqno_count++;
+
+	/* Shift array to make room for the newest element at the beginning */
+	memmove(&req->rq_seqnos[1], &req->rq_seqnos[0],
+		(RPC_GSS_SEQNO_ARRAY_SIZE - 1) * sizeof(req->rq_seqnos[0]));
+	req->rq_seqnos[0] = seqno;
+	return 0;
+}
+
 /* RPC transport layer security policies */
 enum xprtsec_policies {
 	RPC_XPRTSEC_NONE = 0,
-- 
cgit v1.2.3


From e5296637a322b840d772f88f4d58b4bc72b29058 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 8 Apr 2025 09:43:15 -0400
Subject: nfs: add a refcount tracker for struct net as held by the nfs_client

These are long-held references to the netns, so make sure the refcount
tracker is aware of them.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index ee03f3cef30c..e02f4c4e9cc4 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -125,6 +125,7 @@ struct nfs_client {
 	 */
 	char			cl_ipaddr[48];
 	struct net		*cl_net;
+	netns_tracker		cl_ns_tracker;
 	struct list_head	pending_cb_stateids;
 	struct rcu_head		rcu;
 
-- 
cgit v1.2.3


From 1cb0f56d96185cb20e63e191fc291191823e6f52 Mon Sep 17 00:00:00 2001
From: Paul Chaignon <paul.chaignon@gmail.com>
Date: Mon, 19 May 2025 15:43:57 +0200
Subject: bpf: WARN_ONCE on verifier bugs

Throughout the verifier's logic, there are multiple checks for
inconsistent states that should never happen and would indicate a
verifier bug. These bugs are typically logged in the verifier logs and
sometimes preceded by a WARN_ONCE.

This patch reworks these checks to consistently emit a verifier log AND
a warning when CONFIG_DEBUG_KERNEL is enabled. The consistent use of
WARN_ONCE should help fuzzers (ex. syzkaller) expose any situation
where they are actually able to reach one of those buggy verifier
states.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Link: https://lore.kernel.org/r/aCs1nYvNNMq8dAWP@mail.gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          |  6 ++++++
 include/linux/bpf_verifier.h | 11 +++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 83c56f40842b..5b25d278409b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -346,6 +346,12 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 	}
 }
 
+#if IS_ENABLED(CONFIG_DEBUG_KERNEL)
+#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
+#else
+#define BPF_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+#endif
+
 static inline u32 btf_field_type_size(enum btf_field_type type)
 {
 	switch (type) {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index cedd66867ecf..78c97e12ea4e 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -839,6 +839,17 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
 				  u32 insn_off,
 				  const char *prefix_fmt, ...);
 
+#define verifier_bug_if(cond, env, fmt, args...)						\
+	({											\
+		bool __cond = (cond);								\
+		if (unlikely(__cond)) {								\
+			BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args);		\
+			bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args);	\
+		}										\
+		(__cond);									\
+	})
+#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args)
+
 static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *cur = env->cur_state;
-- 
cgit v1.2.3


From 541a4219bd66bef56d93dbd306dc64a4d70ae99e Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Wed, 14 May 2025 17:19:33 -0700
Subject: cgroup: compare css to cgroup::self in helper for distingushing css

Adjust the implementation of css_is_cgroup() so that it compares the given
css to cgroup::self. Rename the function to css_is_self() in order to
reflect that. Change the existing css->ss NULL check to a warning in the
true branch. Finally, adjust call sites to use the new function name.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1f5b0a4a3356..989c08b09691 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -347,9 +347,15 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
 	return css->flags & CSS_DYING;
 }
 
-static inline bool css_is_cgroup(struct cgroup_subsys_state *css)
+static inline bool css_is_self(struct cgroup_subsys_state *css)
 {
-	return css->ss == NULL;
+	if (css == &css->cgroup->self) {
+		/* cgroup::self should not have subsystem association */
+		WARN_ON(css->ss != NULL);
+		return true;
+	}
+
+	return false;
 }
 
 static inline void cgroup_get(struct cgroup *cgrp)
-- 
cgit v1.2.3


From 4d6d35d3417dcab14a9b1b9771c3cd62e8ed442b Mon Sep 17 00:00:00 2001
From: "Yo-Jung (Leo) Lin" <leo.lin@canonical.com>
Date: Wed, 30 Apr 2025 19:26:16 +0800
Subject: i2c: smbus: introduce Write Disable-aware SPD instantiating functions

Some SMBus controllers may restrict writes to addresses where SPD sensors
may reside. This may lead to some SPD sensors not functioning correctly,
and might need extra handling. Introduce new SPD-instantiating functions
that are aware of this, and use them instead.

Signed-off-by: Yo-Jung Lin (Leo) <leo.lin@canonical.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20250430-for-upstream-i801-spd5118-no-instantiate-v2-1-2f54d91ae2c7@canonical.com
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
---
 include/linux/i2c-smbus.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h
index ced1c6ead52a..dc1bd2ab4c13 100644
--- a/include/linux/i2c-smbus.h
+++ b/include/linux/i2c-smbus.h
@@ -44,9 +44,11 @@ static inline void i2c_free_slave_host_notify_device(struct i2c_client *client)
 #endif
 
 #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI)
-void i2c_register_spd(struct i2c_adapter *adap);
+void i2c_register_spd_write_disable(struct i2c_adapter *adap);
+void i2c_register_spd_write_enable(struct i2c_adapter *adap);
 #else
-static inline void i2c_register_spd(struct i2c_adapter *adap) { }
+static inline void i2c_register_spd_write_disable(struct i2c_adapter *adap) { }
+static inline void i2c_register_spd_write_enable(struct i2c_adapter *adap) { }
 #endif
 
 #endif /* _LINUX_I2C_SMBUS_H */
-- 
cgit v1.2.3


From 5da3bfa029d6809e192d112f39fca4dbe0137aaf Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Wed, 14 May 2025 17:19:34 -0700
Subject: cgroup: use separate rstat trees for each subsystem

Different subsystems may call cgroup_rstat_updated() within the same
cgroup, resulting in a tree of pending updates from multiple subsystems.
When one of these subsystems is flushed via cgroup_rstat_flushed(), all
other subsystems with pending updates on the tree will also be flushed.

Change the paradigm of having a single rstat tree for all subsystems to
having separate trees for each subsystem. This separation allows for
subsystems to perform flushes without the side effects of other subsystems.
As an example, flushing the cpu stats will no longer cause the memory stats
to be flushed and vice versa.

In order to achieve subsystem-specific trees, change the tree node type
from cgroup to cgroup_subsys_state pointer. Then remove those pointers from
the cgroup and instead place them on the css. Finally, change update/flush
functions to make use of the different node type (css). These changes allow
a specific subsystem to be associated with an update or flush. Separate
rstat trees will now exist for each unique subsystem.

Since updating/flushing will now be done at the subsystem level, there is
no longer a need to keep track of updated css nodes at the cgroup level.
The list management of these nodes done within the cgroup (rstat_css_list
and related) has been removed accordingly.

Conditional guards for checking validity of a given css were placed within
css_rstat_updated/flush() to prevent undefined behavior occuring from kfunc
usage in bpf programs. Guards were also placed within css_rstat_init/exit()
in order to help consolidate calls to them. At call sites for all four
functions, the existing guards were removed.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 46 +++++++++++++++++++++------------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index e58bfb880111..17ecaae9c5f8 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -169,6 +169,8 @@ struct cgroup_subsys_state {
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
+	struct css_rstat_cpu __percpu *rstat_cpu;
+
 	/*
 	 * siblings list anchored at the parent's ->children
 	 *
@@ -177,9 +179,6 @@ struct cgroup_subsys_state {
 	struct list_head sibling;
 	struct list_head children;
 
-	/* flush target list anchored at cgrp->rstat_css_list */
-	struct list_head rstat_css_node;
-
 	/*
 	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
 	 * matching css can be looked up using css_from_id().
@@ -219,6 +218,13 @@ struct cgroup_subsys_state {
 	 * Protected by cgroup_mutex.
 	 */
 	int nr_descendants;
+
+	/*
+	 * A singly-linked list of css structures to be rstat flushed.
+	 * This is a scratch field to be used exclusively by
+	 * css_rstat_flush() and protected by cgroup_rstat_lock.
+	 */
+	struct cgroup_subsys_state *rstat_flush_next;
 };
 
 /*
@@ -329,10 +335,10 @@ struct cgroup_base_stat {
 
 /*
  * rstat - cgroup scalable recursive statistics.  Accounting is done
- * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
+ * per-cpu in css_rstat_cpu which is then lazily propagated up the
  * hierarchy on reads.
  *
- * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
+ * When a stat gets updated, the css_rstat_cpu and its ancestors are
  * linked into the updated tree.  On the following read, propagation only
  * considers and consumes the updated tree.  This makes reading O(the
  * number of descendants which have been active since last read) instead of
@@ -346,20 +352,20 @@ struct cgroup_base_stat {
  * This struct hosts both the fields which implement the above -
  * updated_children and updated_next.
  */
-struct cgroup_rstat_cpu {
+struct css_rstat_cpu {
 	/*
 	 * Child cgroups with stat updates on this cpu since the last read
 	 * are linked on the parent's ->updated_children through
-	 * ->updated_next.
+	 * ->updated_next. updated_children is terminated by its container css.
 	 *
-	 * In addition to being more compact, singly-linked list pointing
-	 * to the cgroup makes it unnecessary for each per-cpu struct to
-	 * point back to the associated cgroup.
+	 * In addition to being more compact, singly-linked list pointing to
+	 * the css makes it unnecessary for each per-cpu struct to point back
+	 * to the associated css.
 	 *
 	 * Protected by per-cpu cgroup_rstat_cpu_lock.
 	 */
-	struct cgroup *updated_children;	/* terminated by self cgroup */
-	struct cgroup *updated_next;		/* NULL iff not on the list */
+	struct cgroup_subsys_state *updated_children;
+	struct cgroup_subsys_state *updated_next;	/* NULL if not on the list */
 };
 
 /*
@@ -521,25 +527,15 @@ struct cgroup {
 	struct cgroup *dom_cgrp;
 	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */
 
-	/* per-cpu recursive resource statistics */
-	struct cgroup_rstat_cpu __percpu *rstat_cpu;
 	struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;
-	struct list_head rstat_css_list;
 
 	/*
-	 * Add padding to separate the read mostly rstat_cpu and
-	 * rstat_css_list into a different cacheline from the following
-	 * rstat_flush_next and *bstat fields which can have frequent updates.
+	 * Add padding to keep the read mostly rstat per-cpu pointer on a
+	 * different cacheline than the following *bstat fields which can have
+	 * frequent updates.
 	 */
 	CACHELINE_PADDING(_pad_);
 
-	/*
-	 * A singly-linked list of cgroup structures to be rstat flushed.
-	 * This is a scratch field to be used exclusively by
-	 * css_rstat_flush_locked() and protected by cgroup_rstat_lock.
-	 */
-	struct cgroup	*rstat_flush_next;
-
 	/* cgroup basic resource statistics */
 	struct cgroup_base_stat last_bstat;
 	struct cgroup_base_stat bstat;
-- 
cgit v1.2.3


From 748922dcfabdd655d25fb6dd09a60e694a3d35e6 Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Wed, 14 May 2025 17:19:35 -0700
Subject: cgroup: use subsystem-specific rstat locks to avoid contention

It is possible to eliminate contention between subsystems when
updating/flushing stats by using subsystem-specific locks. Let the existing
rstat locks be dedicated to the cgroup base stats and rename them to
reflect that. Add similar locks to the cgroup_subsys struct for use with
individual subsystems.

Lock initialization is done in the new function ss_rstat_init(ss) which
replaces cgroup_rstat_boot(void). If NULL is passed to this function, the
global base stat locks will be initialized. Otherwise, the subsystem locks
will be initialized.

Change the existing lock helper functions to accept a reference to a css.
Then within these functions, conditionally select the appropriate locks
based on the subsystem affiliation of the given css. Add helper functions
for this selection routine to avoid repeated code.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 17ecaae9c5f8..5b8127d29dc5 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -222,7 +222,10 @@ struct cgroup_subsys_state {
 	/*
 	 * A singly-linked list of css structures to be rstat flushed.
 	 * This is a scratch field to be used exclusively by
-	 * css_rstat_flush() and protected by cgroup_rstat_lock.
+	 * css_rstat_flush().
+	 *
+	 * Protected by rstat_base_lock when css is cgroup::self.
+	 * Protected by css->ss->rstat_ss_lock otherwise.
 	 */
 	struct cgroup_subsys_state *rstat_flush_next;
 };
@@ -362,7 +365,7 @@ struct css_rstat_cpu {
 	 * the css makes it unnecessary for each per-cpu struct to point back
 	 * to the associated css.
 	 *
-	 * Protected by per-cpu cgroup_rstat_cpu_lock.
+	 * Protected by per-cpu css->ss->rstat_ss_cpu_lock.
 	 */
 	struct cgroup_subsys_state *updated_children;
 	struct cgroup_subsys_state *updated_next;	/* NULL if not on the list */
@@ -792,6 +795,9 @@ struct cgroup_subsys {
 	 * specifies the mask of subsystems that this one depends on.
 	 */
 	unsigned int depends_on;
+
+	spinlock_t rstat_ss_lock;
+	raw_spinlock_t __percpu *rstat_ss_cpu_lock;
 };
 
 extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
-- 
cgit v1.2.3


From f3921fb7fdc23dd946b0c082f5fedca9ce75d506 Mon Sep 17 00:00:00 2001
From: JP Kobryn <inwardvessel@gmail.com>
Date: Wed, 14 May 2025 17:19:37 -0700
Subject: cgroup: document the rstat per-cpu initialization

The calls to css_rstat_init() occur at different places depending on the
context. Document the conditions that determine which point of
initialization is used.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5b8127d29dc5..e61687d5e496 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -169,6 +169,21 @@ struct cgroup_subsys_state {
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
+	/*
+	 * Depending on the context, this field is initialized
+	 * via css_rstat_init() at different places:
+	 *
+	 * when css is associated with cgroup::self
+	 *   when css->cgroup is the root cgroup
+	 *     performed in cgroup_init()
+	 *   when css->cgroup is not the root cgroup
+	 *     performed in cgroup_create()
+	 * when css is associated with a subsystem
+	 *   when css->cgroup is the root cgroup
+	 *     performed in cgroup_init_subsys() in the non-early path
+	 *   when css->cgroup is not the root cgroup
+	 *     performed in css_create()
+	 */
 	struct css_rstat_cpu __percpu *rstat_cpu;
 
 	/*
@@ -530,6 +545,15 @@ struct cgroup {
 	struct cgroup *dom_cgrp;
 	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */
 
+	/*
+	 * Depending on the context, this field is initialized via
+	 * css_rstat_init() at different places:
+	 *
+	 * when cgroup is the root cgroup
+	 *   performed in cgroup_setup_root()
+	 * otherwise
+	 *   performed in cgroup_create()
+	 */
 	struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;
 
 	/*
-- 
cgit v1.2.3


From 22f2dc03553fb3f407fbd5e6b5b4f9c747927cb2 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Mon, 19 May 2025 11:08:51 +0300
Subject: PCI: Add Intel Wildcat Lake audio Device ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Wildcat Lake (WCL) audio Device ID.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.dev>
Acked-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20250519080855.16977-2-peter.ujfalusi@linux.intel.com
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2e28182c3af0..f7ceefce6d3d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3049,6 +3049,7 @@
 #define PCI_DEVICE_ID_INTEL_HDA_DG1	0x490d
 #define PCI_DEVICE_ID_INTEL_HDA_EHL_0	0x4b55
 #define PCI_DEVICE_ID_INTEL_HDA_EHL_3	0x4b58
+#define PCI_DEVICE_ID_INTEL_HDA_WCL	0x4d28
 #define PCI_DEVICE_ID_INTEL_HDA_JSL_N	0x4dc8
 #define PCI_DEVICE_ID_INTEL_HDA_DG2_0	0x4f90
 #define PCI_DEVICE_ID_INTEL_HDA_DG2_1	0x4f91
-- 
cgit v1.2.3


From 58f5fbeb367ff6f30a2448b2cad70f70b2de4b06 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 16 May 2025 21:28:03 +0200
Subject: fanotify: support watching filesystems and mounts inside userns

An unprivileged user is allowed to create an fanotify group and add
inode marks, but not filesystem, mntns and mount marks.

Add limited support for setting up filesystem, mntns and mount marks by
an unprivileged user under the following conditions:

1.   User has CAP_SYS_ADMIN in the user ns where the group was created
2.a. User has CAP_SYS_ADMIN in the user ns where the sb was created
  OR (in case setting up a mntns mark)
2.b. User has CAP_SYS_ADMIN in the user ns associated with the mntns

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250516192803.838659-3-amir73il@gmail.com
---
 include/linux/fanotify.h         | 5 ++---
 include/linux/fsnotify_backend.h | 1 +
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 3c817dc6292e..879cff5eccd4 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -38,8 +38,7 @@
 					 FAN_REPORT_PIDFD | \
 					 FAN_REPORT_FD_ERROR | \
 					 FAN_UNLIMITED_QUEUE | \
-					 FAN_UNLIMITED_MARKS | \
-					 FAN_REPORT_MNT)
+					 FAN_UNLIMITED_MARKS)
 
 /*
  * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@@ -48,7 +47,7 @@
  * so one of the flags for reporting file handles is required.
  */
 #define FANOTIFY_USER_INIT_FLAGS	(FAN_CLASS_NOTIF | \
-					 FANOTIFY_FID_BITS | \
+					 FANOTIFY_FID_BITS | FAN_REPORT_MNT | \
 					 FAN_CLOEXEC | FAN_NONBLOCK)
 
 #define FANOTIFY_INIT_FLAGS	(FANOTIFY_ADMIN_INIT_FLAGS | \
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 6cd8d1d28b8b..7dd22db06317 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -250,6 +250,7 @@ struct fsnotify_group {
 						 * full */
 
 	struct mem_cgroup *memcg;	/* memcg to charge allocations */
+	struct user_namespace *user_ns;	/* user ns where group was created */
 
 	/* groups can define private fields here or use the void *private */
 	union {
-- 
cgit v1.2.3


From a462903fa22541f212134fba81084315ad843e6e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 May 2025 13:59:27 +0200
Subject: net: netlink: reduce extack cookie size

Seems like the extack cookie hasn't found any users outside
of wireless, which always uses nl_set_extack_cookie_u64().
Thus, allocating 20 bytes for it is pointless, reduce that
to 8 bytes, and add a BUILD_BUG_ON() to ensure it's enough
(obviously it is, for a u64, but in case it changes again.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://patch.msgid.link/20250516115927.38209-2-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netlink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index c3ae84a77e16..882e9c1b6c1d 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -63,7 +63,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
 }
 
 /* this can be increased when necessary - don't expose to userland */
-#define NETLINK_MAX_COOKIE_LEN	20
+#define NETLINK_MAX_COOKIE_LEN	8
 #define NETLINK_MAX_FMTMSG_LEN	80
 
 /**
@@ -212,6 +212,7 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
 {
 	if (!extack)
 		return;
+	BUILD_BUG_ON(sizeof(extack->cookie) < sizeof(cookie));
 	memcpy(extack->cookie, &cookie, sizeof(cookie));
 	extack->cookie_len = sizeof(cookie);
 }
-- 
cgit v1.2.3


From 1c9a93bf1d01729c54e9acbaa538db81f16563b2 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 25 Apr 2025 20:06:34 -0600
Subject: dmapool: add NUMA affinity support

Introduce dma_pool_create_node(), like dma_pool_create() but taking an
additional NUMA node argument. Allocate struct dma_pool on the desired
node, and store the node on dma_pool for allocating struct dma_page.
Make dma_pool_create() an alias for dma_pool_create_node() with node set
to NUMA_NO_NODE.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dmapool.h | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index f632ecfb4238..06c4de602b2f 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -11,6 +11,7 @@
 #ifndef LINUX_DMAPOOL_H
 #define	LINUX_DMAPOOL_H
 
+#include <linux/nodemask_types.h>
 #include <linux/scatterlist.h>
 #include <asm/io.h>
 
@@ -18,8 +19,8 @@ struct device;
 
 #ifdef CONFIG_HAS_DMA
 
-struct dma_pool *dma_pool_create(const char *name, struct device *dev, 
-			size_t size, size_t align, size_t allocation);
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+		size_t size, size_t align, size_t boundary, int node);
 
 void dma_pool_destroy(struct dma_pool *pool);
 
@@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
 void dmam_pool_destroy(struct dma_pool *pool);
 
 #else /* !CONFIG_HAS_DMA */
-static inline struct dma_pool *dma_pool_create(const char *name,
-	struct device *dev, size_t size, size_t align, size_t allocation)
-{ return NULL; }
+static inline struct dma_pool *dma_pool_create_node(const char *name,
+		struct device *dev, size_t size, size_t align, size_t boundary,
+		int node)
+{
+	return NULL;
+}
 static inline void dma_pool_destroy(struct dma_pool *pool) { }
 static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 				   dma_addr_t *handle) { return NULL; }
@@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name,
 static inline void dmam_pool_destroy(struct dma_pool *pool) { }
 #endif /* !CONFIG_HAS_DMA */
 
+static inline struct dma_pool *dma_pool_create(const char *name,
+		struct device *dev, size_t size, size_t align, size_t boundary)
+{
+	return dma_pool_create_node(name, dev, size, align, boundary,
+				    NUMA_NO_NODE);
+}
+
 static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
 				    dma_addr_t *handle)
 {
-- 
cgit v1.2.3


From 2695b3c7fe4fa06a377ea0d66e3fe5fdb60310f0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 11 May 2025 11:28:36 -0700
Subject: MIPS: bcm63xx: nvram: avoid inefficient use of crc32_le_combine()

bcm963xx_nvram_checksum() was using crc32_le_combine() to update a CRC
with four zero bytes.  However, this is about 5x slower than just
CRC'ing four zero bytes in the normal way.  Just do that instead.

(We could instead make crc32_le_combine() faster on short lengths.  But
all its callers do just fine without it, so I'd like to just remove it.)

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 include/linux/bcm963xx_nvram.h | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bcm963xx_nvram.h b/include/linux/bcm963xx_nvram.h
index c8c7f01159fe..48830bf18042 100644
--- a/include/linux/bcm963xx_nvram.h
+++ b/include/linux/bcm963xx_nvram.h
@@ -81,25 +81,21 @@ static int __maybe_unused bcm963xx_nvram_checksum(
 	const struct bcm963xx_nvram *nvram,
 	u32 *expected_out, u32 *actual_out)
 {
+	const u32 zero = 0;
 	u32 expected, actual;
 	size_t len;
 
 	if (nvram->version <= 4) {
 		expected = nvram->checksum_v4;
-		len = BCM963XX_NVRAM_V4_SIZE - sizeof(u32);
+		len = BCM963XX_NVRAM_V4_SIZE;
 	} else {
 		expected = nvram->checksum_v5;
-		len = BCM963XX_NVRAM_V5_SIZE - sizeof(u32);
+		len = BCM963XX_NVRAM_V5_SIZE;
 	}
 
-	/*
-	 * Calculate the CRC32 value for the nvram with a checksum value
-	 * of 0 without modifying or copying the nvram by combining:
-	 * - The CRC32 of the nvram without the checksum value
-	 * - The CRC32 of a zero checksum value (which is also 0)
-	 */
-	actual = crc32_le_combine(
-		crc32_le(~0, (u8 *)nvram, len), 0, sizeof(u32));
+	/* Calculate the CRC32 of the nvram with the checksum field set to 0. */
+	actual = crc32_le(~0, nvram, len - sizeof(u32));
+	actual = crc32_le(actual, &zero, sizeof(u32));
 
 	if (expected_out)
 		*expected_out = expected;
-- 
cgit v1.2.3


From 31be641d74267d98317ef5a2b90e6200511cabb3 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 15 May 2025 10:11:54 +0200
Subject: net: phy: make mdio consumer / device layer a separate module

After having factored out the provider part from mdio_bus.c, we can
make the mdio consumer / device layer a separate module. This also
allows to remove Kconfig symbol MDIO_DEVICE.
The module init / exit functions from mdio_bus.c no longer have to be
called from phy_device.c. The link order defined in
drivers/net/phy/Makefile ensures that init / exit functions are called
in the right order.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://patch.msgid.link/dba6b156-5748-44ce-b5e2-e8dc2fcee5a7@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/phy.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7c29d346d4b3..92a88b5ce356 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -2033,9 +2033,6 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev,
 				   const struct ethtool_link_ksettings *cmd);
 int phy_ethtool_nway_reset(struct net_device *ndev);
 
-int __init mdio_bus_init(void);
-void mdio_bus_exit(void);
-
 int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data);
 int phy_ethtool_get_sset_count(struct phy_device *phydev);
 int phy_ethtool_get_stats(struct phy_device *phydev,
-- 
cgit v1.2.3


From 3318f7b5cefbff96b1bb49584ac38d2c9997a830 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 19 May 2025 10:51:28 -0700
Subject: iommu/io-pgtable-arm: Add quirk to quiet WARN_ON()

In situations where mapping/unmapping sequence can be controlled by
userspace, attempting to map over a region that has not yet been
unmapped is an error.  But not something that should spam dmesg.

Now that there is a quirk, we can also drop the selftest_running
flag, and use the quirk instead for selftests.

Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Link: https://lore.kernel.org/r/20250519175348.11924-6-robdclark@gmail.com
[will: Rename quirk to IO_PGTABLE_QUIRK_NO_WARN per Robin's suggestion]
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/io-pgtable.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index bba2a51c87d2..138fbd89b1e6 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -88,6 +88,13 @@ struct io_pgtable_cfg {
 	 *
 	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
 	 * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits
+	 *
+	 * IO_PGTABLE_QUIRK_NO_WARN: Do not WARN_ON() on conflicting
+	 *	mappings, but silently return -EEXISTS.  Normally an attempt
+	 *	to map over an existing mapping would indicate some sort of
+	 *	kernel bug, which would justify the WARN_ON().  But for GPU
+	 *	drivers, this could be under control of userspace.  Which
+	 *	deserves an error return, but not to spam dmesg.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
@@ -97,6 +104,7 @@ struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
 	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
 	#define IO_PGTABLE_QUIRK_ARM_S2FWB		BIT(8)
+	#define IO_PGTABLE_QUIRK_NO_WARN		BIT(9)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
-- 
cgit v1.2.3


From d6bf294773a472fe4694b92714af482f5c6aa4c6 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Mon, 12 May 2025 14:33:15 +0800
Subject: ext4/jbd2: convert jbd2_journal_blocks_per_page() to support large
 folio

jbd2_journal_blocks_per_page() returns the number of blocks in a single
page. Rename it to jbd2_journal_blocks_per_folio() and make it returns
the number of blocks in the largest folio, preparing for the calculation
of journal credits blocks when allocating blocks within a large folio in
the writeback path.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20250512063319.3539411-5-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 023e8abdb99a..ebbcdab474d5 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1723,7 +1723,7 @@ static inline int tid_geq(tid_t x, tid_t y)
 	return (difference >= 0);
 }
 
-extern int jbd2_journal_blocks_per_page(struct inode *inode);
+extern int jbd2_journal_blocks_per_folio(struct inode *inode);
 extern size_t journal_tag_bytes(journal_t *journal);
 
 static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
-- 
cgit v1.2.3


From 76005718cf8bfdb6b0f818ea75ca6a4b3bee34cd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 12 May 2025 22:38:08 -0700
Subject: jbd2: remove journal_t argument from jbd2_chksum()

Since jbd2_chksum() no longer uses its journal_t argument, remove it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Baokun Li <libaokun1@huawei.com>
Link: https://patch.msgid.link/20250513053809.699974-4-ebiggers@kernel.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index ebbcdab474d5..43b9297fe8a7 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1766,8 +1766,7 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
 #define BJ_Reserved	4	/* Buffer is reserved for access by journal */
 #define BJ_Types	5
 
-static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
-			      const void *address, unsigned int length)
+static inline u32 jbd2_chksum(u32 crc, const void *address, unsigned int length)
 {
 	return crc32c(crc, address, length);
 }
-- 
cgit v1.2.3


From 7e6f4a0a7512eca93cecff3bcb37f0ed164949a2 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 19 May 2025 13:13:14 +0200
Subject: i2c: remove 'of_node' member from i2c_boardinfo

There is no user of this member anymore. We can remove it.

Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index cc1437f29823..20fd41b51d5c 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -405,7 +405,6 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; }
  * @addr: stored in i2c_client.addr
  * @dev_name: Overrides the default <busnr>-<addr> dev_name if set
  * @platform_data: stored in i2c_client.dev.platform_data
- * @of_node: **DEPRECATED** - use @fwnode for this
  * @fwnode: device node supplied by the platform firmware
  * @swnode: software node for the device
  * @resources: resources associated with the device
@@ -429,7 +428,6 @@ struct i2c_board_info {
 	unsigned short	addr;
 	const char	*dev_name;
 	void		*platform_data;
-	struct device_node *of_node;
 	struct fwnode_handle *fwnode;
 	const struct software_node *swnode;
 	const struct resource *resources;
-- 
cgit v1.2.3


From 3f1716ee0f6c63795e6d225e3f5ec3825cd2bd57 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 17 May 2025 22:34:32 +0200
Subject: net: phy: fixed_phy: remove irq argument from fixed_phy_add

All callers pass PHY_POLL, therefore remove irq argument from
fixed_phy_add().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Link: https://patch.msgid.link/b3b9b3bc-c310-4a54-b376-c909c83575de@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 3392c09b5d24..316bb4deda37 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -17,8 +17,7 @@ struct net_device;
 
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
-extern int fixed_phy_add(unsigned int irq, int phy_id,
-			 struct fixed_phy_status *status);
+int fixed_phy_add(int phy_id, struct fixed_phy_status *status);
 extern struct phy_device *fixed_phy_register(unsigned int irq,
 					     struct fixed_phy_status *status,
 					     struct device_node *np);
@@ -28,7 +27,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
 					   struct fixed_phy_status *));
 #else
-static inline int fixed_phy_add(unsigned int irq, int phy_id,
+static inline int fixed_phy_add(int phy_id,
 				struct fixed_phy_status *status)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From d23b4af5df3900fb0b4e1a05cb8119dd1c395519 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 17 May 2025 22:35:56 +0200
Subject: net: phy: fixed_phy: remove irq argument from fixed_phy_register

All callers pass PHY_POLL, therefore remove irq argument from
fixed_phy_register().

Note: I keep the irq argument in fixed_phy_add_gpiod() for now,
for the case that somebody may want to use a GPIO interrupt in
the future, by e.g. adding a call to fwnode_irq_get() to
fixed_phy_get_gpiod().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/31cdb232-a5e9-4997-a285-cb9a7d208124@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 316bb4deda37..634149a73c2a 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -18,9 +18,8 @@ struct net_device;
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
 int fixed_phy_add(int phy_id, struct fixed_phy_status *status);
-extern struct phy_device *fixed_phy_register(unsigned int irq,
-					     struct fixed_phy_status *status,
-					     struct device_node *np);
+struct phy_device *fixed_phy_register(struct fixed_phy_status *status,
+				      struct device_node *np);
 
 extern void fixed_phy_unregister(struct phy_device *phydev);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
@@ -32,9 +31,9 @@ static inline int fixed_phy_add(int phy_id,
 {
 	return -ENODEV;
 }
-static inline struct phy_device *fixed_phy_register(unsigned int irq,
-						struct fixed_phy_status *status,
-						struct device_node *np)
+static inline struct phy_device *
+fixed_phy_register(struct fixed_phy_status *status,
+		   struct device_node *np)
 {
 	return ERR_PTR(-ENODEV);
 }
-- 
cgit v1.2.3


From 4ba1c5bb4811f560a86697311cb4e9741e047a5d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 17 May 2025 22:37:29 +0200
Subject: net: phy: fixed_phy: constify status argument where possible

Constify the passed struct fixed_phy_status *status where possible.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/d1764b62-8538-408b-a4e3-b63715481a38@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy_fixed.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 634149a73c2a..5399b9e41e35 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -17,8 +17,8 @@ struct net_device;
 
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
-int fixed_phy_add(int phy_id, struct fixed_phy_status *status);
-struct phy_device *fixed_phy_register(struct fixed_phy_status *status,
+int fixed_phy_add(int phy_id, const struct fixed_phy_status *status);
+struct phy_device *fixed_phy_register(const struct fixed_phy_status *status,
 				      struct device_node *np);
 
 extern void fixed_phy_unregister(struct phy_device *phydev);
@@ -27,12 +27,12 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev,
 					   struct fixed_phy_status *));
 #else
 static inline int fixed_phy_add(int phy_id,
-				struct fixed_phy_status *status)
+				const struct fixed_phy_status *status)
 {
 	return -ENODEV;
 }
 static inline struct phy_device *
-fixed_phy_register(struct fixed_phy_status *status,
+fixed_phy_register(const struct fixed_phy_status *status,
 		   struct device_node *np)
 {
 	return ERR_PTR(-ENODEV);
-- 
cgit v1.2.3


From 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 Mon Sep 17 00:00:00 2001
From: Ignacio Moreno Gonzalez <Ignacio.MorenoGonzalez@kuka.com>
Date: Wed, 7 May 2025 15:28:06 +0200
Subject: mm: mmap: map MAP_STACK to VM_NOHUGEPAGE only if THP is enabled

commit c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") maps the
mmap option MAP_STACK to VM_NOHUGEPAGE.  This is also done if
CONFIG_TRANSPARENT_HUGEPAGE is not defined.  But in that case, the
VM_NOHUGEPAGE does not make sense.

I discovered this issue when trying to use the tool CRIU to checkpoint and
restore a container.  Our running kernel is compiled without
CONFIG_TRANSPARENT_HUGEPAGE.  CRIU parses the output of /proc/<pid>/smaps
and saves the "nh" flag.  When trying to restore the container, CRIU fails
to restore the "nh" mappings, since madvise() MADV_NOHUGEPAGE always
returns an error because CONFIG_TRANSPARENT_HUGEPAGE is not defined.

Link: https://lkml.kernel.org/r/20250507-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled-v5-1-c6c38cfefd6e@kuka.com
Fixes: c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE")
Signed-off-by: Ignacio Moreno Gonzalez <Ignacio.MorenoGonzalez@kuka.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mman.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mman.h b/include/linux/mman.h
index bce214fece16..f4c6346a8fcd 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -155,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags)
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
 	       _calc_vm_trans(flags, MAP_SYNC,	     VM_SYNC      ) |
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	       _calc_vm_trans(flags, MAP_STACK,	     VM_NOHUGEPAGE) |
+#endif
 	       arch_calc_vm_flag_bits(file, flags);
 }
 
-- 
cgit v1.2.3


From 0f518255bde881d2a2605bbc080b438b532b6ab2 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Wed, 7 May 2025 15:09:57 +0200
Subject: mm: fix VM_UFFD_MINOR == VM_SHADOW_STACK on USERFAULTFD=y &&
 ARM64_GCS=y

On configs with CONFIG_ARM64_GCS=y, VM_SHADOW_STACK is bit 38.  On configs
with CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y (selected by CONFIG_ARM64 when
CONFIG_USERFAULTFD=y), VM_UFFD_MINOR is _also_ bit 38.

This bit being shared by two different VMA flags could lead to all sorts
of unintended behaviors.  Presumably, a process could maybe call into
userfaultfd in a way that disables the shadow stack vma flag.  I can't
think of any attack where this would help (presumably, if an attacker
tries to disable shadow stacks, they are trying to hijack control flow so
can't arbitrarily call into userfaultfd yet anyway) but this still feels
somewhat scary.

Link: https://lkml.kernel.org/r/20250507131000.1204175-2-revest@chromium.org
Fixes: ae80e1629aea ("mm: Define VM_SHADOW_STACK for arm64 when we support GCS")
Signed-off-by: Florent Revest <revest@chromium.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Florent Revest <revest@chromium.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bf55206935c4..fdda6b16263b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -385,7 +385,7 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
-# define VM_UFFD_MINOR_BIT	38
+# define VM_UFFD_MINOR_BIT	41
 # define VM_UFFD_MINOR		BIT(VM_UFFD_MINOR_BIT)	/* UFFD minor faults */
 #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
 # define VM_UFFD_MINOR		VM_NONE
-- 
cgit v1.2.3


From 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 14 May 2025 18:06:02 +0100
Subject: highmem: add folio_test_partial_kmap()

In commit c749d9b7ebbc ("iov_iter: fix copy_page_from_iter_atomic() if
KMAP_LOCAL_FORCE_MAP"), Hugh correctly noted that if KMAP_LOCAL_FORCE_MAP
is enabled, we must limit ourselves to PAGE_SIZE bytes per call to
kmap_local().  The same problem exists in memcpy_from_folio(),
memcpy_to_folio(), folio_zero_tail(), folio_fill_tail() and
memcpy_from_file_folio(), so add folio_test_partial_kmap() to do this more
succinctly.

Link: https://lkml.kernel.org/r/20250514170607.3000994-2-willy@infradead.org
Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/highmem.h    | 10 +++++-----
 include/linux/page-flags.h |  7 +++++++
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 5c6bea81a90e..c698f8415675 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio,
 		const char *from = kmap_local_folio(folio, offset);
 		size_t chunk = len;
 
-		if (folio_test_highmem(folio) &&
+		if (folio_test_partial_kmap(folio) &&
 		    chunk > PAGE_SIZE - offset_in_page(offset))
 			chunk = PAGE_SIZE - offset_in_page(offset);
 		memcpy(to, from, chunk);
@@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset,
 		char *to = kmap_local_folio(folio, offset);
 		size_t chunk = len;
 
-		if (folio_test_highmem(folio) &&
+		if (folio_test_partial_kmap(folio) &&
 		    chunk > PAGE_SIZE - offset_in_page(offset))
 			chunk = PAGE_SIZE - offset_in_page(offset);
 		memcpy(to, from, chunk);
@@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio,
 {
 	size_t len = folio_size(folio) - offset;
 
-	if (folio_test_highmem(folio)) {
+	if (folio_test_partial_kmap(folio)) {
 		size_t max = PAGE_SIZE - offset_in_page(offset);
 
 		while (len > max) {
@@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset,
 
 	VM_BUG_ON(offset + len > folio_size(folio));
 
-	if (folio_test_highmem(folio)) {
+	if (folio_test_partial_kmap(folio)) {
 		size_t max = PAGE_SIZE - offset_in_page(offset);
 
 		while (len > max) {
@@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio,
 	size_t offset = offset_in_folio(folio, pos);
 	char *from = kmap_local_folio(folio, offset);
 
-	if (folio_test_highmem(folio)) {
+	if (folio_test_partial_kmap(folio)) {
 		offset = offset_in_page(offset);
 		len = min_t(size_t, len, PAGE_SIZE - offset);
 	} else
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e6a21b62dcce..3b814ce08331 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE)
 PAGEFLAG_FALSE(HighMem, highmem)
 #endif
 
+/* Does kmap_local_folio() only allow access to one page of the folio? */
+#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
+#define folio_test_partial_kmap(f)	true
+#else
+#define folio_test_partial_kmap(f)	folio_test_highmem(f)
+#endif
+
 #ifdef CONFIG_SWAP
 static __always_inline bool folio_test_swapcache(const struct folio *folio)
 {
-- 
cgit v1.2.3


From 223657cfc87c3d5b9c2172014181c8ed7acf58e8 Mon Sep 17 00:00:00 2001
From: Thomas Richard <thomas.richard@bootlin.com>
Date: Tue, 20 May 2025 15:28:26 +0200
Subject: pinctrl: remove extern specifier for functions in machine.h

Extern is the default specifier for a function, no need to define it.

Suggested-by: Andy Shevchenko <andy@kernel.org>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Thomas Richard <thomas.richard@bootlin.com>
Link: https://lore.kernel.org/20250520-aaeon-up-board-pinctrl-support-v6-2-dcb3756be3c6@bootlin.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/machine.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index 673e96df453b..0940fabb154d 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -153,10 +153,10 @@ struct pinctrl_map;
 
 #ifdef CONFIG_PINCTRL
 
-extern int pinctrl_register_mappings(const struct pinctrl_map *map,
-				     unsigned int num_maps);
-extern void pinctrl_unregister_mappings(const struct pinctrl_map *map);
-extern void pinctrl_provide_dummies(void);
+int pinctrl_register_mappings(const struct pinctrl_map *map,
+			      unsigned int num_maps);
+void pinctrl_unregister_mappings(const struct pinctrl_map *map);
+void pinctrl_provide_dummies(void);
 #else
 
 static inline int pinctrl_register_mappings(const struct pinctrl_map *map,
-- 
cgit v1.2.3


From 2e9ba1d9a31f68b4deb5f9e62a9201a51b00abf7 Mon Sep 17 00:00:00 2001
From: Thomas Richard <thomas.richard@bootlin.com>
Date: Tue, 20 May 2025 15:28:27 +0200
Subject: pinctrl: core: add devm_pinctrl_register_mappings()

Using devm_pinctrl_register_mappings(), the core can automatically
unregister pinctrl mappings.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Thomas Richard <thomas.richard@bootlin.com>
Link: https://lore.kernel.org/20250520-aaeon-up-board-pinctrl-support-v6-3-dcb3756be3c6@bootlin.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/machine.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index 0940fabb154d..25620229b1d6 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -149,12 +149,16 @@ struct pinctrl_map {
 #define PIN_MAP_CONFIGS_GROUP_HOG_DEFAULT(dev, grp, cfgs)		\
 	PIN_MAP_CONFIGS_GROUP(dev, PINCTRL_STATE_DEFAULT, dev, grp, cfgs)
 
+struct device;
 struct pinctrl_map;
 
 #ifdef CONFIG_PINCTRL
 
 int pinctrl_register_mappings(const struct pinctrl_map *map,
 			      unsigned int num_maps);
+int devm_pinctrl_register_mappings(struct device *dev,
+				   const struct pinctrl_map *map,
+				   unsigned int num_maps);
 void pinctrl_unregister_mappings(const struct pinctrl_map *map);
 void pinctrl_provide_dummies(void);
 #else
@@ -165,6 +169,13 @@ static inline int pinctrl_register_mappings(const struct pinctrl_map *map,
 	return 0;
 }
 
+static inline int devm_pinctrl_register_mappings(struct device *dev,
+						 const struct pinctrl_map *map,
+						 unsigned int num_maps)
+{
+	return 0;
+}
+
 static inline void pinctrl_unregister_mappings(const struct pinctrl_map *map)
 {
 }
-- 
cgit v1.2.3


From 70892277ca2dbad30ce89acf62fb62045d4bc59b Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Tue, 20 May 2025 15:08:56 -0400
Subject: iommu/arm-smmu-qcom: Make set_stall work when the device is on

Up until now we have only called the set_stall callback during
initialization when the device is off. But we will soon start calling it
to temporarily disable stall-on-fault when the device is on, so handle
that by checking if the device is on and writing SCTLR.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Link: https://lore.kernel.org/r/20250520-msm-gpu-fault-fixes-next-v8-3-fce6ee218787@gmail.com
[will: Fix "mixed declarations and code" warning from sparse]
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/adreno-smmu-priv.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index abec23c7744f..d83c9175828f 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -45,9 +45,9 @@ struct adreno_smmu_fault_info {
  *                 TTBR0 translation is enabled with the specified cfg
  * @get_fault_info: Called by the GPU fault handler to get information about
  *                  the fault
- * @set_stall:     Configure whether stall on fault (CFCFG) is enabled.  Call
- *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
- *                 the GPU driver must call resume_translation()
+ * @set_stall:     Configure whether stall on fault (CFCFG) is enabled. If
+ *                 stalling on fault is enabled, the GPU driver must call
+ *                 resume_translation()
  * @resume_translation: Resume translation after a fault
  *
  * @set_prr_bit:   [optional] Configure the GPU's Partially Resident
-- 
cgit v1.2.3


From fe26933cf1e151ce0a5d858c263e8dacb2f12cee Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Wed, 7 May 2025 10:13:21 -0400
Subject: vt: add ucs_get_fallback()

This is the code querying the newly introduced tables.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://lore.kernel.org/r/20250507141535.40655-7-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/consolemap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 8167494229db..6180b803795c 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -31,6 +31,7 @@ void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
 bool ucs_is_zero_width(uint32_t cp);
 u32 ucs_recompose(u32 base, u32 mark);
+u32 ucs_get_fallback(u32 cp);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
 		bool use_unicode)
@@ -75,6 +76,11 @@ static inline u32 ucs_recompose(u32 base, u32 mark)
 {
 	return 0;
 }
+
+static inline u32 ucs_get_fallback(u32 cp)
+{
+	return 0;
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */
-- 
cgit v1.2.3


From 80fa7a03378588582eb40f89b6f418c0c256cf24 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Tue, 20 May 2025 13:16:43 -0400
Subject: vt: bracketed paste support

This is comprised of 3 aspects:

- Take note of when applications advertise bracketed paste support via
  "\e[?2004h" and "\e[?2004l".

- Insert bracketed paste markers ("\e[200~" and "\e[201~") around pasted
  content in paste_selection() when bracketed paste is active.

- Add TIOCL_GETBRACKETEDPASTE to return bracketed paste status so user
  space daemons implementing cut-and-paste functionality (e.g. gpm,
  BRLTTY) may know when to insert bracketed paste markers.

Link: https://en.wikipedia.org/wiki/Bracketed-paste

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250520171851.1219676-2-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/console_struct.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 20f564e98552..59b4fec5f254 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -145,6 +145,7 @@ struct vc_data {
 	unsigned int	vc_need_wrap	: 1;
 	unsigned int	vc_can_do_color	: 1;
 	unsigned int	vc_report_mouse : 2;
+	unsigned int	vc_bracketed_paste : 1;
 	unsigned char	vc_utf		: 1;	/* Unicode UTF-8 encoding */
 	unsigned char	vc_utf_count;
 		 int	vc_utf_char;
-- 
cgit v1.2.3


From 279f2c2c8e2169403d01190f042efa6e41731578 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 17 May 2025 17:14:53 +0200
Subject: futex: Use RCU_INIT_POINTER() in futex_mm_init().

There is no need for an explicit NULL pointer initialisation plus a
comment why it is okay. RCU_INIT_POINTER() can be used for NULL
initialisations and it is documented.

This has been build tested with gcc version 9.3.0 (Debian 9.3.0-22) on a
x86-64 defconfig.

Fixes: 094ac8cff7858 ("futex: Relax the rcu_assign_pointer() assignment of mm->futex_phash in futex_mm_init()")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250517151455.1065363-4-bigeasy@linutronix.de
---
 include/linux/futex.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 168ffd5996b4..005b040c4791 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -88,14 +88,7 @@ void futex_hash_free(struct mm_struct *mm);
 
 static inline void futex_mm_init(struct mm_struct *mm)
 {
-	/*
-	 * No need for rcu_assign_pointer() here, as we can rely on
-	 * tasklist_lock write-ordering in copy_process(), before
-	 * the task's MM becomes visible and the ->futex_phash
-	 * becomes externally observable:
-	 */
-	mm->futex_phash = NULL;
-
+	RCU_INIT_POINTER(mm->futex_phash, NULL);
 	mutex_init(&mm->futex_hash_lock);
 }
 
-- 
cgit v1.2.3


From a9194f88782afa1386641451a6c76beaa60485a0 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 16 May 2025 13:25:31 +0200
Subject: coredump: add coredump socket

Coredumping currently supports two modes:

(1) Dumping directly into a file somewhere on the filesystem.
(2) Dumping into a pipe connected to a usermode helper process
    spawned as a child of the system_unbound_wq or kthreadd.

For simplicity I'm mostly ignoring (1). There's probably still some
users of (1) out there but processing coredumps in this way can be
considered adventurous especially in the face of set*id binaries.

The most common option should be (2) by now. It works by allowing
userspace to put a string into /proc/sys/kernel/core_pattern like:

        |/usr/lib/systemd/systemd-coredump %P %u %g %s %t %c %h

The "|" at the beginning indicates to the kernel that a pipe must be
used. The path following the pipe indicator is a path to a binary that
will be spawned as a usermode helper process. Any additional parameters
pass information about the task that is generating the coredump to the
binary that processes the coredump.

In the example core_pattern shown above systemd-coredump is spawned as a
usermode helper. There's various conceptual consequences of this
(non-exhaustive list):

- systemd-coredump is spawned with file descriptor number 0 (stdin)
  connected to the read-end of the pipe. All other file descriptors are
  closed. That specifically includes 1 (stdout) and 2 (stderr). This has
  already caused bugs because userspace assumed that this cannot happen
  (Whether or not this is a sane assumption is irrelevant.).

- systemd-coredump will be spawned as a child of system_unbound_wq. So
  it is not a child of any userspace process and specifically not a
  child of PID 1. It cannot be waited upon and is in a weird hybrid
  upcall which are difficult for userspace to control correctly.

- systemd-coredump is spawned with full kernel privileges. This
  necessitates all kinds of weird privilege dropping excercises in
  userspace to make this safe.

- A new usermode helper has to be spawned for each crashing process.

This series adds a new mode:

(3) Dumping into an AF_UNIX socket.

Userspace can set /proc/sys/kernel/core_pattern to:

        @/path/to/coredump.socket

The "@" at the beginning indicates to the kernel that an AF_UNIX
coredump socket will be used to process coredumps.

The coredump socket must be located in the initial mount namespace.
When a task coredumps it opens a client socket in the initial network
namespace and connects to the coredump socket.

- The coredump server uses SO_PEERPIDFD to get a stable handle on the
  connected crashing task. The retrieved pidfd will provide a stable
  reference even if the crashing task gets SIGKILLed while generating
  the coredump.

- By setting core_pipe_limit non-zero userspace can guarantee that the
  crashing task cannot be reaped behind it's back and thus process all
  necessary information in /proc/<pid>. The SO_PEERPIDFD can be used to
  detect whether /proc/<pid> still refers to the same process.

  The core_pipe_limit isn't used to rate-limit connections to the
  socket. This can simply be done via AF_UNIX sockets directly.

- The pidfd for the crashing task will grow new information how the task
  coredumps.

- The coredump server should mark itself as non-dumpable.

- A container coredump server in a separate network namespace can simply
  bind to another well-know address and systemd-coredump fowards
  coredumps to the container.

- Coredumps could in the future also be handled via per-user/session
  coredump servers that run only with that users privileges.

  The coredump server listens on the coredump socket and accepts a
  new coredump connection. It then retrieves SO_PEERPIDFD for the
  client, inspects uid/gid and hands the accepted client to the users
  own coredump handler which runs with the users privileges only
  (It must of coure pay close attention to not forward crashing suid
  binaries.).

The new coredump socket will allow userspace to not have to rely on
usermode helpers for processing coredumps and provides a safer way to
handle them instead of relying on super privileged coredumping helpers
that have and continue to cause significant CVEs.

This will also be significantly more lightweight since no fork()+exec()
for the usermodehelper is required for each crashing process. The
coredump server in userspace can e.g., just keep a worker pool.

Link: https://lore.kernel.org/20250516-work-coredump-socket-v8-4-664f3caf2516@kernel.org
Acked-by: Luca Boccassi <luca.boccassi@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Reviewed-by: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/net.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 0ff950eecc6b..139c85d0f2ea 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -81,6 +81,7 @@ enum sock_type {
 #ifndef SOCK_NONBLOCK
 #define SOCK_NONBLOCK	O_NONBLOCK
 #endif
+#define SOCK_COREDUMP	O_NOCTTY
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
-- 
cgit v1.2.3


From 1d8db6fd698de1f73b1a7d72aea578fdd18d9a87 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 16 May 2025 13:25:32 +0200
Subject: pidfs, coredump: add PIDFD_INFO_COREDUMP

Extend the PIDFD_INFO_COREDUMP ioctl() with the new PIDFD_INFO_COREDUMP
mask flag. This adds the @coredump_mask field to struct pidfd_info.

When a task coredumps the kernel will provide the following information
to userspace in @coredump_mask:

* PIDFD_COREDUMPED is raised if the task did actually coredump.
* PIDFD_COREDUMP_SKIP is raised if the task skipped coredumping (e.g.,
  undumpable).
* PIDFD_COREDUMP_USER is raised if this is a regular coredump and
  doesn't need special care by the coredump server.
* PIDFD_COREDUMP_ROOT is raised if the generated coredump should be
  treated as sensitive and the coredump server should restrict to the
  generated coredump to sufficiently privileged users.

The kernel guarantees that by the time the connection is made the all
PIDFD_INFO_COREDUMP info is available.

Link: https://lore.kernel.org/20250516-work-coredump-socket-v8-5-664f3caf2516@kernel.org
Acked-by: Luca Boccassi <luca.boccassi@gmail.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Reviewed-by: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/pidfs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 2676890c4d0d..77e7db194914 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -2,11 +2,16 @@
 #ifndef _LINUX_PID_FS_H
 #define _LINUX_PID_FS_H
 
+struct coredump_params;
+
 struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
 void __init pidfs_init(void);
 void pidfs_add_pid(struct pid *pid);
 void pidfs_remove_pid(struct pid *pid);
 void pidfs_exit(struct task_struct *tsk);
+#ifdef CONFIG_COREDUMP
+void pidfs_coredump(const struct coredump_params *cprm);
+#endif
 extern const struct dentry_operations pidfs_dentry_operations;
 int pidfs_register_pid(struct pid *pid);
 void pidfs_get_pid(struct pid *pid);
-- 
cgit v1.2.3


From 01465f296a6871222b185c350423978d44431c96 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 9 May 2025 13:24:50 +0100
Subject: nvmem: Remove unused nvmem cell table support

Board files are deprecated by DT, and the last user of
nvmem_add_cell_table() was removed by commit 2af4fcc0d3574482 ("ARM:
davinci: remove unused board support") in v6.3.  Hence remove all
support for nvmem cell tables, and update the documentation.

Device drivers can still register a single cell using
nvmem_add_one_cell() (which was not documented before).

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Srinivas Kandagatla <srini@kernel.org>
Link: https://lore.kernel.org/r/20250509122452.11827-2-srini@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/nvmem-provider.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 515676ebe598..615a560d9edb 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -137,25 +137,6 @@ struct nvmem_config {
 	struct device		*base_dev;
 };
 
-/**
- * struct nvmem_cell_table - NVMEM cell definitions for given provider
- *
- * @nvmem_name:		Provider name.
- * @cells:		Array of cell definitions.
- * @ncells:		Number of cell definitions in the array.
- * @node:		List node.
- *
- * This structure together with related helper functions is provided for users
- * that don't can't access the nvmem provided structure but wish to register
- * cell definitions for it e.g. board files registering an EEPROM device.
- */
-struct nvmem_cell_table {
-	const char		*nvmem_name;
-	const struct nvmem_cell_info	*cells;
-	size_t			ncells;
-	struct list_head	node;
-};
-
 /**
  * struct nvmem_layout - NVMEM layout definitions
  *
@@ -190,9 +171,6 @@ void nvmem_unregister(struct nvmem_device *nvmem);
 struct nvmem_device *devm_nvmem_register(struct device *dev,
 					 const struct nvmem_config *cfg);
 
-void nvmem_add_cell_table(struct nvmem_cell_table *table);
-void nvmem_del_cell_table(struct nvmem_cell_table *table);
-
 int nvmem_add_one_cell(struct nvmem_device *nvmem,
 		       const struct nvmem_cell_info *info);
 
@@ -223,8 +201,6 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c)
 	return nvmem_register(c);
 }
 
-static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {}
-static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {}
 static inline int nvmem_add_one_cell(struct nvmem_device *nvmem,
 				     const struct nvmem_cell_info *info)
 {
-- 
cgit v1.2.3


From b803c4a4f78834b31ebfbbcea350473333760559 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Fri, 2 May 2025 02:12:10 +0900
Subject: can: dev: add struct data_bittiming_params to group FD parameters

This is a preparation patch for the introduction of CAN XL.

CAN FD and CAN XL uses similar bittiming parameters. Add one level of
nesting for all the CAN FD parameters. Typically:

  priv->can.data_bittiming;

becomes:

  priv->can.fd.data_bittiming;

This way, the CAN XL equivalent (to be introduced later) would be:

  priv->can.xl.data_bittiming;

Add the new struct data_bittiming_params which contains all the data
bittiming parameters, including the TDC and the callback functions.

This done, update all the CAN FD drivers to make use of the new
layout.

Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://patch.msgid.link/20250501171213.2161572-2-mailhol.vincent@wanadoo.fr
[mkl: fix rcar_canfd]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 23492213ea35..492d23bec7be 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -38,6 +38,17 @@ enum can_termination_gpio {
 	CAN_TERMINATION_GPIO_MAX,
 };
 
+struct data_bittiming_params {
+	const struct can_bittiming_const *data_bittiming_const;
+	struct can_bittiming data_bittiming;
+	const struct can_tdc_const *tdc_const;
+	struct can_tdc tdc;
+	const u32 *data_bitrate_const;
+	unsigned int data_bitrate_const_cnt;
+	int (*do_set_data_bittiming)(struct net_device *dev);
+	int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv);
+};
+
 /*
  * CAN common private data
  */
@@ -45,16 +56,11 @@ struct can_priv {
 	struct net_device *dev;
 	struct can_device_stats can_stats;
 
-	const struct can_bittiming_const *bittiming_const,
-		*data_bittiming_const;
-	struct can_bittiming bittiming, data_bittiming;
-	const struct can_tdc_const *tdc_const;
-	struct can_tdc tdc;
-
+	const struct can_bittiming_const *bittiming_const;
+	struct can_bittiming bittiming;
+	struct data_bittiming_params fd;
 	unsigned int bitrate_const_cnt;
 	const u32 *bitrate_const;
-	const u32 *data_bitrate_const;
-	unsigned int data_bitrate_const_cnt;
 	u32 bitrate_max;
 	struct can_clock clock;
 
@@ -77,14 +83,12 @@ struct can_priv {
 	struct delayed_work restart_work;
 
 	int (*do_set_bittiming)(struct net_device *dev);
-	int (*do_set_data_bittiming)(struct net_device *dev);
 	int (*do_set_mode)(struct net_device *dev, enum can_mode mode);
 	int (*do_set_termination)(struct net_device *dev, u16 term);
 	int (*do_get_state)(const struct net_device *dev,
 			    enum can_state *state);
 	int (*do_get_berr_counter)(const struct net_device *dev,
 				   struct can_berr_counter *bec);
-	int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv);
 };
 
 static inline bool can_tdc_is_enabled(const struct can_priv *priv)
@@ -114,11 +118,11 @@ static inline bool can_tdc_is_enabled(const struct can_priv *priv)
  */
 static inline s32 can_get_relative_tdco(const struct can_priv *priv)
 {
-	const struct can_bittiming *dbt = &priv->data_bittiming;
+	const struct can_bittiming *dbt = &priv->fd.data_bittiming;
 	s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg +
 				  dbt->phase_seg1) * dbt->brp;
 
-	return (s32)priv->tdc.tdco - sample_point_in_tc;
+	return (s32)priv->fd.tdc.tdco - sample_point_in_tc;
 }
 
 /* helper to define static CAN controller features at device creation time */
-- 
cgit v1.2.3


From c1a606cd75fbe98d261b224b6dfb76d47f40dc12 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:47:58 +0100
Subject: fs/netfs: remove unused flag NETFS_SREQ_SEEK_DATA_READ

This flag was added by commit 3d3c95046742 ("netfs: Provide readahead
and readpage netfs helpers") but its only user was removed by commit
86b374d061ee ("netfs: Remove fs/netfs/io.c").

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-3-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index c86a11cfc4a3..d315d86d0ad4 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -191,7 +191,6 @@ struct netfs_io_subrequest {
 	unsigned long		flags;
 #define NETFS_SREQ_COPY_TO_CACHE	0	/* Set if should copy the data to the cache */
 #define NETFS_SREQ_CLEAR_TAIL		1	/* Set if the rest of the read should be cleared */
-#define NETFS_SREQ_SEEK_DATA_READ	3	/* Set if ->read() should SEEK_DATA first */
 #define NETFS_SREQ_MADE_PROGRESS	4	/* Set if we transferred at least some data */
 #define NETFS_SREQ_ONDEMAND		5	/* Set if it's from on-demand read mode */
 #define NETFS_SREQ_BOUNDARY		6	/* Set if ends on hard boundary (eg. ceph object) */
-- 
cgit v1.2.3


From 9cd78ca04fb827f42d7c0d492b96fbb940451266 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:47:59 +0100
Subject: fs/netfs: remove unused source NETFS_INVALID_WRITE

This enum choice was added by commit 16af134ca4b7 ("netfs: Extend the
netfs_io_*request structs to handle writes") and its only user was
later removed by commit c245868524cc ("netfs: Remove the old writeback
code").

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-4-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index d315d86d0ad4..5a76bea51d24 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -48,7 +48,6 @@ enum netfs_io_source {
 	NETFS_INVALID_READ,
 	NETFS_UPLOAD_TO_SERVER,
 	NETFS_WRITE_TO_CACHE,
-	NETFS_INVALID_WRITE,
 } __mode(byte);
 
 typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
-- 
cgit v1.2.3


From 9fcf235e91fae9f3e99f4e09d332ed09296b11ec Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:00 +0100
Subject: fs/netfs: remove unused flag NETFS_ICTX_WRITETHROUGH

This flag was added by commit 41d8e7673a77 ("netfs: Implement a
write-through caching option") but it was never used.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-5-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5a76bea51d24..242daec8c837 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -70,7 +70,6 @@ struct netfs_inode {
 	unsigned long		flags;
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
-#define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
 #define NETFS_ICTX_MODIFIED_ATTR 3		/* Indicate change in mtime/ctime */
 #define NETFS_ICTX_SINGLE_NO_UPLOAD 4		/* Monolithic payload, cache but no upload */
 };
-- 
cgit v1.2.3


From d46a7b217d6abbaea78ce5abf4b295e3c1d819d9 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:01 +0100
Subject: fs/netfs: remove unused enum choice NETFS_READ_HOLE_CLEAR

This choice was added by commit 3a11b3a86366 ("netfs: Pass more
information on how to deal with a hole in the cache") but the last
user was removed by commit 86b374d061ee ("netfs: Remove
fs/netfs/io.c").

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-6-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fscache.h | 3 ---
 include/linux/netfs.h   | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9de27643607f..fea0d9779b55 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -498,9 +498,6 @@ static inline void fscache_end_operation(struct netfs_cache_resources *cres)
  *
  *	NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read).
  *
- *	NETFS_READ_HOLE_CLEAR - Seek for data, clearing the part of the buffer
- *				skipped over, then do as for IGNORE.
- *
  *	NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole.
  */
 static inline
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 242daec8c837..73537dafa224 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -318,7 +318,6 @@ struct netfs_request_ops {
  */
 enum netfs_read_from_hole {
 	NETFS_READ_HOLE_IGNORE,
-	NETFS_READ_HOLE_CLEAR,
 	NETFS_READ_HOLE_FAIL,
 };
 
-- 
cgit v1.2.3


From 314ee7035febc86f4f9452fb90a6c2165a183fe5 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:02 +0100
Subject: fs/netfs: reorder struct fields to eliminate holes

This shrinks `struct netfs_io_stream` from 104 to 96 bytes and `struct
netfs_io_request` from 600 to 576 bytes.

[DH: Modified as the patch to turn netfs_io_request::error into a short
was removed from the set]

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-7-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 73537dafa224..33f145f7f2c2 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -144,8 +144,8 @@ struct netfs_io_stream {
 	struct netfs_io_subrequest *front;	/* Op being collected */
 	unsigned long long	collected_to;	/* Position we've collected results to */
 	size_t			transferred;	/* The amount transferred from this stream */
-	enum netfs_io_source	source;		/* Where to read from/write to */
 	unsigned short		error;		/* Aggregate error for the stream */
+	enum netfs_io_source	source;		/* Where to read from/write to */
 	unsigned char		stream_nr;	/* Index of stream in parent table */
 	bool			avail;		/* T if stream is available */
 	bool			active;		/* T if stream is active */
@@ -240,19 +240,10 @@ struct netfs_io_request {
 	void			*netfs_priv;	/* Private data for the netfs */
 	void			*netfs_priv2;	/* Private data for the netfs */
 	struct bio_vec		*direct_bv;	/* DIO buffer list (when handling iovec-iter) */
-	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
-	unsigned int		debug_id;
-	unsigned int		rsize;		/* Maximum read size (0 for none) */
-	unsigned int		wsize;		/* Maximum write size (0 for none) */
-	atomic_t		subreq_counter;	/* Next subreq->debug_index */
-	unsigned int		nr_group_rel;	/* Number of refs to release on ->group */
-	spinlock_t		lock;		/* Lock for queuing subreqs */
 	unsigned long long	submitted;	/* Amount submitted for I/O so far */
 	unsigned long long	len;		/* Length of the request */
 	size_t			transferred;	/* Amount to be indicated as transferred */
 	long			error;		/* 0 or error that occurred */
-	enum netfs_io_origin	origin;		/* Origin of the request */
-	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
 	unsigned long long	i_size;		/* Size of the file */
 	unsigned long long	start;		/* Start position */
 	atomic64_t		issued_to;	/* Write issuer folio cursor */
@@ -260,7 +251,16 @@ struct netfs_io_request {
 	unsigned long long	cleaned_to;	/* Position we've cleaned folios to */
 	unsigned long long	abandon_to;	/* Position to abandon folios to */
 	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
+	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
+	unsigned int		debug_id;
+	unsigned int		rsize;		/* Maximum read size (0 for none) */
+	unsigned int		wsize;		/* Maximum write size (0 for none) */
+	atomic_t		subreq_counter;	/* Next subreq->debug_index */
+	unsigned int		nr_group_rel;	/* Number of refs to release on ->group */
+	spinlock_t		lock;		/* Lock for queuing subreqs */
 	unsigned char		front_folio_order; /* Order (size) of front folio */
+	enum netfs_io_origin	origin;		/* Origin of the request */
+	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
 	refcount_t		ref;
 	unsigned long		flags;
 #define NETFS_RREQ_OFFLOAD_COLLECTION	0	/* Offload collection to workqueue */
-- 
cgit v1.2.3


From 3dc00bca8dc8226f79f6958293a2777f0691cfb5 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:03 +0100
Subject: fs/netfs: remove `netfs_io_request.ractl`

Since this field is only used by netfs_prepare_read_iterator() when
called by netfs_readahead(), we can simply pass it as parameter.  This
shrinks the struct from 576 to 568 bytes.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-8-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 33f145f7f2c2..2b127527544e 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -228,7 +228,6 @@ struct netfs_io_request {
 	struct kiocb		*iocb;		/* AIO completion vector */
 	struct netfs_cache_resources cache_resources;
 	struct netfs_io_request	*copy_to_cache;	/* Request to write just-read data to the cache */
-	struct readahead_control *ractl;	/* Readahead descriptor */
 	struct list_head	proc_link;	/* Link in netfs_iorequests */
 	struct netfs_io_stream	io_streams[2];	/* Streams of parallel I/O operations */
 #define NR_IO_STREAMS 2 //wreq->nr_io_streams
-- 
cgit v1.2.3


From 07c08bac9302012d9a91d40e95c8f98800f7d787 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:04 +0100
Subject: fs/netfs: declare field `proc_link` only if CONFIG_PROC_FS=y

This field is only used for the "proc" filesystem.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-9-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 2b127527544e..3f7056d837f8 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -228,7 +228,9 @@ struct netfs_io_request {
 	struct kiocb		*iocb;		/* AIO completion vector */
 	struct netfs_cache_resources cache_resources;
 	struct netfs_io_request	*copy_to_cache;	/* Request to write just-read data to the cache */
+#ifdef CONFIG_PROC_FS
 	struct list_head	proc_link;	/* Link in netfs_iorequests */
+#endif
 	struct netfs_io_stream	io_streams[2];	/* Streams of parallel I/O operations */
 #define NR_IO_STREAMS 2 //wreq->nr_io_streams
 	struct netfs_group	*group;		/* Writeback group being written back */
-- 
cgit v1.2.3


From 6bb09e5db3a07cf3e03f25f725bd8edc959e38ba Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:05 +0100
Subject: folio_queue: remove unused field `marks3`

The last user was removed by commit e2d46f2ec332 ("netfs: Change the
read result collector to only use one work item").

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-10-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/folio_queue.h | 42 ------------------------------------------
 1 file changed, 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h
index 45ad2408a80c..adab609c972e 100644
--- a/include/linux/folio_queue.h
+++ b/include/linux/folio_queue.h
@@ -34,7 +34,6 @@ struct folio_queue {
 	struct folio_queue	*prev;		/* Previous queue segment of NULL */
 	unsigned long		marks;		/* 1-bit mark per folio */
 	unsigned long		marks2;		/* Second 1-bit mark per folio */
-	unsigned long		marks3;		/* Third 1-bit mark per folio */
 #if PAGEVEC_SIZE > BITS_PER_LONG
 #error marks is not big enough
 #endif
@@ -58,7 +57,6 @@ static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id)
 	folioq->prev = NULL;
 	folioq->marks = 0;
 	folioq->marks2 = 0;
-	folioq->marks3 = 0;
 	folioq->rreq_id = rreq_id;
 	folioq->debug_id = 0;
 }
@@ -178,45 +176,6 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot)
 	clear_bit(slot, &folioq->marks2);
 }
 
-/**
- * folioq_is_marked3: Check third folio mark in a folio queue segment
- * @folioq: The segment to query
- * @slot: The slot number of the folio to query
- *
- * Determine if the third mark is set for the folio in the specified slot in a
- * folio queue segment.
- */
-static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot)
-{
-	return test_bit(slot, &folioq->marks3);
-}
-
-/**
- * folioq_mark3: Set the third mark on a folio in a folio queue segment
- * @folioq: The segment to modify
- * @slot: The slot number of the folio to modify
- *
- * Set the third mark for the folio in the specified slot in a folio queue
- * segment.
- */
-static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot)
-{
-	set_bit(slot, &folioq->marks3);
-}
-
-/**
- * folioq_unmark3: Clear the third mark on a folio in a folio queue segment
- * @folioq: The segment to modify
- * @slot: The slot number of the folio to modify
- *
- * Clear the third mark for the folio in the specified slot in a folio queue
- * segment.
- */
-static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot)
-{
-	clear_bit(slot, &folioq->marks3);
-}
-
 /**
  * folioq_append: Add a folio to a folio queue segment
  * @folioq: The segment to add to
@@ -318,7 +277,6 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot)
 	folioq->vec.folios[slot] = NULL;
 	folioq_unmark(folioq, slot);
 	folioq_unmark2(folioq, slot);
-	folioq_unmark3(folioq, slot);
 }
 
 #endif /* _LINUX_FOLIO_QUEUE_H */
-- 
cgit v1.2.3


From 67b916719a15c33fd18d6e8298828caeef428d00 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:06 +0100
Subject: fs/netfs: remove unused flag NETFS_RREQ_DONT_UNLOCK_FOLIOS

NETFS_RREQ_DONT_UNLOCK_FOLIOS has never been used ever since it was
added by commit 3d3c95046742 ("netfs: Provide readahead and readpage
netfs helpers").

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-11-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 3f7056d837f8..5f60d8e3a7ef 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -266,7 +266,6 @@ struct netfs_io_request {
 	unsigned long		flags;
 #define NETFS_RREQ_OFFLOAD_COLLECTION	0	/* Offload collection to workqueue */
 #define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
-#define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
 #define NETFS_RREQ_FOLIO_COPY_TO_CACHE	6	/* Copy current folio to cache from read */
-- 
cgit v1.2.3


From 4b1ca12dd3f2529dc788cf4f18259ed62006ccb8 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max.kellermann@ionos.com>
Date: Mon, 19 May 2025 14:48:07 +0100
Subject: fs/netfs: remove unused flag NETFS_RREQ_BLOCKED

NETFS_RREQ_BLOCKED was added by commit 016dc8516aec ("netfs: Implement
unbuffered/DIO read support") but has never been used either.  Without
NETFS_RREQ_BLOCKED, NETFS_RREQ_NONBLOCK makes no sense, and thus can
be removed as well.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519134813.2975312-12-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5f60d8e3a7ef..cf634c28522d 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -270,8 +270,6 @@ struct netfs_io_request {
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
 #define NETFS_RREQ_FOLIO_COPY_TO_CACHE	6	/* Copy current folio to cache from read */
 #define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
-#define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
-#define NETFS_RREQ_BLOCKED		10	/* We blocked */
 #define NETFS_RREQ_PAUSE		11	/* Pause subrequest generation */
 #define NETFS_RREQ_USE_IO_ITER		12	/* Use ->io_iter rather than ->i_pages */
 #define NETFS_RREQ_ALL_QUEUED		13	/* All subreqs are now queued */
-- 
cgit v1.2.3


From 34eb98c6598c4057640ca56dd1fad6555187473a Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Mon, 19 May 2025 10:07:02 +0100
Subject: netfs: Fix setting of transferred bytes with short DIO reads

A netfslib request comprises an ordered stream of subrequests that, when
doing an unbuffered/DIO read, are contiguous.  The subrequests may be
performed in parallel, but may not be fully completed.

For instance, if we try and make a 256KiB DIO read from a 3-byte file with
a 64KiB rsize and 256KiB bsize, netfslib will attempt to make a read of
256KiB, broken up into four 64KiB subreads, with the expectation that the
first will be short and the subsequent three be completely devoid - but we
do all four on the basis that the file may have been changed by a third
party.

The read-collection code, however, walks through all the subreqs and
advances the notion of how much data has been read in the stream to the
start of each subreq plus its amount transferred (which are 3, 0, 0, 0 for
the example above) - which gives an amount apparently read of 3*64KiB -
which is incorrect.

Fix the collection code to cut short the calculation of the transferred
amount with the first short subrequest in an unbuffered read; everything
beyond that must be ignored as there's a hole that cannot be filled.  This
applies both to shortness due to hitting the EOF and shortness due to an
error.

This is achieved by setting a flag on the request when we collect the first
short subrequest (collection is done in ascending order).

This can be tested by mounting a cifs volume with rsize=65536,bsize=262144
and doing a 256k DIO read of a very small file (e.g. 3 bytes).  read()
should return 3, not >3.

This problem came in when netfs_read_collection() set rreq->transferred to
stream->transferred, even for DIO.  Prior to that, netfs_rreq_assess_dio()
just went over the list and added up the subreqs till it met a short one -
but now the subreqs are discarded earlier.

Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Reported-by: Nicolas Baranger <nicolas.baranger@3xo.fr>
Closes: https://lore.kernel.org/all/10bec2430ed4df68bde10ed95295d093@3xo.fr/
Signed-off-by: "Paulo Alcantara (Red Hat)" <pc@manguebit.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519090707.2848510-3-dhowells@redhat.com
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index c86a11cfc4a3..497c4f4698f6 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -279,6 +279,7 @@ struct netfs_io_request {
 #define NETFS_RREQ_USE_IO_ITER		12	/* Use ->io_iter rather than ->i_pages */
 #define NETFS_RREQ_ALL_QUEUED		13	/* All subreqs are now queued */
 #define NETFS_RREQ_RETRYING		14	/* Set if we're in the retry path */
+#define NETFS_RREQ_SHORT_TRANSFER	15	/* Set if we have a short transfer */
 #define NETFS_RREQ_USE_PGPRIV2		31	/* [DEPRECATED] Use PG_private_2 to mark
 						 * write to cache on read */
 	const struct netfs_request_ops *netfs_ops;
-- 
cgit v1.2.3


From 20d72b00ca814d748f5663484e5c53bb2bf37a3a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 May 2025 10:07:03 +0100
Subject: netfs: Fix the request's work item to not require a ref

When the netfs_io_request struct's work item is queued, it must be supplied
with a ref to the work item struct to prevent it being deallocated whilst
on the queue or whilst it is being processed.  This is tricky to manage as
we have to get a ref before we try and queue it and then we may find it's
already queued and is thus already holding a ref - in which case we have to
try and get rid of the ref again.

The problem comes if we're in BH or IRQ context and need to drop the ref:
if netfs_put_request() reduces the count to 0, we have to do the cleanup -
but the cleanup may need to wait.

Fix this by adding a new work item to the request, ->cleanup_work, and
dispatching that when the refcount hits zero.  That can then synchronously
cancel any outstanding work on the main work item before doing the cleanup.

Adding a new work item also deals with another problem upstream where it's
sometimes changing the work func in the put function and requeuing it -
which has occasionally in the past caused the cleanup to happen
incorrectly.

As a bonus, this allows us to get rid of the 'was_async' parameter from a
bunch of functions.  This indicated whether the put function might not be
permitted to sleep.

Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/20250519090707.2848510-4-dhowells@redhat.com
cc: Paulo Alcantara <pc@manguebit.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Steve French <stfrench@microsoft.com>
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fscache.h |  2 +-
 include/linux/netfs.h   | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9de27643607f..266e6c9e6f83 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -628,7 +628,7 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
 					 term_func, term_func_priv,
 					 using_pgpriv2, caching);
 	else if (term_func)
-		term_func(term_func_priv, -ENOBUFS, false);
+		term_func(term_func_priv, -ENOBUFS);
 
 }
 
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 497c4f4698f6..c3f230732f51 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -51,8 +51,7 @@ enum netfs_io_source {
 	NETFS_INVALID_WRITE,
 } __mode(byte);
 
-typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
-				      bool was_async);
+typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error);
 
 /*
  * Per-inode context.  This wraps the VFS inode.
@@ -223,9 +222,10 @@ enum netfs_io_origin {
  */
 struct netfs_io_request {
 	union {
-		struct work_struct work;
+		struct work_struct cleanup_work; /* Deferred cleanup work */
 		struct rcu_head rcu;
 	};
+	struct work_struct	work;		/* Result collector work */
 	struct inode		*inode;		/* The file being accessed */
 	struct address_space	*mapping;	/* The mapping being accessed */
 	struct kiocb		*iocb;		/* AIO completion vector */
@@ -270,7 +270,7 @@ struct netfs_io_request {
 #define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
-#define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
+#define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes (has ref) */
 #define NETFS_RREQ_FOLIO_COPY_TO_CACHE	6	/* Copy current folio to cache from read */
 #define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
 #define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
@@ -440,15 +440,14 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq);
 void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
 			  enum netfs_sreq_ref_trace what);
 void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
-			  bool was_async, enum netfs_sreq_ref_trace what);
+			  enum netfs_sreq_ref_trace what);
 ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
 				struct iov_iter *new,
 				iov_iter_extraction_t extraction_flags);
 size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
 			size_t max_size, size_t max_segs);
 void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq);
-void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
-				       bool was_async);
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error);
 void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
 
 int netfs_start_io_read(struct inode *inode);
-- 
cgit v1.2.3


From 8f08055bc67a355aca55856cc810b89645506a5f Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 13 Apr 2025 11:34:28 +0100
Subject: iio: introduced iio_push_to_buffers_with_ts() that takes a
 data_total_len argument.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check that data_total_len argument against iio_dev->scan_bytes.

The size needs to be at least as big as the scan. It can be larger,
which is typical if only part of fixed sized storage is used due to
a subset of channels being enabled.

Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20250413103443.2420727-6-jic23@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/buffer.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 3b8d618bb3df..5c84ec4a9810 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -45,6 +45,18 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev,
 	return iio_push_to_buffers(indio_dev, data);
 }
 
+static inline int iio_push_to_buffers_with_ts(struct iio_dev *indio_dev,
+					      void *data, size_t data_total_len,
+					      s64 timestamp)
+{
+	if (unlikely(data_total_len < indio_dev->scan_bytes)) {
+		dev_err(&indio_dev->dev, "Undersized storage pushed to buffer\n");
+		return -ENOSPC;
+	}
+
+	return iio_push_to_buffers_with_timestamp(indio_dev, data, timestamp);
+}
+
 int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev,
 					  const void *data, size_t data_sz,
 					  int64_t timestamp);
-- 
cgit v1.2.3


From 7e00d74eacf77c98a60e7c754a9e5f73d8832095 Mon Sep 17 00:00:00 2001
From: Chelsy Ratnawat <chelsyratnawat2001@gmail.com>
Date: Thu, 1 May 2025 17:36:55 -0700
Subject: HID: sensor-hub: Fix typo and improve documentation for
 sensor_hub_remove_callback()

Fixed a typo in "registered" and improved grammar for better readability
and consistency with kernel-doc standards. No functional changes.

Signed-off-by: Chelsy Ratnawat <chelsyratnawat2001@gmail.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20250502003655.1943000-1-chelsyratnawat2001@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/hid-sensor-hub.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index c27329e2a5ad..0f9f7df865db 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -128,12 +128,13 @@ int sensor_hub_register_callback(struct hid_sensor_hub_device *hsdev,
 			struct hid_sensor_hub_callbacks *usage_callback);
 
 /**
-* sensor_hub_remove_callback() - Remove client callbacks
+* sensor_hub_remove_callback() - Remove client callback
 * @hsdev:	Hub device instance.
-* @usage_id:	Usage id of the client (E.g. 0x200076 for Gyro).
+* @usage_id:	Usage id of the client (e.g. 0x200076 for gyro).
 *
-* If there is a callback registred, this call will remove that
-* callbacks, so that it will stop data and event notifications.
+* Removes a previously registered callback for the given usage_id
+* and hsdev. Once removed, the client will no longer receive data or
+* event notifications.
 */
 int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev,
 			u32 usage_id);
-- 
cgit v1.2.3


From fa19c303254bae5b8d105ecdc7d714d092f2adda Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Wed, 7 May 2025 15:42:40 -0500
Subject: iio: make IIO_DMA_MINALIGN minimum of 8 bytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a condition to ensure that IIO_DMA_MINALIGN is at least 8 bytes.
On some 32-bit architectures, IIO_DMA_MINALIGN is 4. In many cases,
drivers are using this alignment for buffers that include a 64-bit
timestamp that is used with iio_push_to_buffers_with_ts(), which expects
the timestamp to be aligned to 8 bytes. To handle this, we can just make
IIO_DMA_MINALIGN at least 8 bytes.

Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Signed-off-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20250507-iio-introduce-iio_declare_buffer_with_ts-v6-1-4aee1b9f1b89@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 638cf2420fbd..a574f22398e4 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -10,6 +10,7 @@
 #include <linux/device.h>
 #include <linux/cdev.h>
 #include <linux/compiler_types.h>
+#include <linux/minmax.h>
 #include <linux/slab.h>
 #include <linux/iio/types.h>
 /* IIO TODO LIST */
@@ -775,8 +776,14 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev)
  * to in turn include IIO_DMA_MINALIGN'd elements such as buffers which
  * must not share  cachelines with the rest of the structure, thus making
  * them safe for use with non-coherent DMA.
+ *
+ * A number of drivers also use this on buffers that include a 64-bit timestamp
+ * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where
+ * DMA alignment is not sufficient for proper timestamp alignment, we align to
+ * 8 bytes instead.
  */
-#define IIO_DMA_MINALIGN ARCH_DMA_MINALIGN
+#define IIO_DMA_MINALIGN MAX(ARCH_DMA_MINALIGN, sizeof(s64))
+
 struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv);
 
 /* The information at the returned address is guaranteed to be cacheline aligned */
-- 
cgit v1.2.3


From 63fc53526d3090b27bd06bb43b92e8bc85f46fb1 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Wed, 7 May 2025 15:42:41 -0500
Subject: iio: introduce IIO_DECLARE_BUFFER_WITH_TS macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add new macros to help with the common case of declaring a buffer that
is safe to use with iio_push_to_buffers_with_ts(). This is not trivial
to do correctly because of the alignment requirements of the timestamp.
This will make it easier for both authors and reviewers.

To avoid double __align() attributes in cases where we also need DMA
alignment, add a 2nd variant IIO_DECLARE_DMA_BUFFER_WITH_TS().

Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Signed-off-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20250507-iio-introduce-iio_declare_buffer_with_ts-v6-2-4aee1b9f1b89@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index a574f22398e4..d11668f14a3e 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -7,6 +7,7 @@
 #ifndef _INDUSTRIAL_IO_H_
 #define _INDUSTRIAL_IO_H_
 
+#include <linux/align.h>
 #include <linux/device.h>
 #include <linux/cdev.h>
 #include <linux/compiler_types.h>
@@ -784,6 +785,37 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev)
  */
 #define IIO_DMA_MINALIGN MAX(ARCH_DMA_MINALIGN, sizeof(s64))
 
+#define __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) \
+	type name[ALIGN((count), sizeof(s64) / sizeof(type)) + sizeof(s64) / sizeof(type)]
+
+/**
+ * IIO_DECLARE_BUFFER_WITH_TS() - Declare a buffer with timestamp
+ * @type: element type of the buffer
+ * @name: identifier name of the buffer
+ * @count: number of elements in the buffer
+ *
+ * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In
+ * addition to allocating enough space for @count elements of @type, it also
+ * allocates space for a s64 timestamp at the end of the buffer and ensures
+ * proper alignment of the timestamp.
+ */
+#define IIO_DECLARE_BUFFER_WITH_TS(type, name, count) \
+	__IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(sizeof(s64))
+
+/**
+ * IIO_DECLARE_DMA_BUFFER_WITH_TS() - Declare a DMA-aligned buffer with timestamp
+ * @type: element type of the buffer
+ * @name: identifier name of the buffer
+ * @count: number of elements in the buffer
+ *
+ * Same as IIO_DECLARE_BUFFER_WITH_TS(), but is uses __aligned(IIO_DMA_MINALIGN)
+ * to ensure that the buffer doesn't share cachelines with anything that comes
+ * before it in a struct. This should not be used for stack-allocated buffers
+ * as stack memory cannot generally be used for DMA.
+ */
+#define IIO_DECLARE_DMA_BUFFER_WITH_TS(type, name, count) \
+	__IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(IIO_DMA_MINALIGN)
+
 struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv);
 
 /* The information at the returned address is guaranteed to be cacheline aligned */
-- 
cgit v1.2.3


From 27737b8407585c0160063b9b39e888d486d86188 Mon Sep 17 00:00:00 2001
From: Chelsy Ratnawat <chelsyratnawat2001@gmail.com>
Date: Tue, 6 May 2025 22:57:45 -0700
Subject: HID: sensor-hub: Fix typo and improve documentation

Includes the following corrections -
 - Changed Measurment -> Measurement
 - Changed clode -> close
 - Gyro -> gyro

Signed-off-by: Chelsy Ratnawat <chelsyratnawat2001@gmail.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20250507055745.4069933-1-chelsyratnawat2001@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/hid-sensor-hub.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 0f9f7df865db..e71056553108 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -17,7 +17,7 @@
  * @attrib_id:		Attribute id for this attribute.
  * @report_id:		Report id in which this information resides.
  * @index:		Field index in the report.
- * @units:		Measurment unit for this attribute.
+ * @units:		Measurement unit for this attribute.
  * @unit_expo:		Exponent used in the data.
  * @size:		Size in bytes for data size.
  * @logical_minimum:	Logical minimum value for this attribute.
@@ -39,8 +39,8 @@ struct hid_sensor_hub_attribute_info {
  * struct sensor_hub_pending - Synchronous read pending information
  * @status:		Pending status true/false.
  * @ready:		Completion synchronization data.
- * @usage_id:		Usage id for physical device, E.g. Gyro usage id.
- * @attr_usage_id:	Usage Id of a field, E.g. X-AXIS for a gyro.
+ * @usage_id:		Usage id for physical device, e.g. gyro usage id.
+ * @attr_usage_id:	Usage Id of a field, e.g. X-axis for a gyro.
  * @raw_size:		Response size for a read request.
  * @raw_data:		Place holder for received response.
  */
@@ -104,10 +104,10 @@ struct hid_sensor_hub_callbacks {
 int sensor_hub_device_open(struct hid_sensor_hub_device *hsdev);
 
 /**
-* sensor_hub_device_clode() - Close hub device
+* sensor_hub_device_close() - Close hub device
 * @hsdev:	Hub device instance.
 *
-* Used to clode hid device for sensor hub.
+* Used to close hid device for sensor hub.
 */
 void sensor_hub_device_close(struct hid_sensor_hub_device *hsdev);
 
-- 
cgit v1.2.3


From 46550e2b878d60923c72f0526a7aac02e8eda3d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Sun, 4 May 2025 20:22:44 +0200
Subject: include: pe.h: Fix PE definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Rename constants to their standard PE names:
  - MZ_MAGIC -> IMAGE_DOS_SIGNATURE
  - PE_MAGIC -> IMAGE_NT_SIGNATURE
  - PE_OPT_MAGIC_PE32_ROM -> IMAGE_ROM_OPTIONAL_HDR_MAGIC
  - PE_OPT_MAGIC_PE32 -> IMAGE_NT_OPTIONAL_HDR32_MAGIC
  - PE_OPT_MAGIC_PE32PLUS -> IMAGE_NT_OPTIONAL_HDR64_MAGIC
  - IMAGE_DLL_CHARACTERISTICS_NX_COMPAT -> IMAGE_DLLCHARACTERISTICS_NX_COMPAT

* Import constants and their description from readpe and file projects
  which contains current up-to-date information:
  - IMAGE_FILE_MACHINE_*
  - IMAGE_FILE_*
  - IMAGE_SUBSYSTEM_*
  - IMAGE_DLLCHARACTERISTICS_*
  - IMAGE_DLLCHARACTERISTICS_EX_*
  - IMAGE_DEBUG_TYPE_*

* Add missing IMAGE_SCN_* constants and update their incorrect description

* Fix incorrect value of IMAGE_SCN_MEM_PURGEABLE constant

* Add description for win32_version and loader_flags PE fields

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/pe.h | 279 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 174 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pe.h b/include/linux/pe.h
index fdf9c95709ba..cd2b7275385f 100644
--- a/include/linux/pe.h
+++ b/include/linux/pe.h
@@ -39,113 +39,160 @@
  */
 #define LINUX_PE_MAGIC	0x818223cd
 
-#define MZ_MAGIC	0x5a4d	/* "MZ" */
+#define IMAGE_DOS_SIGNATURE	0x5a4d /* "MZ" */
 
-#define PE_MAGIC		0x00004550	/* "PE\0\0" */
-#define PE_OPT_MAGIC_PE32	0x010b
-#define PE_OPT_MAGIC_PE32_ROM	0x0107
-#define PE_OPT_MAGIC_PE32PLUS	0x020b
+#define IMAGE_NT_SIGNATURE	0x00004550 /* "PE\0\0" */
+
+#define IMAGE_ROM_OPTIONAL_HDR_MAGIC	0x0107 /* ROM image (for R3000/R4000/R10000/ALPHA), without MZ and PE\0\0 sign */
+#define IMAGE_NT_OPTIONAL_HDR32_MAGIC	0x010b /* PE32 executable image */
+#define IMAGE_NT_OPTIONAL_HDR64_MAGIC	0x020b /* PE32+ executable image */
 
 /* machine type */
-#define	IMAGE_FILE_MACHINE_UNKNOWN	0x0000
-#define	IMAGE_FILE_MACHINE_AM33		0x01d3
-#define	IMAGE_FILE_MACHINE_AMD64	0x8664
-#define	IMAGE_FILE_MACHINE_ARM		0x01c0
-#define	IMAGE_FILE_MACHINE_ARMV7	0x01c4
-#define	IMAGE_FILE_MACHINE_ARM64	0xaa64
-#define	IMAGE_FILE_MACHINE_EBC		0x0ebc
-#define	IMAGE_FILE_MACHINE_I386		0x014c
-#define	IMAGE_FILE_MACHINE_IA64		0x0200
-#define	IMAGE_FILE_MACHINE_M32R		0x9041
-#define	IMAGE_FILE_MACHINE_MIPS16	0x0266
-#define	IMAGE_FILE_MACHINE_MIPSFPU	0x0366
-#define	IMAGE_FILE_MACHINE_MIPSFPU16	0x0466
-#define	IMAGE_FILE_MACHINE_POWERPC	0x01f0
-#define	IMAGE_FILE_MACHINE_POWERPCFP	0x01f1
-#define	IMAGE_FILE_MACHINE_R4000	0x0166
-#define	IMAGE_FILE_MACHINE_RISCV32	0x5032
-#define	IMAGE_FILE_MACHINE_RISCV64	0x5064
-#define	IMAGE_FILE_MACHINE_RISCV128	0x5128
-#define	IMAGE_FILE_MACHINE_SH3		0x01a2
-#define	IMAGE_FILE_MACHINE_SH3DSP	0x01a3
-#define	IMAGE_FILE_MACHINE_SH3E		0x01a4
-#define	IMAGE_FILE_MACHINE_SH4		0x01a6
-#define	IMAGE_FILE_MACHINE_SH5		0x01a8
-#define	IMAGE_FILE_MACHINE_THUMB	0x01c2
-#define	IMAGE_FILE_MACHINE_WCEMIPSV2	0x0169
-#define	IMAGE_FILE_MACHINE_LOONGARCH32	0x6232
-#define	IMAGE_FILE_MACHINE_LOONGARCH64	0x6264
+#define	IMAGE_FILE_MACHINE_UNKNOWN	0x0000 /* Unknown architecture */
+#define	IMAGE_FILE_MACHINE_TARGET_HOST	0x0001 /* Interacts with the host and not a WOW64 guest (not for file image) */
+#define	IMAGE_FILE_MACHINE_ALPHA_OLD	0x0183 /* DEC Alpha AXP 32-bit (old images) */
+#define	IMAGE_FILE_MACHINE_ALPHA	0x0184 /* DEC Alpha AXP 32-bit */
+#define	IMAGE_FILE_MACHINE_ALPHA64	0x0284 /* DEC Alpha AXP 64-bit (with 8kB page size) */
+#define	IMAGE_FILE_MACHINE_AXP64	IMAGE_FILE_MACHINE_ALPHA64
+#define	IMAGE_FILE_MACHINE_AM33		0x01d3 /* Matsushita AM33, now Panasonic MN103 */
+#define	IMAGE_FILE_MACHINE_AMD64	0x8664 /* AMD64 (x64) */
+#define	IMAGE_FILE_MACHINE_ARM		0x01c0 /* ARM Little-Endian (ARMv4) */
+#define	IMAGE_FILE_MACHINE_THUMB	0x01c2 /* ARM Thumb Little-Endian (ARMv4T) */
+#define	IMAGE_FILE_MACHINE_ARMNT	0x01c4 /* ARM Thumb-2 Little-Endian (ARMv7) */
+#define	IMAGE_FILE_MACHINE_ARMV7	IMAGE_FILE_MACHINE_ARMNT
+#define	IMAGE_FILE_MACHINE_ARM64	0xaa64 /* ARM64 Little-Endian (Classic ABI) */
+#define	IMAGE_FILE_MACHINE_ARM64EC	0xa641 /* ARM64 Little-Endian (Emulation Compatible ABI for AMD64) */
+#define	IMAGE_FILE_MACHINE_ARM64X	0xa64e /* ARM64 Little-Endian (fat binary with both Classic ABI and EC ABI code) */
+#define	IMAGE_FILE_MACHINE_CEE		0xc0ee /* COM+ Execution Engine (CLR pure MSIL object files) */
+#define	IMAGE_FILE_MACHINE_CEF		0x0cef /* Windows CE 3.0 Common Executable Format (CEF bytecode) */
+#define	IMAGE_FILE_MACHINE_CHPE_X86	0x3a64 /* ARM64 Little-Endian (Compiled Hybrid PE ABI for I386) */
+#define	IMAGE_FILE_MACHINE_HYBRID_X86	IMAGE_FILE_MACHINE_CHPE_X86
+#define	IMAGE_FILE_MACHINE_EBC		0x0ebc /* EFI/UEFI Byte Code */
+#define	IMAGE_FILE_MACHINE_I386		0x014c /* Intel 386 (x86) */
+#define	IMAGE_FILE_MACHINE_I860		0x014d /* Intel 860 (N10) */
+#define	IMAGE_FILE_MACHINE_IA64		0x0200 /* Intel IA-64 (with 8kB page size) */
+#define	IMAGE_FILE_MACHINE_LOONGARCH32	0x6232 /* LoongArch 32-bit processor family */
+#define	IMAGE_FILE_MACHINE_LOONGARCH64	0x6264 /* LoongArch 64-bit processor family */
+#define	IMAGE_FILE_MACHINE_M32R		0x9041 /* Mitsubishi M32R 32-bit Little-Endian */
+#define	IMAGE_FILE_MACHINE_M68K		0x0268 /* Motorola 68000 series */
+#define	IMAGE_FILE_MACHINE_MIPS16	0x0266 /* MIPS III with MIPS16 ASE Little-Endian */
+#define	IMAGE_FILE_MACHINE_MIPSFPU	0x0366 /* MIPS III with FPU Little-Endian */
+#define	IMAGE_FILE_MACHINE_MIPSFPU16	0x0466 /* MIPS III with MIPS16 ASE and FPU Little-Endian */
+#define	IMAGE_FILE_MACHINE_MPPC_601	0x0601 /* PowerPC 32-bit Big-Endian */
+#define	IMAGE_FILE_MACHINE_OMNI		0xace1 /* Microsoft OMNI VM (omniprox.dll) */
+#define	IMAGE_FILE_MACHINE_PARISC	0x0290 /* HP PA-RISC */
+#define	IMAGE_FILE_MACHINE_POWERPC	0x01f0 /* PowerPC 32-bit Little-Endian */
+#define	IMAGE_FILE_MACHINE_POWERPCFP	0x01f1 /* PowerPC 32-bit with FPU Little-Endian */
+#define	IMAGE_FILE_MACHINE_POWERPCBE	0x01f2 /* PowerPC 64-bit Big-Endian */
+#define	IMAGE_FILE_MACHINE_R3000	0x0162 /* MIPS I Little-Endian */
+#define	IMAGE_FILE_MACHINE_R3000_BE	0x0160 /* MIPS I Big-Endian */
+#define	IMAGE_FILE_MACHINE_R4000	0x0166 /* MIPS III Little-Endian (with 1kB or 4kB page size) */
+#define	IMAGE_FILE_MACHINE_R10000	0x0168 /* MIPS IV Little-Endian */
+#define	IMAGE_FILE_MACHINE_RISCV32	0x5032 /* RISC-V 32-bit address space */
+#define	IMAGE_FILE_MACHINE_RISCV64	0x5064 /* RISC-V 64-bit address space */
+#define	IMAGE_FILE_MACHINE_RISCV128	0x5128 /* RISC-V 128-bit address space */
+#define	IMAGE_FILE_MACHINE_SH3		0x01a2 /* Hitachi SH-3 32-bit Little-Endian (with 1kB page size) */
+#define	IMAGE_FILE_MACHINE_SH3DSP	0x01a3 /* Hitachi SH-3 DSP 32-bit (with 1kB page size) */
+#define	IMAGE_FILE_MACHINE_SH3E		0x01a4 /* Hitachi SH-3E Little-Endian (with 1kB page size) */
+#define	IMAGE_FILE_MACHINE_SH4		0x01a6 /* Hitachi SH-4 32-bit Little-Endian (with 1kB page size) */
+#define	IMAGE_FILE_MACHINE_SH5		0x01a8 /* Hitachi SH-5 64-bit */
+#define	IMAGE_FILE_MACHINE_TAHOE	0x07cc /* Intel EM machine */
+#define	IMAGE_FILE_MACHINE_TRICORE	0x0520 /* Infineon AUDO 32-bit */
+#define	IMAGE_FILE_MACHINE_WCEMIPSV2	0x0169 /* MIPS Windows CE v2 Little-Endian */
 
 /* flags */
-#define IMAGE_FILE_RELOCS_STRIPPED           0x0001
-#define IMAGE_FILE_EXECUTABLE_IMAGE          0x0002
-#define IMAGE_FILE_LINE_NUMS_STRIPPED        0x0004
-#define IMAGE_FILE_LOCAL_SYMS_STRIPPED       0x0008
-#define IMAGE_FILE_AGGRESSIVE_WS_TRIM        0x0010
-#define IMAGE_FILE_LARGE_ADDRESS_AWARE       0x0020
-#define IMAGE_FILE_16BIT_MACHINE             0x0040
-#define IMAGE_FILE_BYTES_REVERSED_LO         0x0080
-#define IMAGE_FILE_32BIT_MACHINE             0x0100
-#define IMAGE_FILE_DEBUG_STRIPPED            0x0200
-#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP   0x0400
-#define IMAGE_FILE_NET_RUN_FROM_SWAP         0x0800
-#define IMAGE_FILE_SYSTEM                    0x1000
-#define IMAGE_FILE_DLL                       0x2000
-#define IMAGE_FILE_UP_SYSTEM_ONLY            0x4000
-#define IMAGE_FILE_BYTES_REVERSED_HI         0x8000
-
-#define IMAGE_FILE_OPT_ROM_MAGIC	0x107
-#define IMAGE_FILE_OPT_PE32_MAGIC	0x10b
-#define IMAGE_FILE_OPT_PE32_PLUS_MAGIC	0x20b
-
-#define IMAGE_SUBSYSTEM_UNKNOWN			 0
-#define IMAGE_SUBSYSTEM_NATIVE			 1
-#define IMAGE_SUBSYSTEM_WINDOWS_GUI		 2
-#define IMAGE_SUBSYSTEM_WINDOWS_CUI		 3
-#define IMAGE_SUBSYSTEM_POSIX_CUI		 7
-#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI		 9
-#define IMAGE_SUBSYSTEM_EFI_APPLICATION		10
-#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER	11
-#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER	12
-#define IMAGE_SUBSYSTEM_EFI_ROM_IMAGE		13
-#define IMAGE_SUBSYSTEM_XBOX			14
-
-#define IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE          0x0040
-#define IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY       0x0080
-#define IMAGE_DLL_CHARACTERISTICS_NX_COMPAT             0x0100
-#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION           0x0200
-#define IMAGE_DLLCHARACTERISTICS_NO_SEH                 0x0400
-#define IMAGE_DLLCHARACTERISTICS_NO_BIND                0x0800
-#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER             0x2000
-#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE  0x8000
-
-#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT		0x0001
-#define IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT	0x0040
-
-/* they actually defined 0x00000000 as well, but I think we'll skip that one. */
-#define IMAGE_SCN_RESERVED_0	0x00000001
-#define IMAGE_SCN_RESERVED_1	0x00000002
-#define IMAGE_SCN_RESERVED_2	0x00000004
-#define IMAGE_SCN_TYPE_NO_PAD	0x00000008 /* don't pad - obsolete */
-#define IMAGE_SCN_RESERVED_3	0x00000010
+#define IMAGE_FILE_RELOCS_STRIPPED		0x0001 /* Relocation info stripped from file */
+#define IMAGE_FILE_EXECUTABLE_IMAGE		0x0002 /* File is executable (i.e. no unresolved external references) */
+#define IMAGE_FILE_LINE_NUMS_STRIPPED		0x0004 /* Line nunbers stripped from file */
+#define IMAGE_FILE_LOCAL_SYMS_STRIPPED		0x0008 /* Local symbols stripped from file */
+#define IMAGE_FILE_AGGRESSIVE_WS_TRIM		0x0010 /* Aggressively trim working set */
+#define IMAGE_FILE_LARGE_ADDRESS_AWARE		0x0020 /* App can handle >2gb addresses (image can be loaded at address above 2GB) */
+#define IMAGE_FILE_16BIT_MACHINE		0x0040 /* 16 bit word machine */
+#define IMAGE_FILE_BYTES_REVERSED_LO		0x0080 /* Bytes of machine word are reversed (should be set together with IMAGE_FILE_BYTES_REVERSED_HI) */
+#define IMAGE_FILE_32BIT_MACHINE		0x0100 /* 32 bit word machine */
+#define IMAGE_FILE_DEBUG_STRIPPED		0x0200 /* Debugging info stripped from file in .DBG file */
+#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP	0x0400 /* If Image is on removable media, copy and run from the swap file */
+#define IMAGE_FILE_NET_RUN_FROM_SWAP		0x0800 /* If Image is on Net, copy and run from the swap file */
+#define IMAGE_FILE_SYSTEM			0x1000 /* System kernel-mode file (can't be loaded in user-mode) */
+#define IMAGE_FILE_DLL				0x2000 /* File is a DLL */
+#define IMAGE_FILE_UP_SYSTEM_ONLY		0x4000 /* File should only be run on a UP (uniprocessor) machine */
+#define IMAGE_FILE_BYTES_REVERSED_HI		0x8000 /* Bytes of machine word are reversed (should be set together with IMAGE_FILE_BYTES_REVERSED_LO) */
+
+/* subsys */
+#define IMAGE_SUBSYSTEM_UNKNOWN				 0 /* Unknown subsystem */
+#define IMAGE_SUBSYSTEM_NATIVE				 1 /* No subsystem required (NT device drivers and NT native system processes) */
+#define IMAGE_SUBSYSTEM_WINDOWS_GUI			 2 /* Windows graphical user interface (GUI) subsystem */
+#define IMAGE_SUBSYSTEM_WINDOWS_CUI			 3 /* Windows character-mode user interface (CUI) subsystem */
+#define IMAGE_SUBSYSTEM_WINDOWS_OLD_CE_GUI		 4 /* Old Windows CE subsystem */
+#define IMAGE_SUBSYSTEM_OS2_CUI				 5 /* OS/2 CUI subsystem */
+#define IMAGE_SUBSYSTEM_RESERVED_6			 6
+#define IMAGE_SUBSYSTEM_POSIX_CUI			 7 /* POSIX CUI subsystem */
+#define IMAGE_SUBSYSTEM_MMOSA				 8 /* MMOSA/Native Win32E */
+#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI			 9 /* Windows CE subsystem */
+#define IMAGE_SUBSYSTEM_EFI_APPLICATION			10 /* Extensible Firmware Interface (EFI) application */
+#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER		11 /* EFI driver with boot services */
+#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER		12 /* EFI driver with run-time services */
+#define IMAGE_SUBSYSTEM_EFI_ROM_IMAGE			13 /* EFI ROM image */
+#define IMAGE_SUBSYSTEM_XBOX				14 /* Xbox system */
+#define IMAGE_SUBSYSTEM_RESERVED_15			15
+#define IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION	16 /* Windows Boot application */
+#define IMAGE_SUBSYSTEM_XBOX_CODE_CATALOG		17 /* Xbox Code Catalog */
+
+/* dll_flags */
+#define IMAGE_LIBRARY_PROCESS_INIT			0x0001 /* DLL initialization function called just after process initialization */
+#define IMAGE_LIBRARY_PROCESS_TERM			0x0002 /* DLL initialization function called just before process termination */
+#define IMAGE_LIBRARY_THREAD_INIT			0x0004 /* DLL initialization function called just after thread initialization */
+#define IMAGE_LIBRARY_THREAD_TERM			0x0008 /* DLL initialization function called just before thread initialization */
+#define IMAGE_DLLCHARACTERISTICS_RESERVED_4		0x0010
+#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA	0x0020 /* ASLR with 64 bit address space (image can be loaded at address above 4GB) */
+#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE		0x0040 /* The DLL can be relocated at load time */
+#define IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY	0x0080 /* Code integrity checks are forced */
+#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT		0x0100 /* Image is compatible with data execution prevention */
+#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION		0x0200 /* Image is isolation aware, but should not be isolated (prevents loading of manifest file) */
+#define IMAGE_DLLCHARACTERISTICS_NO_SEH			0x0400 /* Image does not use SEH, no SE handler may reside in this image */
+#define IMAGE_DLLCHARACTERISTICS_NO_BIND		0x0800 /* Do not bind the image */
+#define IMAGE_DLLCHARACTERISTICS_X86_THUNK		0x1000 /* Image is a Wx86 Thunk DLL (for non-x86/risc DLL files) */
+#define IMAGE_DLLCHARACTERISTICS_APPCONTAINER		0x1000 /* Image should execute in an AppContainer (for EXE Metro Apps in Windows 8) */
+#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER		0x2000 /* A WDM driver */
+#define IMAGE_DLLCHARACTERISTICS_GUARD_CF		0x4000 /* Image supports Control Flow Guard */
+#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE	0x8000 /* The image is terminal server (Remote Desktop Services) aware */
+
+/* IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS flags */
+#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT					0x0001 /* Image is Control-flow Enforcement Technology Shadow Stack compatible */
+#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE			0x0002 /* CET is enforced in strict mode */
+#define IMAGE_DLLCHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE	0x0004 /* Relaxed mode for Context IP Validation under CET is allowed */
+#define IMAGE_DLLCHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC		0x0008 /* Use of dynamic APIs is restricted to processes only */
+#define IMAGE_DLLCHARACTERISTICS_EX_CET_RESERVED_1				0x0010
+#define IMAGE_DLLCHARACTERISTICS_EX_CET_RESERVED_2				0x0020
+#define IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT				0x0040 /* All branch targets in all image code sections are annotated with forward-edge control flow integrity guard instructions */
+#define IMAGE_DLLCHARACTERISTICS_EX_HOTPATCH_COMPATIBLE				0x0080 /* Image can be modified while in use, hotpatch-compatible */
+
+/* section_header flags */
+#define IMAGE_SCN_SCALE_INDEX	0x00000001 /* address of tls index is scaled = multiplied by 4 (for .tls section on MIPS only) */
+#define IMAGE_SCN_TYPE_NO_LOAD	0x00000002 /* reserved */
+#define IMAGE_SCN_TYPE_GROUPED	0x00000004 /* obsolete (used for 16-bit offset code) */
+#define IMAGE_SCN_TYPE_NO_PAD	0x00000008 /* .o only - don't pad - obsolete (same as IMAGE_SCN_ALIGN_1BYTES) */
+#define IMAGE_SCN_TYPE_COPY	0x00000010 /* reserved */
 #define IMAGE_SCN_CNT_CODE	0x00000020 /* .text */
 #define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */
 #define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */
-#define IMAGE_SCN_LNK_OTHER	0x00000100 /* reserved */
-#define IMAGE_SCN_LNK_INFO	0x00000200 /* .drectve comments */
-#define IMAGE_SCN_RESERVED_4	0x00000400
+#define IMAGE_SCN_LNK_OTHER	0x00000100 /* .o only - other type than code, data or info */
+#define IMAGE_SCN_LNK_INFO	0x00000200 /* .o only - .drectve comments */
+#define IMAGE_SCN_LNK_OVERLAY	0x00000400 /* section contains overlay */
 #define IMAGE_SCN_LNK_REMOVE	0x00000800 /* .o only - scn to be rm'd*/
 #define IMAGE_SCN_LNK_COMDAT	0x00001000 /* .o only - COMDAT data */
-#define IMAGE_SCN_RESERVED_5	0x00002000 /* spec omits this */
-#define IMAGE_SCN_RESERVED_6	0x00004000 /* spec omits this */
-#define IMAGE_SCN_GPREL		0x00008000 /* global pointer referenced data */
-/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */
-#define IMAGE_SCN_MEM_PURGEABLE	0x00010000 /* reserved for "future" use */
-#define IMAGE_SCN_16BIT		0x00020000 /* reserved for "future" use */
-#define IMAGE_SCN_LOCKED	0x00040000 /* reserved for "future" use */
-#define IMAGE_SCN_PRELOAD	0x00080000 /* reserved for "future" use */
+#define IMAGE_SCN_RESERVED_13	0x00002000 /* spec omits this */
+#define IMAGE_SCN_MEM_PROTECTED	0x00004000 /* section is memory protected (for M68K) */
+#define IMAGE_SCN_NO_DEFER_SPEC_EXC 0x00004000 /* reset speculative exceptions handling bits in the TLB entries (for non-M68K) */
+#define IMAGE_SCN_MEM_FARDATA	0x00008000 /* section uses FAR_EXTERNAL relocations (for M68K) */
+#define IMAGE_SCN_GPREL		0x00008000 /* global pointer referenced data (for non-M68K) */
+#define IMAGE_SCN_MEM_SYSHEAP	0x00010000 /* use system heap (for M68K) */
+#define IMAGE_SCN_MEM_PURGEABLE	0x00020000 /* section can be released from RAM (for M68K) */
+#define IMAGE_SCN_MEM_16BIT	0x00020000 /* section is 16-bit (for non-M68K where it makes sense: I386, THUMB, MIPS16, MIPSFPU16, ...) */
+#define IMAGE_SCN_MEM_LOCKED	0x00040000 /* prevent the section from being moved (for M68K and .o I386) */
+#define IMAGE_SCN_MEM_PRELOAD	0x00080000 /* section is preload to RAM (for M68K and .o I386) */
 /* and here they just stuck a 1-byte integer in the middle of a bitfield */
-#define IMAGE_SCN_ALIGN_1BYTES	0x00100000 /* it does what it says on the box */
+#define IMAGE_SCN_ALIGN_1BYTES	0x00100000 /* .o only - it does what it says on the box */
 #define IMAGE_SCN_ALIGN_2BYTES	0x00200000
 #define IMAGE_SCN_ALIGN_4BYTES	0x00300000
 #define IMAGE_SCN_ALIGN_8BYTES	0x00400000
@@ -159,7 +206,9 @@
 #define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000
 #define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000
 #define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000
-#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */
+#define IMAGE_SCN_ALIGN_RESERVED 0x00f00000
+#define IMAGE_SCN_ALIGN_MASK	0x00f00000
+#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* .o only - extended relocations */
 #define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */
 #define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */
 #define IMAGE_SCN_MEM_NOT_PAGED	0x08000000 /* not pageable */
@@ -168,8 +217,28 @@
 #define IMAGE_SCN_MEM_READ	0x40000000 /* readable */
 #define IMAGE_SCN_MEM_WRITE	0x80000000 /* writeable */
 
-#define IMAGE_DEBUG_TYPE_CODEVIEW	2
-#define IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS	20
+#define IMAGE_DEBUG_TYPE_UNKNOWN		 0 /* Unknown value, ignored by all tools */
+#define IMAGE_DEBUG_TYPE_COFF			 1 /* COFF debugging information */
+#define IMAGE_DEBUG_TYPE_CODEVIEW		 2 /* CodeView debugging information or Visual C++ Program Database debugging information */
+#define IMAGE_DEBUG_TYPE_FPO			 3 /* Frame pointer omission (FPO) information */
+#define IMAGE_DEBUG_TYPE_MISC			 4 /* Location of DBG file with CodeView debugging information */
+#define IMAGE_DEBUG_TYPE_EXCEPTION		 5 /* Exception information, copy of .pdata section */
+#define IMAGE_DEBUG_TYPE_FIXUP			 6 /* Fixup information */
+#define IMAGE_DEBUG_TYPE_OMAP_TO_SRC		 7 /* The mapping from an RVA in image to an RVA in source image */
+#define IMAGE_DEBUG_TYPE_OMAP_FROM_SRC		 8 /* The mapping from an RVA in source image to an RVA in image */
+#define IMAGE_DEBUG_TYPE_BORLAND		 9 /* Borland debugging information */
+#define IMAGE_DEBUG_TYPE_RESERVED10		10 /* Coldpath / Hotpatch debug information */
+#define IMAGE_DEBUG_TYPE_CLSID			11 /* CLSID */
+#define IMAGE_DEBUG_TYPE_VC_FEATURE		12 /* Visual C++ counts / statistics */
+#define IMAGE_DEBUG_TYPE_POGO			13 /* COFF group information, data for profile-guided optimization */
+#define IMAGE_DEBUG_TYPE_ILTCG			14 /* Incremental link-time code generation */
+#define IMAGE_DEBUG_TYPE_MPX			15 /* Intel Memory Protection Extensions */
+#define IMAGE_DEBUG_TYPE_REPRO			16 /* PE determinism or reproducibility */
+#define IMAGE_DEBUG_TYPE_EMBEDDED_PORTABLE_PDB	17 /* Embedded Portable PDB debugging information */
+#define IMAGE_DEBUG_TYPE_SPGO			18 /* Sample profile-guided optimization */
+#define IMAGE_DEBUG_TYPE_PDBCHECKSUM		19 /* PDB Checksum */
+#define IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS	20 /* Extended DLL characteristics bits */
+#define IMAGE_DEBUG_TYPE_PERFMAP		21 /* Location of associated Ready To Run PerfMap file */
 
 #ifndef __ASSEMBLY__
 
@@ -235,7 +304,7 @@ struct pe32_opt_hdr {
 	uint16_t image_minor;	/* minor image version */
 	uint16_t subsys_major;	/* major subsystem version */
 	uint16_t subsys_minor;	/* minor subsystem version */
-	uint32_t win32_version;	/* reserved, must be 0 */
+	uint32_t win32_version;	/* win32 version reported at runtime */
 	uint32_t image_size;	/* image size */
 	uint32_t header_size;	/* header size rounded up to
 				   file_align */
@@ -246,7 +315,7 @@ struct pe32_opt_hdr {
 	uint32_t stack_size;	/* amt of stack required */
 	uint32_t heap_size_req;	/* amt of heap requested */
 	uint32_t heap_size;	/* amt of heap required */
-	uint32_t loader_flags;	/* reserved, must be 0 */
+	uint32_t loader_flags;	/* loader flags */
 	uint32_t data_dirs;	/* number of data dir entries */
 };
 
@@ -269,7 +338,7 @@ struct pe32plus_opt_hdr {
 	uint16_t image_minor;	/* minor image version */
 	uint16_t subsys_major;	/* major subsystem version */
 	uint16_t subsys_minor;	/* minor subsystem version */
-	uint32_t win32_version;	/* reserved, must be 0 */
+	uint32_t win32_version;	/* win32 version reported at runtime */
 	uint32_t image_size;	/* image size */
 	uint32_t header_size;	/* header size rounded up to
 				   file_align */
@@ -280,7 +349,7 @@ struct pe32plus_opt_hdr {
 	uint64_t stack_size;	/* amt of stack required */
 	uint64_t heap_size_req;	/* amt of heap requested */
 	uint64_t heap_size;	/* amt of heap required */
-	uint32_t loader_flags;	/* reserved, must be 0 */
+	uint32_t loader_flags;	/* loader flags */
 	uint32_t data_dirs;	/* number of data dir entries */
 };
 
@@ -301,10 +370,10 @@ struct data_directory {
 	struct data_dirent global_ptr;		/* global pointer reg. Size=0 */
 	struct data_dirent tls;			/* .tls */
 	struct data_dirent load_config;		/* load configuration structure */
-	struct data_dirent bound_imports;	/* no idea */
+	struct data_dirent bound_imports;	/* bound import table */
 	struct data_dirent import_addrs;	/* import address table */
 	struct data_dirent delay_imports;	/* delay-load import table */
-	struct data_dirent clr_runtime_hdr;	/* .cor (object only) */
+	struct data_dirent clr_runtime_hdr;	/* .cor (clr/.net executables) */
 	struct data_dirent reserved;
 };
 
-- 
cgit v1.2.3


From e341f9c3c8412e57fe0042a33a2640245ecdf619 Mon Sep 17 00:00:00 2001
From: Joshua Hahn <joshua.hahnjy@gmail.com>
Date: Mon, 5 May 2025 11:23:28 -0700
Subject: mm/mempolicy: Weighted Interleave Auto-tuning

On machines with multiple memory nodes, interleaving page allocations
across nodes allows for better utilization of each node's bandwidth.
Previous work by Gregory Price [1] introduced weighted interleave, which
allowed for pages to be allocated across nodes according to user-set
ratios.

Ideally, these weights should be proportional to their bandwidth, so that
under bandwidth pressure, each node uses its maximal efficient bandwidth
and prevents latency from increasing exponentially.

Previously, weighted interleave's default weights were just 1s -- which
would be equivalent to the (unweighted) interleave mempolicy, which goes
through the nodes in a round-robin fashion, ignoring bandwidth
information.

This patch has two main goals: First, it makes weighted interleave easier
to use for users who wish to relieve bandwidth pressure when using nodes
with varying bandwidth (CXL).  By providing a set of "real" default
weights that just work out of the box, users who might not have the
capability (or wish to) perform experimentation to find the most optimal
weights for their system can still take advantage of bandwidth-informed
weighted interleave.

Second, it allows for weighted interleave to dynamically adjust to
hotplugged memory with new bandwidth information.  Instead of manually
updating node weights every time new bandwidth information is reported or
taken off, weighted interleave adjusts and provides a new set of default
weights for weighted interleave to use when there is a change in bandwidth
information.

To meet these goals, this patch introduces an auto-configuration mode for
the interleave weights that provides a reasonable set of default weights,
calculated using bandwidth data reported by the system.  In auto mode,
weights are dynamically adjusted based on whatever the current bandwidth
information reports (and responds to hotplug events).

This patch still supports users manually writing weights into the nodeN
sysfs interface by entering into manual mode.  When a user enters manual
mode, the system stops dynamically updating any of the node weights, even
during hotplug events that shift the optimal weight distribution.

A new sysfs interface "auto" is introduced, which allows users to switch
between the auto (writing 1 or Y) and manual (writing 0 or N) modes.  The
system also automatically enters manual mode when a nodeN interface is
manually written to.

There is one functional change that this patch makes to the existing
weighted_interleave ABI: previously, writing 0 directly to a nodeN
interface was said to reset the weight to the system default.  Before this
patch, the default for all weights were 1, which meant that writing 0 and
1 were functionally equivalent.  With this patch, writing 0 is invalid.

Link: https://lkml.kernel.org/r/20250520141236.2987309-1-joshua.hahnjy@gmail.com
[joshua.hahnjy@gmail.com: wordsmithing changes, simplification, fixes]
  Link: https://lkml.kernel.org/r/20250511025840.2410154-1-joshua.hahnjy@gmail.com
[joshua.hahnjy@gmail.com: remove auto_kobj_attr field from struct sysfs_wi_group]
  Link: https://lkml.kernel.org/r/20250512142511.3959833-1-joshua.hahnjy@gmail.com
https://lore.kernel.org/linux-mm/20240202170238.90004-1-gregory.price@memverge.com/ [1]
Link: https://lkml.kernel.org/r/20250505182328.4148265-1-joshua.hahnjy@gmail.com
Co-developed-by: Gregory Price <gourry@gourry.net>
Signed-off-by: Gregory Price <gourry@gourry.net>
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Suggested-by: Yunjeong Mun <yunjeong.mun@sk.com>
Suggested-by: Oscar Salvador <osalvador@suse.de>
Suggested-by: Ying Huang <ying.huang@linux.alibaba.com>
Suggested-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Huang Ying <ying.huang@linux.alibaba.com>
Reviewed-by: Honggyu Kim <honggyu.kim@sk.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mempolicy.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index ce9885e0178a..0fe96f3ab3ef 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
+#include <linux/node.h>
 #include <linux/nodemask.h>
 #include <linux/pagemap.h>
 #include <uapi/linux/mempolicy.h>
@@ -178,6 +179,9 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 
 extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
 
+extern int mempolicy_set_node_perf(unsigned int node,
+				   struct access_coordinate *coords);
+
 #else
 
 struct mempolicy {};
-- 
cgit v1.2.3


From bf454ec31add6790f6cdc88328e38901fcbbade6 Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Fri, 2 May 2025 09:12:35 +0800
Subject: kexec_file: allow to place kexec_buf randomly

Patch series "Support kdump with LUKS encryption by reusing LUKS volume
keys", v9.

LUKS is the standard for Linux disk encryption, widely adopted by users,
and in some cases, such as Confidential VMs, it is a requirement.  With
kdump enabled, when the first kernel crashes, the system can boot into the
kdump/crash kernel to dump the memory image (i.e., /proc/vmcore) to a
specified target.  However, there are two challenges when dumping vmcore
to a LUKS-encrypted device:

 - Kdump kernel may not be able to decrypt the LUKS partition. For some
   machines, a system administrator may not have a chance to enter the
   password to decrypt the device in kdump initramfs after the 1st kernel
   crashes; For cloud confidential VMs, depending on the policy the
   kdump kernel may not be able to unseal the keys with TPM and the
   console virtual keyboard is untrusted.

 - LUKS2 by default use the memory-hard Argon2 key derivation function
   which is quite memory-consuming compared to the limited memory reserved
   for kdump. Take Fedora example, by default, only 256M is reserved for
   systems having memory between 4G-64G. With LUKS enabled, ~1300M needs
   to be reserved for kdump. Note if the memory reserved for kdump can't
   be used by 1st kernel i.e. an user sees ~1300M memory missing in the
   1st kernel.

Besides users (at least for Fedora) usually expect kdump to work out of
the box i.e.  no manual password input or custom crashkernel value is
needed.  And it doesn't make sense to derivate the keys again in kdump
kernel which seems to be redundant work.

This patchset addresses the above issues by making the LUKS volume keys
persistent for kdump kernel with the help of cryptsetup's new APIs
(--link-vk-to-keyring/--volume-key-keyring).  Here is the life cycle of
the kdump copies of LUKS volume keys,

 1. After the 1st kernel loads the initramfs during boot, systemd
    use an user-input passphrase to de-crypt the LUKS volume keys
    or TPM-sealed key and then save the volume keys to specified keyring
    (using the --link-vk-to-keyring API) and the key will expire within
    specified time.

 2. A user space tool (kdump initramfs loader like kdump-utils) create
    key items inside /sys/kernel/config/crash_dm_crypt_keys to inform
    the 1st kernel which keys are needed.

 3. When the kdump initramfs is loaded by the kexec_file_load
    syscall, the 1st kernel will iterate created key items, save the
    keys to kdump reserved memory.

 4. When the 1st kernel crashes and the kdump initramfs is booted, the
    kdump initramfs asks the kdump kernel to create a user key using the
    key stored in kdump reserved memory by writing yes to
    /sys/kernel/crash_dm_crypt_keys/restore. Then the LUKS encrypted
    device is unlocked with libcryptsetup's --volume-key-keyring API.

 5. The system gets rebooted to the 1st kernel after dumping vmcore to
    the LUKS encrypted device is finished

After libcryptsetup saving the LUKS volume keys to specified keyring,
whoever takes this should be responsible for the safety of these copies of
keys.  The keys will be saved in the memory area exclusively reserved for
kdump where even the 1st kernel has no direct access.  And further more,
two additional protections are added,
 - save the copy randomly in kdump reserved memory as suggested by Jan
 - clear the _PAGE_PRESENT flag of the page that stores the copy as
   suggested by Pingfan

This patchset only supports x86.  There will be patches to support other
architectures once this patch set gets merged.


This patch (of 9):

Currently, kexec_buf is placed in order which means for the same machine,
the info in the kexec_buf is always located at the same position each time
the machine is booted.  This may cause a risk for sensitive information
like LUKS volume key.  Now struct kexec_buf has a new field random which
indicates it's supposed to be placed in a random position.

Note this feature is enabled only when CONFIG_CRASH_DUMP is enabled.  So
it only takes effect for kdump and won't impact kexec reboot.

Link: https://lkml.kernel.org/r/20250502011246.99238-1-coxu@redhat.com
Link: https://lkml.kernel.org/r/20250502011246.99238-2-coxu@redhat.com
Signed-off-by: Coiby Xu <coxu@redhat.com>
Suggested-by: Jan Pazdziora <jpazdziora@redhat.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: "Daniel P. Berrange" <berrange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Liu Pingfan <kernelfans@gmail.com>
Cc: Milan Broz <gmazyland@gmail.com>
Cc: Ondrej Kozina <okozina@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c8971861521a..1871eaa95432 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -25,6 +25,10 @@
 
 extern note_buf_t __percpu *crash_notes;
 
+#ifdef CONFIG_CRASH_DUMP
+#include <linux/prandom.h>
+#endif
+
 #ifdef CONFIG_KEXEC_CORE
 #include <linux/list.h>
 #include <linux/compat.h>
@@ -169,6 +173,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image);
  * @buf_min:	The buffer can't be placed below this address.
  * @buf_max:	The buffer can't be placed above this address.
  * @top_down:	Allocate from top of memory.
+ * @random:	Place the buffer at a random position.
  */
 struct kexec_buf {
 	struct kimage *image;
@@ -180,8 +185,33 @@ struct kexec_buf {
 	unsigned long buf_min;
 	unsigned long buf_max;
 	bool top_down;
+#ifdef CONFIG_CRASH_DUMP
+	bool random;
+#endif
 };
 
+
+#ifdef CONFIG_CRASH_DUMP
+static inline void kexec_random_range_start(unsigned long start,
+					    unsigned long end,
+					    struct kexec_buf *kbuf,
+					    unsigned long *temp_start)
+{
+	unsigned short i;
+
+	if (kbuf->random) {
+		get_random_bytes(&i, sizeof(unsigned short));
+		*temp_start = start + (end - start) / USHRT_MAX * i;
+	}
+}
+#else
+static inline void kexec_random_range_start(unsigned long start,
+					    unsigned long end,
+					    struct kexec_buf *kbuf,
+					    unsigned long *temp_start)
+{}
+#endif
+
 int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf);
 int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
 				   void *buf, unsigned int size,
-- 
cgit v1.2.3


From 479e58549b0fa7e80f1e0b9e69e0a2a8e6711132 Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Fri, 2 May 2025 09:12:37 +0800
Subject: crash_dump: store dm crypt keys in kdump reserved memory

When the kdump kernel image and initrd are loaded, the dm crypts keys will
be read from keyring and then stored in kdump reserved memory.

Assume a key won't exceed 256 bytes thus MAX_KEY_SIZE=256 according to
"cryptsetup benchmark".

Link: https://lkml.kernel.org/r/20250502011246.99238-4-coxu@redhat.com
Signed-off-by: Coiby Xu <coxu@redhat.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: "Daniel P. Berrange" <berrange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Jan Pazdziora <jpazdziora@redhat.com>
Cc: Liu Pingfan <kernelfans@gmail.com>
Cc: Milan Broz <gmazyland@gmail.com>
Cc: Ondrej Kozina <okozina@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/crash_core.h | 6 +++++-
 include/linux/kexec.h      | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 44305336314e..2e6782239034 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -34,7 +34,11 @@ static inline void arch_kexec_protect_crashkres(void) { }
 static inline void arch_kexec_unprotect_crashkres(void) { }
 #endif
 
-
+#ifdef CONFIG_CRASH_DM_CRYPT
+int crash_load_dm_crypt_keys(struct kimage *image);
+#else
+static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; }
+#endif
 
 #ifndef arch_crash_handle_hotplug_event
 static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 1871eaa95432..6e688c5d8e4d 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -405,6 +405,10 @@ struct kimage {
 	void *elf_headers;
 	unsigned long elf_headers_sz;
 	unsigned long elf_load_addr;
+
+	/* dm crypt keys buffer */
+	unsigned long dm_crypt_keys_addr;
+	unsigned long dm_crypt_keys_sz;
 };
 
 /* kexec interface functions */
-- 
cgit v1.2.3


From 62f17d9df6924cf805de5ae970470615c1c8d9f2 Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Fri, 2 May 2025 09:12:39 +0800
Subject: crash_dump: retrieve dm crypt keys in kdump kernel

Crash kernel will retrieve the dm crypt keys based on the dmcryptkeys
command line parameter.  When user space writes the key description to
/sys/kernel/config/crash_dm_crypt_key/restore, the crash kernel will save
the encryption keys to the user keyring.  Then user space e.g.
cryptsetup's --volume-key-keyring API can use it to unlock the encrypted
device.

Link: https://lkml.kernel.org/r/20250502011246.99238-6-coxu@redhat.com
Signed-off-by: Coiby Xu <coxu@redhat.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: "Daniel P. Berrange" <berrange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Jan Pazdziora <jpazdziora@redhat.com>
Cc: Liu Pingfan <kernelfans@gmail.com>
Cc: Milan Broz <gmazyland@gmail.com>
Cc: Ondrej Kozina <okozina@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/crash_core.h | 1 +
 include/linux/crash_dump.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 2e6782239034..d35726d6a415 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -36,6 +36,7 @@ static inline void arch_kexec_unprotect_crashkres(void) { }
 
 #ifdef CONFIG_CRASH_DM_CRYPT
 int crash_load_dm_crypt_keys(struct kimage *image);
+ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos);
 #else
 static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; }
 #endif
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 2f2555e6407c..dd6fc3b2133b 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -15,6 +15,8 @@
 extern unsigned long long elfcorehdr_addr;
 extern unsigned long long elfcorehdr_size;
 
+extern unsigned long long dm_crypt_keys_addr;
+
 #ifdef CONFIG_CRASH_DUMP
 extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size);
 extern void elfcorehdr_free(unsigned long long addr);
-- 
cgit v1.2.3


From 4e7bca76e3fed58437dcd7f747d1c8d98507379e Mon Sep 17 00:00:00 2001
From: Hans Zhang <hans.zhang@cixtech.com>
Date: Sat, 17 May 2025 00:52:22 +0800
Subject: PCI/MSI: Use bool for MSI enable state tracking

Convert pci_msi_enable and pci_msi_enabled() to use bool type for clarity.
No functional changes, only code cleanup.

Signed-off-by: Hans Zhang <hans.zhang@cixtech.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250516165223.125083-2-18255117159@163.com
---
 include/linux/pci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e8e3fd77e96..f5e908a56498 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1669,7 +1669,7 @@ void pci_disable_msi(struct pci_dev *dev);
 int pci_msix_vec_count(struct pci_dev *dev);
 void pci_disable_msix(struct pci_dev *dev);
 void pci_restore_msi_state(struct pci_dev *dev);
-int pci_msi_enabled(void);
+bool pci_msi_enabled(void);
 int pci_enable_msi(struct pci_dev *dev);
 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 			  int minvec, int maxvec);
@@ -1702,7 +1702,7 @@ static inline void pci_disable_msi(struct pci_dev *dev) { }
 static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; }
 static inline void pci_disable_msix(struct pci_dev *dev) { }
 static inline void pci_restore_msi_state(struct pci_dev *dev) { }
-static inline int pci_msi_enabled(void) { return 0; }
+static inline bool pci_msi_enabled(void) { return false; }
 static inline int pci_enable_msi(struct pci_dev *dev)
 { return -ENOSYS; }
 static inline int pci_enable_msix_range(struct pci_dev *dev,
-- 
cgit v1.2.3


From a5bd029c733b8ae790d5873e2afeb88b58e3a151 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:04 -0700
Subject: net: add skb_crc32c()

Add skb_crc32c(), which calculates the CRC32C of a sk_buff.  It will
replace __skb_checksum(), which unnecessarily supports arbitrary
checksums.  Compared to __skb_checksum(), skb_crc32c():

   - Uses the correct type for CRC32C values (u32, not __wsum).

   - Does not require the caller to provide a skb_checksum_ops struct.

   - Is faster because it does not use indirect calls and does not use
     the very slow crc32c_combine().

According to commit 2817a336d4d5 ("net: skb_checksum: allow custom
update/combine for walking skb") which added __skb_checksum(), the
original motivation for the abstraction layer was to avoid code
duplication for CRC32C and other checksums in the future.  However:

   - No additional checksums showed up after CRC32C.  __skb_checksum()
     is only used with the "regular" net checksum and CRC32C.

   - Indirect calls are expensive.  Commit 2544af0344ba ("net: avoid
     indirect calls in L4 checksum calculation") worked around this
     using the INDIRECT_CALL_1 macro. But that only avoided the indirect
     call for the net checksum, and at the cost of an extra branch.

   - The checksums use different types (__wsum and u32), causing casts
     to be needed.

   - It made the checksums of fragments be combined (rather than
     chained) for both checksums, despite this being highly
     counterproductive for CRC32C due to how slow crc32c_combine() is.
     This can clearly be seen in commit 4c2f24549644 ("sctp: linearize
     early if it's not GSO") which tried to work around this performance
     bug.  With a dedicated function for each checksum, we can instead
     just use the proper strategy for each checksum.

As shown by the following tables, the new function skb_crc32c() is
faster than __skb_checksum(), with the improvement varying greatly from
5% to 2500% depending on the case.  The largest improvements come from
fragmented packets, mainly due to eliminating the inefficient
crc32c_combine().  But linear packets are improved too, especially
shorter ones, mainly due to eliminating indirect calls.  These
benchmarks were done on AMD Zen 5.  On that CPU, Linux uses IBRS instead
of retpoline; an even greater improvement might be seen with retpoline:

    Linear sk_buffs

        Length in bytes    __skb_checksum cycles    skb_crc32c cycles
        ===============    =====================    =================
                     64                       43                   18
                    256                       94                   77
                   1420                      204                  161
                  16384                     1735                 1642

    Nonlinear sk_buffs (even split between head and one fragment)

        Length in bytes    __skb_checksum cycles    skb_crc32c cycles
        ===============    =====================    =================
                     64                      579                   22
                    256                      829                   77
                   1420                     1506                  194
                  16384                     4365                 1682

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-3-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c7397b17bb08..7ccc6356acac 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4203,6 +4203,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 		      __wsum csum, const struct skb_checksum_ops *ops);
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
+u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc);
 
 static inline void * __must_check
 __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
-- 
cgit v1.2.3


From 70c96c7cb9f035d5b960021f2450afa6240e66b4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:08 -0700
Subject: net: fold __skb_checksum() into skb_checksum()

Now that the only remaining caller of __skb_checksum() is
skb_checksum(), fold __skb_checksum() into skb_checksum().  This makes
struct skb_checksum_ops unnecessary, so remove that too and simply do
the "regular" net checksum.  It also makes the wrapper functions
csum_partial_ext() and csum_block_add_ext() unnecessary, so remove those
too and just use the underlying functions.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-7-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ccc6356acac..018c07230513 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4192,15 +4192,6 @@ static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
 	return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
 }
 
-struct skb_checksum_ops {
-	__wsum (*update)(const void *mem, int len, __wsum wsum);
-	__wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
-};
-
-extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly;
-
-__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
-		      __wsum csum, const struct skb_checksum_ops *ops);
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc);
-- 
cgit v1.2.3


From b82f72292ab4c65250bd734281464a6ab1ff4133 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:09 -0700
Subject: lib/crc32: remove unused support for CRC32C combination

crc32c_combine() and crc32c_shift() are no longer used (except by the
KUnit test that tests them), and their current implementation is very
slow.  Remove them.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-8-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/crc32.h | 23 -----------------------
 1 file changed, 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 69c2e8bb3782..7f7d0be8a0ac 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -76,29 +76,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
 	return crc32_le_shift(crc1, len2) ^ crc2;
 }
 
-u32 crc32c_shift(u32 crc, size_t len);
-
-/**
- * crc32c_combine - Combine two crc32c check values into one. For two sequences
- *		    of bytes, seq1 and seq2 with lengths len1 and len2, crc32c()
- *		    check values were calculated for each, crc1 and crc2.
- *
- * @crc1: crc32c of the first block
- * @crc2: crc32c of the second block
- * @len2: length of the second block
- *
- * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring
- *	   only crc1, crc2, and len2. Note: If seq_full denotes the concatenated
- *	   memory area of seq1 with seq2, and crc_full the crc32c() value of
- *	   seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when
- *	   crc_full was seeded with the same initializer as crc1, and crc2 seed
- *	   was 0. See also crc_combine_test().
- */
-static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2)
-{
-	return crc32c_shift(crc1, len2) ^ crc2;
-}
-
 #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
 
 /*
-- 
cgit v1.2.3


From ea6342d98928e243f2024fb97a9b4d42ee55dfba Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:10 -0700
Subject: net: add skb_copy_and_crc32c_datagram_iter()

Since skb_copy_and_hash_datagram_iter() is used only with CRC32C, the
crypto_ahash abstraction provides no value.  Add
skb_copy_and_crc32c_datagram_iter() which just calls crc32c() directly.

This is faster and simpler.  It also doesn't have the weird dependency
issue where skb_copy_and_hash_datagram_iter() depends on
CONFIG_CRYPTO_HASH=y without that being expressed explicitly in the
kconfig (presumably because it was too heavyweight for NET to select).
The new function is conditional on the hidden boolean symbol NET_CRC32C,
which selects CRC32.  So it gets compiled only when something that
actually needs CRC32C packet checksums is enabled, it has no implicit
dependency, and it doesn't depend on the heavyweight crypto layer.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-9-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 018c07230513..510adf63c211 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4137,6 +4137,8 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
 int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
 			   struct iov_iter *to, int len,
 			   struct ahash_request *hash);
+int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset,
+				      struct iov_iter *to, int len, u32 *crcp);
 int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
 				 struct iov_iter *from, int len);
 int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
-- 
cgit v1.2.3


From c93f75b2d755c35b596084ddd3feb3528284a53f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 19 May 2025 10:50:12 -0700
Subject: net: remove skb_copy_and_hash_datagram_iter()

Now that skb_copy_and_hash_datagram_iter() is no longer used, remove it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://patch.msgid.link/20250519175012.36581-11-ebiggers@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 510adf63c211..5520524c93bf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -274,7 +274,6 @@
 			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
 			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
-struct ahash_request;
 struct net_device;
 struct scatterlist;
 struct pipe_inode_info;
@@ -4134,9 +4133,6 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
 }
 int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
 				   struct msghdr *msg);
-int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
-			   struct iov_iter *to, int len,
-			   struct ahash_request *hash);
 int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset,
 				      struct iov_iter *to, int len, u32 *crcp);
 int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
-- 
cgit v1.2.3


From 31afd6bc55cc0093c3e5b0a368319e423d4de8ea Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Sat, 17 May 2025 22:13:45 +0200
Subject: net: phy: pass PHY driver to .match_phy_device OP

Pass PHY driver pointer to .match_phy_device OP in addition to phydev.
Having access to the PHY driver struct might be useful to check the
PHY ID of the driver is being matched for in case the PHY ID scanned in
the phydev is not consistent.

A scenario for this is a PHY that change PHY ID after a firmware is
loaded, in such case, the PHY ID stored in PHY device struct is not
valid anymore and PHY will manually scan the ID in the match_phy_device
function.

Having the PHY driver info is also useful for those PHY driver that
implement multiple simple .match_phy_device OP to match specific MMD PHY
ID. With this extra info if the parsing logic is the same, the matching
function can be generalized by using the phy_id in the PHY driver
instead of hardcoding.

Rust wrapper callback is updated to align to the new match_phy_device
arguments.

Suggested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Reviewed-by: Benno Lossin <lossin@kernel.org> # for Rust
Reviewed-by: FUJITA Tomonori <fujita.tomonori@gmail.com>
Link: https://patch.msgid.link/20250517201353.5137-2-ansuelsmth@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 92a88b5ce356..10e66d45a8e8 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -990,7 +990,8 @@ struct phy_driver {
 	 * driver for the given phydev.	 If NULL, matching is based on
 	 * phy_id and phy_id_mask.
 	 */
-	int (*match_phy_device)(struct phy_device *phydev);
+	int (*match_phy_device)(struct phy_device *phydev,
+				const struct phy_driver *phydrv);
 
 	/**
 	 * @set_wol: Some devices (e.g. qnap TS-119P II) require PHY
-- 
cgit v1.2.3


From d6c45707ac84c2d9f274ece1cea4dddb97996bde Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Sat, 17 May 2025 22:13:48 +0200
Subject: net: phy: introduce genphy_match_phy_device()

Introduce new API, genphy_match_phy_device(), to provide a way to check
to match a PHY driver for a PHY device based on the info stored in the
PHY device struct.

The function generalize the logic used in phy_bus_match() to check the
PHY ID whether if C45 or C22 ID should be used for matching.

This is useful for custom .match_phy_device function that wants to use
the generic logic under some condition. (example a PHY is already setup
and provide the correct PHY ID)

Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Link: https://patch.msgid.link/20250517201353.5137-5-ansuelsmth@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 10e66d45a8e8..32b9da274115 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1868,6 +1868,9 @@ char *phy_attached_info_irq(struct phy_device *phydev)
 	__malloc;
 void phy_attached_info(struct phy_device *phydev);
 
+int genphy_match_phy_device(struct phy_device *phydev,
+			    const struct phy_driver *phydrv);
+
 /* Clause 22 PHY */
 int genphy_read_abilities(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
-- 
cgit v1.2.3


From 18355307dc56c198365c6b6b359a4a24db013685 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Wed, 7 May 2025 15:19:13 +0300
Subject: i2c: atr: add static flag

Some I2C ATRs do not support dynamic remapping, only static mapping
of direct children.

Mappings will only be added or removed as a result of devices being
added or removed from a child bus.

The ATR pool will have to be big enough to accommodate all devices
expected to be added to the child buses.

Add a new flag that prevents old mappings to be replaced or new mappings
to be created in the alias finding code paths. That mens adding a flags
parameter to i2c_atr_new() and an i2c_atr_flags enum.

Signed-off-by: Cosmin Tanislav <demonsingur@gmail.com>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Reviewed-by: Romain Gantois <romain.gantois@bootlin.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c-atr.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h
index 1c3a5bcd939f..5aaab1598084 100644
--- a/include/linux/i2c-atr.h
+++ b/include/linux/i2c-atr.h
@@ -18,6 +18,19 @@ struct device;
 struct fwnode_handle;
 struct i2c_atr;
 
+/**
+ * enum i2c_atr_flags - Flags for an I2C ATR driver
+ *
+ * @I2C_ATR_F_STATIC: ATR does not support dynamic mapping, use static mapping.
+ *                    Mappings will only be added or removed as a result of
+ *                    devices being added or removed from a child bus.
+ *                    The ATR pool will have to be big enough to accomodate all
+ *                    devices expected to be added to the child buses.
+ */
+enum i2c_atr_flags {
+	I2C_ATR_F_STATIC = BIT(0),
+};
+
 /**
  * struct i2c_atr_ops - Callbacks from ATR to the device driver.
  * @attach_addr: Notify the driver of a new device connected on a child
@@ -65,6 +78,7 @@ struct i2c_atr_adap_desc {
  * @dev:          The device acting as an ATR
  * @ops:          Driver-specific callbacks
  * @max_adapters: Maximum number of child adapters
+ * @flags:        Flags for ATR
  *
  * The new ATR helper is connected to the parent adapter but has no child
  * adapters. Call i2c_atr_add_adapter() to add some.
@@ -74,7 +88,8 @@ struct i2c_atr_adap_desc {
  * Return: pointer to the new ATR helper object, or ERR_PTR
  */
 struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev,
-			    const struct i2c_atr_ops *ops, int max_adapters);
+			    const struct i2c_atr_ops *ops, int max_adapters,
+			    u32 flags);
 
 /**
  * i2c_atr_delete - Delete an I2C ATR helper.
-- 
cgit v1.2.3


From 17a3a30e8e3df77d1d1f5bc705b903604a1eedc9 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Wed, 7 May 2025 15:19:15 +0300
Subject: i2c: atr: add passthrough flag

Some I2C ATRs can have other I2C ATRs as children. The I2C messages of
the child ATRs need to be forwarded as-is if the parent I2C ATR can
only do static mapping.

In the case of GMSL, the deserializer I2C ATR actually doesn't have I2C
address remapping hardware capabilities, but it is able to select which
GMSL link to talk to, allowing it to change the address of the
serializer.

The child ATRs need to have their alias pools defined in such a way to
prevent overlapping addresses between them, but there's no way around
this without orchestration between multiple ATR instances.

To allow for this use-case, add a flag that allows unmapped addresses
to be passed through, since they are already remapped by the child ATRs.

There's no case where an address that has not been remapped by the child
ATR will hit the parent ATR.

Signed-off-by: Cosmin Tanislav <demonsingur@gmail.com>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Reviewed-by: Romain Gantois <romain.gantois@bootlin.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c-atr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h
index 5aaab1598084..2bb54dc87c8e 100644
--- a/include/linux/i2c-atr.h
+++ b/include/linux/i2c-atr.h
@@ -26,9 +26,11 @@ struct i2c_atr;
  *                    devices being added or removed from a child bus.
  *                    The ATR pool will have to be big enough to accomodate all
  *                    devices expected to be added to the child buses.
+ * @I2C_ATR_F_PASSTHROUGH: Allow unmapped incoming addresses to pass through
  */
 enum i2c_atr_flags {
 	I2C_ATR_F_STATIC = BIT(0),
+	I2C_ATR_F_PASSTHROUGH = BIT(1),
 };
 
 /**
-- 
cgit v1.2.3


From 6adf48a3aa316ce360e02dd10222e96da9a0eff5 Mon Sep 17 00:00:00 2001
From: Artur Weber <aweber.kernel@gmail.com>
Date: Thu, 15 May 2025 16:16:30 +0200
Subject: mfd: bcm590xx: Add support for multiple device types + BCM59054
 compatible

The BCM59054 is another chip from the BCM590xx line of PMUs, commonly
used on devices with the BCM21664/BCM23550 chipsets.

Prepare the BCM590xx driver for supporting other devices by adding the
PMUID register values for supported chip types and store them in the
MFD data struct as "pmu_id". (These will be checked against the actual
PMUID register values in a later commit.)

Then, add a DT compatible for the BCM59054, and provide the PMU ID as
OF match data.

Signed-off-by: Artur Weber <aweber.kernel@gmail.com>
Reviewed-by: Stanislav Jakubek <stano.jakubek@gmail.com>
Link: https://lore.kernel.org/r/20250515-bcm59054-v9-3-14ba0ea2ea5b@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/bcm590xx.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h
index 6b8791da6119..76c30e629333 100644
--- a/include/linux/mfd/bcm590xx.h
+++ b/include/linux/mfd/bcm590xx.h
@@ -13,6 +13,10 @@
 #include <linux/i2c.h>
 #include <linux/regmap.h>
 
+/* PMU ID register values; also used as device type */
+#define BCM590XX_PMUID_BCM59054		0x54
+#define BCM590XX_PMUID_BCM59056		0x56
+
 /* max register address */
 #define BCM590XX_MAX_REGISTER_PRI	0xe7
 #define BCM590XX_MAX_REGISTER_SEC	0xf0
@@ -24,6 +28,9 @@ struct bcm590xx {
 	struct regmap *regmap_pri;
 	struct regmap *regmap_sec;
 	unsigned int id;
+
+	/* PMU ID value; also used as device type */
+	u8 pmu_id;
 };
 
 #endif /*  __LINUX_MFD_BCM590XX_H */
-- 
cgit v1.2.3


From d310cdbb4ee6285f374d4dfc32173c35f8a2273e Mon Sep 17 00:00:00 2001
From: Artur Weber <aweber.kernel@gmail.com>
Date: Thu, 15 May 2025 16:16:31 +0200
Subject: mfd: bcm590xx: Add PMU ID/revision parsing function

The BCM590xx PMUs have two I2C registers for reading the PMU ID
and revision. The revision is useful for subdevice drivers, since
different revisions may have slight differences in behavior (for
example - BCM59054 has different regulator configurations for
revision A0 and A1).

Check the PMU ID register and make sure it matches the DT compatible.
Fetch the digital and analog revision from the PMUREV register
so that it can be used in subdevice drivers.

Also add some known revision values to bcm590xx.h, for convenience
when writing subdevice drivers.

Signed-off-by: Artur Weber <aweber.kernel@gmail.com>
Reviewed-by: Stanislav Jakubek <stano.jakubek@gmail.com>
Link: https://lore.kernel.org/r/20250515-bcm59054-v9-4-14ba0ea2ea5b@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/bcm590xx.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h
index 76c30e629333..a54c50b2d2c8 100644
--- a/include/linux/mfd/bcm590xx.h
+++ b/include/linux/mfd/bcm590xx.h
@@ -17,6 +17,16 @@
 #define BCM590XX_PMUID_BCM59054		0x54
 #define BCM590XX_PMUID_BCM59056		0x56
 
+/* Known chip revision IDs */
+#define BCM59054_REV_DIGITAL_A1		1
+#define BCM59054_REV_ANALOG_A1		2
+
+#define BCM59056_REV_DIGITAL_A0		1
+#define BCM59056_REV_ANALOG_A0		1
+
+#define BCM59056_REV_DIGITAL_B0		2
+#define BCM59056_REV_ANALOG_B0		2
+
 /* max register address */
 #define BCM590XX_MAX_REGISTER_PRI	0xe7
 #define BCM590XX_MAX_REGISTER_SEC	0xf0
@@ -31,6 +41,10 @@ struct bcm590xx {
 
 	/* PMU ID value; also used as device type */
 	u8 pmu_id;
+
+	/* Chip revision, read from PMUREV reg */
+	u8 rev_digital;
+	u8 rev_analog;
 };
 
 #endif /*  __LINUX_MFD_BCM590XX_H */
-- 
cgit v1.2.3


From 75dc12b4450269821fca4c8634f5185d28cf2117 Mon Sep 17 00:00:00 2001
From: Artur Weber <aweber.kernel@gmail.com>
Date: Thu, 15 May 2025 16:16:33 +0200
Subject: regulator: bcm590xx: Store regulator descriptions in table

Instead of filling in the regulator description programatically,
store the data in a struct. This will make it a bit nicer to
introduce support for other BCM590xx chips besides the BCM59056.

To do this, add a new struct type, bcm590xx_reg_data, to store
all of the necessary information. Drop the old IS_LDO, IS_GPLDO...
macros in favor of the "type" field in this struct. Adapt the
old bcm590xx_reg struct to the new types.

Signed-off-by: Artur Weber <aweber.kernel@gmail.com>
Reviewed-by: Stanislav Jakubek <stano.jakubek@gmail.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20250515-bcm59054-v9-6-14ba0ea2ea5b@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/bcm590xx.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h
index a54c50b2d2c8..09d4ae054242 100644
--- a/include/linux/mfd/bcm590xx.h
+++ b/include/linux/mfd/bcm590xx.h
@@ -27,6 +27,12 @@
 #define BCM59056_REV_DIGITAL_B0		2
 #define BCM59056_REV_ANALOG_B0		2
 
+/* regmap types */
+enum bcm590xx_regmap_type {
+	BCM590XX_REGMAP_PRI,
+	BCM590XX_REGMAP_SEC,
+};
+
 /* max register address */
 #define BCM590XX_MAX_REGISTER_PRI	0xe7
 #define BCM590XX_MAX_REGISTER_SEC	0xf0
-- 
cgit v1.2.3


From 1007ae0d464ceb55a3740634790521d3543aaab9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 22 May 2025 12:47:31 +0200
Subject: spi: use container_of_cont() for to_spi_device()

Some places in the spi core pass in a const pointer to a device and the
default container_of() casts that away, which is not a good idea.
Preserve the proper const attribute by using container_of_const() for
to_spi_device() instead, which is what it was designed for.

Note, this removes the NULL check for a device pointer in the call, but
no one was ever checking for that return value, and a device pointer
should never be NULL overall anyway, so this should be a safe change.

Cc: Mark Brown <broonie@kernel.org>
Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2025052230-fidgeting-stooge-66f5@gregkh
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0ba5e49bace4..6e64f0193777 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -249,10 +249,7 @@ struct spi_device {
 static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0,
 	      "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap");
 
-static inline struct spi_device *to_spi_device(const struct device *dev)
-{
-	return dev ? container_of(dev, struct spi_device, dev) : NULL;
-}
+#define to_spi_device(__dev)	container_of_const(__dev, struct spi_device, dev)
 
 /* Most drivers won't need to care about device refcounting */
 static inline struct spi_device *spi_dev_get(struct spi_device *spi)
-- 
cgit v1.2.3


From 1c12fbdf40e17df2efc24bf2009a0c3bfa75bfa7 Mon Sep 17 00:00:00 2001
From: Mathieu Dubois-Briand <mathieu.dubois-briand@bootlin.com>
Date: Thu, 22 May 2025 14:06:20 +0200
Subject: regmap: irq: Add support for chips without separate IRQ status

Some GPIO chips allow to rise an IRQ on GPIO level changes but do not
provide an IRQ status for each separate line: only the current gpio
level can be retrieved.

Add support for these chips, emulating IRQ status by comparing GPIO
levels with the levels during the previous interrupt.

Signed-off-by: Mathieu Dubois-Briand <mathieu.dubois-briand@bootlin.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20250522-mdb-max7360-support-v9-5-74fc03517e41@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index d17c5ea3d55d..02b83f5499b8 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1641,6 +1641,8 @@ struct regmap_irq_chip_data;
  * @ack_invert:  Inverted ack register: cleared bits for ack.
  * @clear_ack:  Use this to set 1 and 0 or vice-versa to clear interrupts.
  * @status_invert: Inverted status register: cleared bits are active interrupts.
+ * @status_is_level: Status register is actuall signal level: Xor status
+ *		     register with previous value to get active interrupts.
  * @wake_invert: Inverted wake register: cleared bits are wake enabled.
  * @type_in_mask: Use the mask registers for controlling irq type. Use this if
  *		  the hardware provides separate bits for rising/falling edge
@@ -1704,6 +1706,7 @@ struct regmap_irq_chip {
 	unsigned int ack_invert:1;
 	unsigned int clear_ack:1;
 	unsigned int status_invert:1;
+	unsigned int status_is_level:1;
 	unsigned int wake_invert:1;
 	unsigned int type_in_mask:1;
 	unsigned int clear_on_unmask:1;
-- 
cgit v1.2.3


From 4ff4d86f6cceb6bea583bdb230e5439655778cce Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Mon, 19 May 2025 10:45:05 +0200
Subject: net: Add support for providing the PTP hardware source in tsinfo

Multi-PTP source support within a network topology has been merged,
but the hardware timestamp source is not yet exposed to users.
Currently, users only see the PTP index, which does not indicate
whether the timestamp comes from a PHY or a MAC.

Add support for reporting the hwtstamp source using a
hwtstamp-source field, alongside hwtstamp-phyindex, to describe
the origin of the hardware timestamp.

Remove HWTSTAMP_SOURCE_UNSPEC enum value as it is not used at all.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20250519-feature_ptp_source-v4-1-5d10e19a0265@bootlin.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/ethtool.h    | 5 +++++
 include/linux/net_tstamp.h | 7 +------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 117718c24814..5e0dd333ad1f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -19,6 +19,7 @@
 #include <linux/netlink.h>
 #include <linux/timer_types.h>
 #include <uapi/linux/ethtool.h>
+#include <uapi/linux/ethtool_netlink_generated.h>
 #include <uapi/linux/net_tstamp.h>
 
 #define ETHTOOL_MM_MAX_VERIFY_TIME_MS		128
@@ -830,6 +831,8 @@ struct ethtool_rxfh_param {
  * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
  * @phc_index: device index of the associated PHC, or -1 if there is none
  * @phc_qualifier: qualifier of the associated PHC
+ * @phc_source: source device of the associated PHC
+ * @phc_phyindex: index of PHY device source of the associated PHC
  * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
  * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
  */
@@ -838,6 +841,8 @@ struct kernel_ethtool_ts_info {
 	u32 so_timestamping;
 	int phc_index;
 	enum hwtstamp_provider_qualifier phc_qualifier;
+	enum hwtstamp_source phc_source;
+	int phc_phyindex;
 	enum hwtstamp_tx_types tx_types;
 	enum hwtstamp_rx_filters rx_filters;
 };
diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h
index ff0758e88ea1..f4936d9c2b3c 100644
--- a/include/linux/net_tstamp.h
+++ b/include/linux/net_tstamp.h
@@ -4,6 +4,7 @@
 #define _LINUX_NET_TIMESTAMPING_H_
 
 #include <uapi/linux/net_tstamp.h>
+#include <uapi/linux/ethtool_netlink_generated.h>
 
 #define SOF_TIMESTAMPING_SOFTWARE_MASK	(SOF_TIMESTAMPING_RX_SOFTWARE | \
 					 SOF_TIMESTAMPING_TX_SOFTWARE | \
@@ -13,12 +14,6 @@
 					 SOF_TIMESTAMPING_TX_HARDWARE | \
 					 SOF_TIMESTAMPING_RAW_HARDWARE)
 
-enum hwtstamp_source {
-	HWTSTAMP_SOURCE_UNSPEC,
-	HWTSTAMP_SOURCE_NETDEV,
-	HWTSTAMP_SOURCE_PHYLIB,
-};
-
 /**
  * struct hwtstamp_provider_desc - hwtstamp provider description
  *
-- 
cgit v1.2.3


From 1d2aeee6dd629084cc524ec2d00100e70aa3c824 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 18 Mar 2025 10:13:41 +0100
Subject: mfd: aat2870: Use per-client debugfs directory

The I2C core now provides a debugfs entry for each client. Let this
driver use it instead of the custom directory in debugfs root. Further
improvements by this change: automatic clean up on removal, support of
multiple instances.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20250318091426.22258-2-wsa+renesas@sang-engineering.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/aat2870.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h
index 2445842d482d..c7a3c53eba68 100644
--- a/include/linux/mfd/aat2870.h
+++ b/include/linux/mfd/aat2870.h
@@ -133,9 +133,6 @@ struct aat2870_data {
 	int (*read)(struct aat2870_data *aat2870, u8 addr, u8 *val);
 	int (*write)(struct aat2870_data *aat2870, u8 addr, u8 val);
 	int (*update)(struct aat2870_data *aat2870, u8 addr, u8 mask, u8 val);
-
-	/* for debugfs */
-	struct dentry *dentry_root;
 };
 
 struct aat2870_subdev_info {
-- 
cgit v1.2.3


From 3a91f28fab43f093c72312148288d125ae160c2d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Thu, 22 May 2025 23:20:39 +0800
Subject: io_uring: add helper io_uring_cmd_ctx_handle()

Add helper io_uring_cmd_ctx_handle() for driver to track per-context
resource, such as registered kernel io buffer.

Suggested-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Link: https://lore.kernel.org/r/20250522152043.399824-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring/cmd.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 0634a3de1782..53408124c1e5 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -140,6 +140,15 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur
 	return cmd_to_io_kiocb(cmd)->async_data;
 }
 
+/*
+ * Return uring_cmd's context reference as its context handle for driver to
+ * track per-context resource, such as registered kernel IO buffer
+ */
+static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd)
+{
+	return cmd_to_io_kiocb(cmd)->ctx;
+}
+
 int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
 			    void (*release)(void *), unsigned int index,
 			    unsigned int issue_flags);
-- 
cgit v1.2.3


From 9e8c67a9e526f497d606d721cf6b92dd68d90806 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2025 00:44:44 -0400
Subject: sched_ext: Introduce cgroup_lifetime_notifier

Other subsystems may make use of the cgroup hierarchy with the cgroup_bpf
support being one such example. For such a feature, it's useful to be able
to hook into cgroup creation and destruction paths to perform
feature-specific initializations and cleanups.

Add cgroup_lifetime_notifier which generates CGROUP_LIFETIME_ONLINE and
CGROUP_LIFETIME_OFFLINE events whenever cgroups are created and destroyed,
respectively.

The next patch will convert cgroup_bpf to use the new notifier and other
uses are planned.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 989c08b09691..7865b681731c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -19,6 +19,7 @@
 #include <linux/kernfs.h>
 #include <linux/jump_label.h>
 #include <linux/types.h>
+#include <linux/notifier.h>
 #include <linux/ns_common.h>
 #include <linux/nsproxy.h>
 #include <linux/user_namespace.h>
@@ -40,7 +41,7 @@ struct kernel_clone_args;
 
 #ifdef CONFIG_CGROUPS
 
-enum {
+enum css_task_iter_flags {
 	CSS_TASK_ITER_PROCS    = (1U << 0),  /* walk only threadgroup leaders */
 	CSS_TASK_ITER_THREADED = (1U << 1),  /* walk all threaded css_sets in the domain */
 	CSS_TASK_ITER_SKIPPED  = (1U << 16), /* internal flags */
@@ -66,10 +67,16 @@ struct css_task_iter {
 	struct list_head		iters_node;	/* css_set->task_iters */
 };
 
+enum cgroup_lifetime_events {
+	CGROUP_LIFETIME_ONLINE,
+	CGROUP_LIFETIME_OFFLINE,
+};
+
 extern struct file_system_type cgroup_fs_type;
 extern struct cgroup_root cgrp_dfl_root;
 extern struct css_set init_css_set;
 extern spinlock_t css_set_lock;
+extern struct blocking_notifier_head cgroup_lifetime_notifier;
 
 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
 #include <linux/cgroup_subsys.h>
-- 
cgit v1.2.3


From 82648b8b2ae0a0ff371e2a98133844658cfaae9a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2025 00:46:12 -0400
Subject: sched_ext: Convert cgroup BPF support to use cgroup_lifetime_notifier

Replace explicit cgroup_bpf_inherit/offline() calls from cgroup
creation/destruction paths with notification callback registered on
cgroup_lifetime_notifier.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/bpf-cgroup.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9de7adb68294..60d1511b4f4d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -114,8 +114,7 @@ struct bpf_prog_list {
 	u32 flags;
 };
 
-int cgroup_bpf_inherit(struct cgroup *cgrp);
-void cgroup_bpf_offline(struct cgroup *cgrp);
+void __init cgroup_bpf_lifetime_notifier_init(void);
 
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
@@ -431,8 +430,10 @@ const struct bpf_func_proto *
 cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
 #else
 
-static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
-static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_lifetime_notifier_init(void)
+{
+	return;
+}
 
 static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 					 enum bpf_prog_type ptype,
-- 
cgit v1.2.3


From 3f12680913fda8de06c21e836dd5f246fe1684e5 Mon Sep 17 00:00:00 2001
From: Yuquan Wang <wangyuquan1236@phytium.com.cn>
Date: Thu, 8 May 2025 10:27:19 +0800
Subject: mm: numa_memblks: introduce numa_add_reserved_memblk

acpi_parse_cfmws() currently adds empty CFMWS ranges to numa_meminfo with
the expectation that numa_cleanup_meminfo moves them to
numa_reserved_meminfo.  There is no need for that indirection when it is
known in advance that these unpopulated ranges are meant for
numa_reserved_meminfo in support of future hotplug / CXL provisioning.

Introduce and use numa_add_reserved_memblk() to add the empty CFMWS ranges
directly.

Link: https://lkml.kernel.org/r/20250508022719.3941335-1-wangyuquan1236@phytium.com.cn
Signed-off-by: Yuquan Wang <wangyuquan1236@phytium.com.cn>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Cc: Bruno Faccini <bfaccini@nvidia.com>
Cc: Chen Baozi <chenbaozi@phytium.com.cn>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Haibo Xu <haibo1.xu@intel.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Robert Richter <rrichter@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/numa_memblks.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h
index dd85613cdd86..991076cba7c5 100644
--- a/include/linux/numa_memblks.h
+++ b/include/linux/numa_memblks.h
@@ -22,6 +22,7 @@ struct numa_meminfo {
 };
 
 int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+int __init numa_add_reserved_memblk(int nid, u64 start, u64 end);
 void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
 
 int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
-- 
cgit v1.2.3


From e1e1a3ae7f9f0cb06e80af0f24927be63149d081 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 12 May 2025 14:34:15 +0200
Subject: mm: convert track_pfn_insert() to pfnmap_setup_cachemode*()

...  by factoring it out from track_pfn_remap() into
pfnmap_setup_cachemode() and provide pfnmap_setup_cachemode_pfn() as a
replacement for track_pfn_insert().

For PMDs/PUDs, we keep checking a single pfn only.  Add some
documentation, and also document why it is valid to not check the whole
pfn range.

We'll reuse pfnmap_setup_cachemode() from core MM next.

Link: https://lkml.kernel.org/r/20250512123424.637989-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>	[x86 bits]
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Dave Airlie <airlied@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 52 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index f1e890b60460..be1745839871 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1496,13 +1496,10 @@ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 	return 0;
 }
 
-/*
- * track_pfn_insert is called when a _new_ single pfn is established
- * by vmf_insert_pfn().
- */
-static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
-				    pfn_t pfn)
+static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+		pgprot_t *prot)
 {
+	return 0;
 }
 
 /*
@@ -1552,8 +1549,32 @@ static inline void untrack_pfn_clear(struct vm_area_struct *vma)
 extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 			   unsigned long pfn, unsigned long addr,
 			   unsigned long size);
-extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
-			     pfn_t pfn);
+
+/**
+ * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to modify
+ *
+ * Lookup the cachemode for the pfn range starting at @pfn with the size
+ * @size and store it in @prot, leaving other data in @prot unchanged.
+ *
+ * This allows for a hardware implementation to have fine-grained control of
+ * memory cache behavior at page level granularity. Without a hardware
+ * implementation, this function does nothing.
+ *
+ * Currently there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * This function can fail if the pfn range spans pfns that require differing
+ * cachemodes. If the pfn range was previously verified to have a single
+ * cachemode, it is sufficient to query only a single pfn. The assumption is
+ * that this is the case for drivers using the vmf_insert_pfn*() interface.
+ *
+ * Returns 0 on success and -EINVAL on error.
+ */
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+		pgprot_t *prot);
 extern int track_pfn_copy(struct vm_area_struct *dst_vma,
 		struct vm_area_struct *src_vma, unsigned long *pfn);
 extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
@@ -1563,6 +1584,21 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 extern void untrack_pfn_clear(struct vm_area_struct *vma);
 #endif
 
+/**
+ * pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn
+ * @pfn: the pfn
+ * @prot: the pgprot to modify
+ *
+ * Lookup the cachemode for @pfn and store it in @prot, leaving other
+ * data in @prot unchanged.
+ *
+ * See pfnmap_setup_cachemode() for details.
+ */
+static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
+{
+	pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot);
+}
+
 #ifdef CONFIG_MMU
 #ifdef __HAVE_COLOR_ZERO_PAGE
 static inline int is_zero_pfn(unsigned long pfn)
-- 
cgit v1.2.3


From db44863a4d9df3604c4ff76507bb2056b6392e58 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 12 May 2025 14:34:16 +0200
Subject: mm: introduce pfnmap_track() and pfnmap_untrack() and use them for
 memremap

Let's provide variants of track_pfn_remap() and untrack_pfn() that won't
mess with VMAs, and replace the usage in mm/memremap.c.

Add some documentation.

Link: https://lkml.kernel.org/r/20250512123424.637989-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>	[x86 bits]
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Dave Airlie <airlied@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index be1745839871..90f72cd35839 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1502,6 +1502,16 @@ static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
 	return 0;
 }
 
+static inline int pfnmap_track(unsigned long pfn, unsigned long size,
+		pgprot_t *prot)
+{
+	return 0;
+}
+
+static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
+{
+}
+
 /*
  * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
  * tables copied during copy_page_range(). Will store the pfn to be
@@ -1575,6 +1585,35 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
  */
 int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
 		pgprot_t *prot);
+
+/**
+ * pfnmap_track - track a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to track
+ *
+ * Requested the pfn range to be 'tracked' by a hardware implementation and
+ * setup the cachemode in @prot similar to pfnmap_setup_cachemode().
+ *
+ * This allows for fine-grained control of memory cache behaviour at page
+ * level granularity. Tracking memory this way is persisted across VMA splits
+ * (VMA merging does not apply for VM_PFNMAP).
+ *
+ * Currently, there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * Returns 0 on success and -EINVAL on error.
+ */
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
+
+/**
+ * pfnmap_untrack - untrack a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ *
+ * Untrack a pfn range previously tracked through pfnmap_track().
+ */
+void pfnmap_untrack(unsigned long pfn, unsigned long size);
 extern int track_pfn_copy(struct vm_area_struct *dst_vma,
 		struct vm_area_struct *src_vma, unsigned long *pfn);
 extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
-- 
cgit v1.2.3


From f8e97613fed25758ddf52159b87e1c66e619a23a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 12 May 2025 14:34:17 +0200
Subject: mm: convert VM_PFNMAP tracking to pfnmap_track() + pfnmap_untrack()

Let's use our new interface.  In remap_pfn_range(), we'll now decide
whether we have to track (full VMA covered) or only lookup the cachemode
(partial VMA covered).

Remember what we have to untrack by linking it from the VMA.  When
duplicating VMAs (e.g., splitting, mremap, fork), we'll handle it similar
to anon VMA names, and use a kref to share the tracking.

Once the last VMA un-refs our tracking data, we'll do the untracking,
which simplifies things a lot and should sort our various issues we saw
recently, for example, when partially unmapping/zapping a tracked VMA.

This change implies that we'll keep tracking the original PFN range even
after splitting + partially unmapping it: not too bad, because it was not
working reliably before.  The only thing that kind-of worked before was
shrinking such a mapping using mremap(): we managed to adjust the
reservation in a hacky way, now we won't adjust the reservation but leave
it around until all involved VMAs are gone.

If that ever turns out to be an issue, we could hook into VM splitting
code and split the tracking; however, that adds complexity that might not
be required, so we'll keep it simple for now.

Link: https://lkml.kernel.org/r/20250512123424.637989-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>	[x86 bits]
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Dave Airlie <airlied@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h |  2 ++
 include/linux/mm_types.h  | 11 +++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f9157a0c42a5..89b518ff097e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -447,6 +447,8 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
 
 #endif  /* CONFIG_ANON_VMA_NAME */
 
+void pfnmap_track_ctx_release(struct kref *ref);
+
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_set(&mm->tlb_flush_pending, 0);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 15808cad2bc1..3e934dc6057c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -763,6 +763,14 @@ struct vma_numab_state {
 	int prev_scan_seq;
 };
 
+#ifdef __HAVE_PFNMAP_TRACKING
+struct pfnmap_track_ctx {
+	struct kref kref;
+	unsigned long pfn;
+	unsigned long size;	/* in bytes */
+};
+#endif
+
 /*
  * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
  * manipulate mutable fields which will cause those fields to be updated in the
@@ -900,6 +908,9 @@ struct vm_area_struct {
 	struct anon_vma_name *anon_name;
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef __HAVE_PFNMAP_TRACKING
+	struct pfnmap_track_ctx *pfnmap_track_ctx;
+#endif
 } __randomize_layout;
 
 #ifdef CONFIG_NUMA
-- 
cgit v1.2.3


From 7bd7d74ec01954fde9eb65b065eb55bcda4f86e2 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 12 May 2025 14:34:18 +0200
Subject: x86/mm/pat: remove old pfnmap tracking interface

We can now get rid of the old interface along with get_pat_info() and
follow_phys().

Link: https://lkml.kernel.org/r/20250512123424.637989-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>	[x86 bits]
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Dave Airlie <airlied@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 66 -------------------------------------------------
 1 file changed, 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 90f72cd35839..0b6e1f781d86 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1485,17 +1485,6 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
  * vmf_insert_pfn.
  */
 
-/*
- * track_pfn_remap is called when a _new_ pfn mapping is being established
- * by remap_pfn_range() for physical range indicated by pfn and size.
- */
-static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-				  unsigned long pfn, unsigned long addr,
-				  unsigned long size)
-{
-	return 0;
-}
-
 static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
 		pgprot_t *prot)
 {
@@ -1511,55 +1500,7 @@ static inline int pfnmap_track(unsigned long pfn, unsigned long size,
 static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
 {
 }
-
-/*
- * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
- * tables copied during copy_page_range(). Will store the pfn to be
- * passed to untrack_pfn_copy() only if there is something to be untracked.
- * Callers should initialize the pfn to 0.
- */
-static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
-		struct vm_area_struct *src_vma, unsigned long *pfn)
-{
-	return 0;
-}
-
-/*
- * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during
- * copy_page_range(), but after track_pfn_copy() was already called. Can
- * be called even if track_pfn_copy() did not actually track anything:
- * handled internally.
- */
-static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma,
-		unsigned long pfn)
-{
-}
-
-/*
- * untrack_pfn is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case pfn, size are zero).
- */
-static inline void untrack_pfn(struct vm_area_struct *vma,
-			       unsigned long pfn, unsigned long size,
-			       bool mm_wr_locked)
-{
-}
-
-/*
- * untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA:
- *
- * 1) During mremap() on the src VMA after the page tables were moved.
- * 2) During fork() on the dst VMA, immediately after duplicating the src VMA.
- */
-static inline void untrack_pfn_clear(struct vm_area_struct *vma)
-{
-}
 #else
-extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-			   unsigned long pfn, unsigned long addr,
-			   unsigned long size);
-
 /**
  * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range
  * @pfn: the start of the pfn range
@@ -1614,13 +1555,6 @@ int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
  * Untrack a pfn range previously tracked through pfnmap_track().
  */
 void pfnmap_untrack(unsigned long pfn, unsigned long size);
-extern int track_pfn_copy(struct vm_area_struct *dst_vma,
-		struct vm_area_struct *src_vma, unsigned long *pfn);
-extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
-		unsigned long pfn);
-extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
-			unsigned long size, bool mm_wr_locked);
-extern void untrack_pfn_clear(struct vm_area_struct *vma);
 #endif
 
 /**
-- 
cgit v1.2.3


From cba4dbeb7bfcf8b69d491288c3bf877ead883214 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 12 May 2025 14:34:19 +0200
Subject: mm: remove VM_PAT

It's unused, so let's remove it.

Link: https://lkml.kernel.org/r/20250512123424.637989-7-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>	[x86 bits]
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Dave Airlie <airlied@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Tvrtko Ursulin <tursulin@ursulin.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 43748c8f3454..a916ea42cfd5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -357,9 +357,7 @@ extern unsigned int kobjsize(const void *objp);
 # define VM_SHADOW_STACK	VM_NONE
 #endif
 
-#if defined(CONFIG_X86)
-# define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
-#elif defined(CONFIG_PPC64)
+#if defined(CONFIG_PPC64)
 # define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP	VM_ARCH_1
-- 
cgit v1.2.3


From 5053383829ab2b17dc4766a832004c848f4af9df Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Mon, 12 May 2025 10:57:11 +0800
Subject: mm: khugepaged: convert set_huge_pmd() to take a folio

We've already gotten the stable locked folio in collapse_pte_mapped_thp(),
so just use folio for set_huge_pmd() to set the PMD entry, which is more
straightforward.

Moreover, we will check the folio size in do_set_pmd(), so we can remove
the unnecessary VM_BUG_ON() in set_huge_pmd().  While we are at it, we can
also remove the PageTransHuge(), as it currently has no callers.

Link: https://lkml.kernel.org/r/110c3e1ec5fe7854a0e2c95ffcbc985817180ed7.1747017104.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 37b11f15dbd9..1c1d49554c71 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -907,20 +907,6 @@ FOLIO_FLAG_FALSE(partially_mapped)
 #define PG_head_mask ((1UL << PG_head))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/*
- * PageHuge() only returns true for hugetlbfs pages, but not for
- * normal or transparent huge pages.
- *
- * PageTransHuge() returns true for both transparent huge and
- * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
- * called only in the core VM paths where hugetlbfs pages can't exist.
- */
-static inline int PageTransHuge(const struct page *page)
-{
-	VM_BUG_ON_PAGE(PageTail(page), page);
-	return PageHead(page);
-}
-
 /*
  * PageTransCompound returns true for both transparent huge pages
  * and hugetlbfs pages, so it should only be called when it's known
@@ -931,7 +917,6 @@ static inline int PageTransCompound(const struct page *page)
 	return PageCompound(page);
 }
 #else
-TESTPAGEFLAG_FALSE(TransHuge, transhuge)
 TESTPAGEFLAG_FALSE(TransCompound, transcompound)
 #endif
 
-- 
cgit v1.2.3


From 698c0089cdf0b27d37f0b24824b682c23de5f72d Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Mon, 12 May 2025 10:57:12 +0800
Subject: mm: convert do_set_pmd() to take a folio

In do_set_pmd(), we always use the folio->page to build PMD mappings for
the entire folio.  Since all callers of do_set_pmd() already hold a stable
folio, converting do_set_pmd() to take a folio is safe and more
straightforward.

In addition, to ensure the extensibility of do_set_pmd() for supporting
larger folios beyond PMD size, we keep the 'page' parameter to specify
which page within the folio should be mapped.

No functional changes expected.

Link: https://lkml.kernel.org/r/9b488f4ecb4d3fd8634e3d448dd0ed6964482480.1747017104.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a916ea42cfd5..cd2e513189d6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1235,7 +1235,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *page);
 void set_pte_range(struct vm_fault *vmf, struct folio *folio,
 		struct page *page, unsigned int nr, unsigned long addr);
 
-- 
cgit v1.2.3


From 6669d1aaa0c45a50a4cc5f1756ab03578eaebd18 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Wed, 14 May 2025 09:40:24 +0100
Subject: mm: remove WARN_ON_ONCE() in file_has_valid_mmap_hooks()

Having encountered a trinity report in linux-next (Linked in the 'Closes'
tag) it appears that there are legitimate situations where a file-backed
mapping can be acquired but no file->f_op->mmap or
file->f_op->mmap_prepare is set, at which point do_mmap() should simply
error out with -ENODEV.

Since previously we did not warn in this scenario and it appears we rely
upon this, restore this situation, while retaining a WARN_ON_ONCE() for
the case where both are set, which is absolutely incorrect and must be
addressed and thus always requires a warning.

If further work is required to chase down precisely what is causing this,
then we can later restore this, but it makes no sense to hold up this
series to do so, as this is existing and apparently expected behaviour.

Link: https://lkml.kernel.org/r/20250514084024.29148-1-lorenzo.stoakes@oracle.com
Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202505141434.96ce5e5d-lkp@intel.com
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2721a1ff13d..09c8495dacdb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2248,7 +2248,7 @@ static inline bool file_has_valid_mmap_hooks(struct file *file)
 	/* Hooks are mutually exclusive. */
 	if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
 		return false;
-	if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare))
+	if (!has_mmap && !has_mmap_prepare)
 		return false;
 
 	return true;
-- 
cgit v1.2.3


From 2aad4edf6e1018b28b7000faec56b7b6e585c8e1 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 16 May 2025 17:34:46 -0700
Subject: mm: rename try_alloc_pages() to alloc_pages_nolock()

The "try_" prefix is confusing, since it made people believe that
try_alloc_pages() is analogous to spin_trylock() and NULL return means
EAGAIN.  This is not the case.  If it returns NULL there is no reason to
call it again.  It will most likely return NULL again.  Hence rename it to
alloc_pages_nolock() to make it symmetrical to free_pages_nolock() and
document that NULL means ENOMEM.

Link: https://lkml.kernel.org/r/20250517003446.60260-1-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/gfp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c9fa6309c903..be160e8d8bcb 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -45,13 +45,13 @@ static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags)
 	 * !__GFP_DIRECT_RECLAIM -> direct claim is not allowed.
 	 * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd.
 	 * All GFP_* flags including GFP_NOWAIT use one or both flags.
-	 * try_alloc_pages() is the only API that doesn't specify either flag.
+	 * alloc_pages_nolock() is the only API that doesn't specify either flag.
 	 *
 	 * This is stronger than GFP_NOWAIT or GFP_ATOMIC because
 	 * those are guaranteed to never block on a sleeping lock.
 	 * Here we are enforcing that the allocation doesn't ever spin
 	 * on any locks (i.e. only trylocks). There is no high level
-	 * GFP_$FOO flag for this use in try_alloc_pages() as the
+	 * GFP_$FOO flag for this use in alloc_pages_nolock() as the
 	 * regular page allocator doesn't fully support this
 	 * allocation mode.
 	 */
@@ -354,8 +354,8 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
 }
 #define alloc_page_vma(...)			alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
 
-struct page *try_alloc_pages_noprof(int nid, unsigned int order);
-#define try_alloc_pages(...)			alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__))
+struct page *alloc_pages_nolock_noprof(int nid, unsigned int order);
+#define alloc_pages_nolock(...)			alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__))
 
 extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
 #define __get_free_pages(...)			alloc_hooks(get_free_pages_noprof(__VA_ARGS__))
-- 
cgit v1.2.3


From cc79061b8fc119807111b615aa791562374b15b2 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Tue, 13 May 2025 14:56:35 +0800
Subject: mm: khugepaged: decouple SHMEM and file folios' collapse

Originally, the file pages collapse was intended for tmpfs/shmem to merge
into THP in the background.  However, now not only tmpfs/shmem can support
large folios, but some other file systems (such as XFS, erofs ...) also
support large folios.  Therefore, it is time to decouple the support of
file folios collapse from SHMEM.

Link: https://lkml.kernel.org/r/ce5c2314e0368cf34bda26f9bacf01c982d4da17.1747119309.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/khugepaged.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 1f46046080f5..b8d69cfbb58b 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,16 +15,8 @@ extern void khugepaged_enter_vma(struct vm_area_struct *vma,
 				 unsigned long vm_flags);
 extern void khugepaged_min_free_kbytes_update(void);
 extern bool current_is_khugepaged(void);
-#ifdef CONFIG_SHMEM
 extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 				   bool install_pmd);
-#else
-static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
-					  unsigned long addr, bool install_pmd)
-{
-	return 0;
-}
-#endif
 
 static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
-- 
cgit v1.2.3


From 8814e3b8692b31e0150a894dd70c14ca0b7b746a Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Wed, 14 May 2025 11:41:54 -0700
Subject: memcg: make mod_memcg_state re-entrant safe against irqs

Let's make mod_memcg_state re-entrant safe against irqs.  The only thing
needed is to convert the usage of __this_cpu_add() to this_cpu_add().  In
addition, with re-entrant safety, there is no need to disable irqs.

mod_memcg_state() is not safe against nmi, so let's add warning if someone
tries to call it in nmi context.

Link: https://lkml.kernel.org/r/20250514184158.3471331-4-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9ed75f82b858..92861ff3c43f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -903,19 +903,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
 					    struct mem_cgroup *oom_domain);
 void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
 
-void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
-		       int val);
-
 /* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void mod_memcg_state(struct mem_cgroup *memcg,
-				   enum memcg_stat_item idx, int val)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__mod_memcg_state(memcg, idx, val);
-	local_irq_restore(flags);
-}
+void mod_memcg_state(struct mem_cgroup *memcg,
+		     enum memcg_stat_item idx, int val);
 
 static inline void mod_memcg_page_state(struct page *page,
 					enum memcg_stat_item idx, int val)
@@ -1375,12 +1365,6 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
 {
 }
 
-static inline void __mod_memcg_state(struct mem_cgroup *memcg,
-				     enum memcg_stat_item idx,
-				     int nr)
-{
-}
-
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
 				   enum memcg_stat_item idx,
 				   int nr)
-- 
cgit v1.2.3


From e52401e7247bb36cabba389ae32fb75a12a6e94b Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Wed, 14 May 2025 11:41:55 -0700
Subject: memcg: make count_memcg_events re-entrant safe against irqs

Let's make count_memcg_events re-entrant safe against irqs.  The only
thing needed is to convert the usage of __this_cpu_add() to
this_cpu_add().  In addition, with re-entrant safety, there is no need to
disable irqs.  Also add warnings for in_nmi() as it is not safe against
nmi context.

Link: https://lkml.kernel.org/r/20250514184158.3471331-5-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 92861ff3c43f..f7848f73f41c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -942,19 +942,8 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
 	local_irq_restore(flags);
 }
 
-void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
-			  unsigned long count);
-
-static inline void count_memcg_events(struct mem_cgroup *memcg,
-				      enum vm_event_item idx,
-				      unsigned long count)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__count_memcg_events(memcg, idx, count);
-	local_irq_restore(flags);
-}
+void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
+			unsigned long count);
 
 static inline void count_memcg_folio_events(struct folio *folio,
 		enum vm_event_item idx, unsigned long nr)
@@ -1418,12 +1407,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
 }
 
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-				      enum vm_event_item idx,
-				      unsigned long count)
-{
-}
-
-static inline void __count_memcg_events(struct mem_cgroup *memcg,
 					enum vm_event_item idx,
 					unsigned long count)
 {
-- 
cgit v1.2.3


From ba1a455393c430c97235cb593d40a3ea7ad8acca Mon Sep 17 00:00:00 2001
From: Artur Weber <aweber.kernel@gmail.com>
Date: Sun, 16 Mar 2025 19:18:52 +0100
Subject: mfd: bcm590xx: Drop unused "id" member of bcm590xx struct

The "id" member of the bcm590xx struct is unused and will be confusing
once we add an actual PMU ID storage value. Drop it; a replacement
will be introduced in a future commit.

Signed-off-by: Artur Weber <aweber.kernel@gmail.com>
Reviewed-by: "Rob Herring (Arm)" <robh@kernel.org>
Link: https://lore.kernel.org/r/20250316-bcm59054-v7-4-4281126be1b8@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/bcm590xx.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h
index 09d4ae054242..5a5783abd47b 100644
--- a/include/linux/mfd/bcm590xx.h
+++ b/include/linux/mfd/bcm590xx.h
@@ -43,7 +43,6 @@ struct bcm590xx {
 	struct i2c_client *i2c_sec;
 	struct regmap *regmap_pri;
 	struct regmap *regmap_sec;
-	unsigned int id;
 
 	/* PMU ID value; also used as device type */
 	u8 pmu_id;
-- 
cgit v1.2.3


From b3379422b460ea8c0556df300d547d63e5569cda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
Date: Wed, 9 Apr 2025 21:37:25 +0100
Subject: mfd: sec-core: Drop non-existing forward declarations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sec_irq_resume() was removed in commit 6445b84abf91 ("mfd: Add s2mps11
irq driver") and sec_irq_exit() in commit 3dc6f4aaafbe ("mfd: sec: Use
devm_mfd_add_devices and devm_regmap_add_irq_chip") while the
prototypes were left. They should be removed.

Do so.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: André Draszik <andre.draszik@linaro.org>
Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-4-d66d5f39b6bf@linaro.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/samsung/core.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index f35314458fd2..b7008b50392a 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -72,8 +72,6 @@ struct sec_pmic_dev {
 };
 
 int sec_irq_init(struct sec_pmic_dev *sec_pmic);
-void sec_irq_exit(struct sec_pmic_dev *sec_pmic);
-int sec_irq_resume(struct sec_pmic_dev *sec_pmic);
 
 struct sec_platform_data {
 	struct sec_regulator_data	*regulators;
-- 
cgit v1.2.3


From 8b88b5e4d58151d8a37250afc978f253cd8cc4fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
Date: Wed, 9 Apr 2025 21:37:28 +0100
Subject: mfd: sec: Move private internal API to internal header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sec_irq_init() is an internal API for the core driver, and doesn't
belong into the public header.

Due to an upcoming split of the driver into a core and i2c driver,
we'll also be adding more internal APIs, which again shouldn't be in
the public header.

Move it into a new internal include.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: André Draszik <andre.draszik@linaro.org>
Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-7-d66d5f39b6bf@linaro.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/samsung/core.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index b7008b50392a..8a4e660854bb 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -71,8 +71,6 @@ struct sec_pmic_dev {
 	struct regmap_irq_chip_data *irq_data;
 };
 
-int sec_irq_init(struct sec_pmic_dev *sec_pmic);
-
 struct sec_platform_data {
 	struct sec_regulator_data	*regulators;
 	struct sec_opmode_data		*opmode;
-- 
cgit v1.2.3


From 5338709089b75f57a9b3b7b17a0424ecab5f2fd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
Date: Wed, 9 Apr 2025 21:37:30 +0100
Subject: mfd: sec: Add support for S2MPG10 PMIC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for Samsung's S2MPG10 PMIC, which is a Power Management IC
for mobile applications with buck converters, various LDOs, power
meters, RTC, clock outputs, and additional GPIOs interfaces.

Contrary to existing Samsung S2M series PMICs supported, communication
is not via I2C, but via the Samsung ACPM firmware.

This commit adds the core driver.

Signed-off-by: André Draszik <andre.draszik@linaro.org>
Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-9-d66d5f39b6bf@linaro.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/samsung/core.h    |   1 +
 include/linux/mfd/samsung/irq.h     | 103 ++++++++
 include/linux/mfd/samsung/rtc.h     |  37 +++
 include/linux/mfd/samsung/s2mpg10.h | 454 ++++++++++++++++++++++++++++++++++++
 4 files changed, 595 insertions(+)
 create mode 100644 include/linux/mfd/samsung/s2mpg10.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 8a4e660854bb..c1102324172a 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -39,6 +39,7 @@ enum sec_device_type {
 	S5M8767X,
 	S2DOS05,
 	S2MPA01,
+	S2MPG10,
 	S2MPS11X,
 	S2MPS13X,
 	S2MPS14X,
diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h
index 978f7af66f74..b4805cbd949b 100644
--- a/include/linux/mfd/samsung/irq.h
+++ b/include/linux/mfd/samsung/irq.h
@@ -57,6 +57,109 @@ enum s2mpa01_irq {
 #define S2MPA01_IRQ_B24_TSD_MASK	(1 << 4)
 #define S2MPA01_IRQ_B35_TSD_MASK	(1 << 5)
 
+enum s2mpg10_irq {
+	/* PMIC */
+	S2MPG10_IRQ_PWRONF,
+	S2MPG10_IRQ_PWRONR,
+	S2MPG10_IRQ_JIGONBF,
+	S2MPG10_IRQ_JIGONBR,
+	S2MPG10_IRQ_ACOKBF,
+	S2MPG10_IRQ_ACOKBR,
+	S2MPG10_IRQ_PWRON1S,
+	S2MPG10_IRQ_MRB,
+#define S2MPG10_IRQ_PWRONF_MASK		BIT(0)
+#define S2MPG10_IRQ_PWRONR_MASK		BIT(1)
+#define S2MPG10_IRQ_JIGONBF_MASK	BIT(2)
+#define S2MPG10_IRQ_JIGONBR_MASK	BIT(3)
+#define S2MPG10_IRQ_ACOKBF_MASK		BIT(4)
+#define S2MPG10_IRQ_ACOKBR_MASK		BIT(5)
+#define S2MPG10_IRQ_PWRON1S_MASK	BIT(6)
+#define S2MPG10_IRQ_MRB_MASK		BIT(7)
+
+	S2MPG10_IRQ_RTC60S,
+	S2MPG10_IRQ_RTCA1,
+	S2MPG10_IRQ_RTCA0,
+	S2MPG10_IRQ_RTC1S,
+	S2MPG10_IRQ_WTSR_COLDRST,
+	S2MPG10_IRQ_WTSR,
+	S2MPG10_IRQ_WRST,
+	S2MPG10_IRQ_SMPL,
+#define S2MPG10_IRQ_RTC60S_MASK		BIT(0)
+#define S2MPG10_IRQ_RTCA1_MASK		BIT(1)
+#define S2MPG10_IRQ_RTCA0_MASK		BIT(2)
+#define S2MPG10_IRQ_RTC1S_MASK		BIT(3)
+#define S2MPG10_IRQ_WTSR_COLDRST_MASK	BIT(4)
+#define S2MPG10_IRQ_WTSR_MASK		BIT(5)
+#define S2MPG10_IRQ_WRST_MASK		BIT(6)
+#define S2MPG10_IRQ_SMPL_MASK		BIT(7)
+
+	S2MPG10_IRQ_120C,
+	S2MPG10_IRQ_140C,
+	S2MPG10_IRQ_TSD,
+	S2MPG10_IRQ_PIF_TIMEOUT1,
+	S2MPG10_IRQ_PIF_TIMEOUT2,
+	S2MPG10_IRQ_SPD_PARITY_ERR,
+	S2MPG10_IRQ_SPD_ABNORMAL_STOP,
+	S2MPG10_IRQ_PMETER_OVERF,
+#define S2MPG10_IRQ_INT120C_MASK		BIT(0)
+#define S2MPG10_IRQ_INT140C_MASK		BIT(1)
+#define S2MPG10_IRQ_TSD_MASK			BIT(2)
+#define S2MPG10_IRQ_PIF_TIMEOUT1_MASK		BIT(3)
+#define S2MPG10_IRQ_PIF_TIMEOUT2_MASK		BIT(4)
+#define S2MPG10_IRQ_SPD_PARITY_ERR_MASK		BIT(5)
+#define S2MPG10_IRQ_SPD_ABNORMAL_STOP_MASK	BIT(6)
+#define S2MPG10_IRQ_PMETER_OVERF_MASK		BIT(7)
+
+	S2MPG10_IRQ_OCP_B1M,
+	S2MPG10_IRQ_OCP_B2M,
+	S2MPG10_IRQ_OCP_B3M,
+	S2MPG10_IRQ_OCP_B4M,
+	S2MPG10_IRQ_OCP_B5M,
+	S2MPG10_IRQ_OCP_B6M,
+	S2MPG10_IRQ_OCP_B7M,
+	S2MPG10_IRQ_OCP_B8M,
+#define S2MPG10_IRQ_OCP_B1M_MASK	BIT(0)
+#define S2MPG10_IRQ_OCP_B2M_MASK	BIT(1)
+#define S2MPG10_IRQ_OCP_B3M_MASK	BIT(2)
+#define S2MPG10_IRQ_OCP_B4M_MASK	BIT(3)
+#define S2MPG10_IRQ_OCP_B5M_MASK	BIT(4)
+#define S2MPG10_IRQ_OCP_B6M_MASK	BIT(5)
+#define S2MPG10_IRQ_OCP_B7M_MASK	BIT(6)
+#define S2MPG10_IRQ_OCP_B8M_MASK	BIT(7)
+
+	S2MPG10_IRQ_OCP_B9M,
+	S2MPG10_IRQ_OCP_B10M,
+	S2MPG10_IRQ_WLWP_ACC,
+	S2MPG10_IRQ_SMPL_TIMEOUT,
+	S2MPG10_IRQ_WTSR_TIMEOUT,
+	S2MPG10_IRQ_SPD_SRP_PKT_RST,
+#define S2MPG10_IRQ_OCP_B9M_MASK		BIT(0)
+#define S2MPG10_IRQ_OCP_B10M_MASK		BIT(1)
+#define S2MPG10_IRQ_WLWP_ACC_MASK		BIT(2)
+#define S2MPG10_IRQ_SMPL_TIMEOUT_MASK		BIT(5)
+#define S2MPG10_IRQ_WTSR_TIMEOUT_MASK		BIT(6)
+#define S2MPG10_IRQ_SPD_SRP_PKT_RST_MASK	BIT(7)
+
+	S2MPG10_IRQ_PWR_WARN_CH0,
+	S2MPG10_IRQ_PWR_WARN_CH1,
+	S2MPG10_IRQ_PWR_WARN_CH2,
+	S2MPG10_IRQ_PWR_WARN_CH3,
+	S2MPG10_IRQ_PWR_WARN_CH4,
+	S2MPG10_IRQ_PWR_WARN_CH5,
+	S2MPG10_IRQ_PWR_WARN_CH6,
+	S2MPG10_IRQ_PWR_WARN_CH7,
+#define S2MPG10_IRQ_PWR_WARN_CH0_MASK	BIT(0)
+#define S2MPG10_IRQ_PWR_WARN_CH1_MASK	BIT(1)
+#define S2MPG10_IRQ_PWR_WARN_CH2_MASK	BIT(2)
+#define S2MPG10_IRQ_PWR_WARN_CH3_MASK	BIT(3)
+#define S2MPG10_IRQ_PWR_WARN_CH4_MASK	BIT(4)
+#define S2MPG10_IRQ_PWR_WARN_CH5_MASK	BIT(5)
+#define S2MPG10_IRQ_PWR_WARN_CH6_MASK	BIT(6)
+#define S2MPG10_IRQ_PWR_WARN_CH7_MASK	BIT(7)
+
+	S2MPG10_IRQ_NR,
+};
+
 enum s2mps11_irq {
 	S2MPS11_IRQ_PWRONF,
 	S2MPS11_IRQ_PWRONR,
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 0204decfc9aa..51c4239a1fa6 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -72,6 +72,37 @@ enum s2mps_rtc_reg {
 	S2MPS_RTC_REG_MAX,
 };
 
+enum s2mpg10_rtc_reg {
+	S2MPG10_RTC_CTRL,
+	S2MPG10_RTC_UPDATE,
+	S2MPG10_RTC_SMPL,
+	S2MPG10_RTC_WTSR,
+	S2MPG10_RTC_CAP_SEL,
+	S2MPG10_RTC_MSEC,
+	S2MPG10_RTC_SEC,
+	S2MPG10_RTC_MIN,
+	S2MPG10_RTC_HOUR,
+	S2MPG10_RTC_WEEK,
+	S2MPG10_RTC_DAY,
+	S2MPG10_RTC_MON,
+	S2MPG10_RTC_YEAR,
+	S2MPG10_RTC_A0SEC,
+	S2MPG10_RTC_A0MIN,
+	S2MPG10_RTC_A0HOUR,
+	S2MPG10_RTC_A0WEEK,
+	S2MPG10_RTC_A0DAY,
+	S2MPG10_RTC_A0MON,
+	S2MPG10_RTC_A0YEAR,
+	S2MPG10_RTC_A1SEC,
+	S2MPG10_RTC_A1MIN,
+	S2MPG10_RTC_A1HOUR,
+	S2MPG10_RTC_A1WEEK,
+	S2MPG10_RTC_A1DAY,
+	S2MPG10_RTC_A1MON,
+	S2MPG10_RTC_A1YEAR,
+	S2MPG10_RTC_OSC_CTRL,
+};
+
 #define RTC_I2C_ADDR		(0x0C >> 1)
 
 #define HOUR_12			(1 << 7)
@@ -124,10 +155,16 @@ enum s2mps_rtc_reg {
 #define ALARM_ENABLE_SHIFT	7
 #define ALARM_ENABLE_MASK	(1 << ALARM_ENABLE_SHIFT)
 
+/* WTSR & SMPL registers */
 #define SMPL_ENABLE_SHIFT	7
 #define SMPL_ENABLE_MASK	(1 << SMPL_ENABLE_SHIFT)
 
 #define WTSR_ENABLE_SHIFT	6
 #define WTSR_ENABLE_MASK	(1 << WTSR_ENABLE_SHIFT)
 
+#define S2MPG10_WTSR_COLDTIMER	GENMASK(6, 5)
+#define S2MPG10_WTSR_COLDRST	BIT(4)
+#define S2MPG10_WTSR_WTSRT	GENMASK(3, 1)
+#define S2MPG10_WTSR_WTSR_EN	BIT(0)
+
 #endif /*  __LINUX_MFD_SEC_RTC_H */
diff --git a/include/linux/mfd/samsung/s2mpg10.h b/include/linux/mfd/samsung/s2mpg10.h
new file mode 100644
index 000000000000..9f5919b89a3c
--- /dev/null
+++ b/include/linux/mfd/samsung/s2mpg10.h
@@ -0,0 +1,454 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2015 Samsung Electronics
+ * Copyright 2020 Google Inc
+ * Copyright 2025 Linaro Ltd.
+ */
+
+#ifndef __LINUX_MFD_S2MPG10_H
+#define __LINUX_MFD_S2MPG10_H
+
+/* Common registers (type 0x000) */
+enum s2mpg10_common_reg {
+	S2MPG10_COMMON_CHIPID,
+	S2MPG10_COMMON_INT,
+	S2MPG10_COMMON_INT_MASK,
+	S2MPG10_COMMON_SPD_CTRL1 = 0x0a,
+	S2MPG10_COMMON_SPD_CTRL2,
+	S2MPG10_COMMON_SPD_CTRL3,
+	S2MPG10_COMMON_MON1SEL = 0x1a,
+	S2MPG10_COMMON_MON2SEL,
+	S2MPG10_COMMON_MONR,
+	S2MPG10_COMMON_DEBUG_CTRL1,
+	S2MPG10_COMMON_DEBUG_CTRL2,
+	S2MPG10_COMMON_DEBUG_CTRL3,
+	S2MPG10_COMMON_DEBUG_CTRL4,
+	S2MPG10_COMMON_DEBUG_CTRL5,
+	S2MPG10_COMMON_DEBUG_CTRL6,
+	S2MPG10_COMMON_DEBUG_CTRL7,
+	S2MPG10_COMMON_DEBUG_CTRL8,
+	S2MPG10_COMMON_TEST_MODE1,
+	S2MPG10_COMMON_TEST_MODE2,
+	S2MPG10_COMMON_SPD_DEBUG1,
+	S2MPG10_COMMON_SPD_DEBUG2,
+	S2MPG10_COMMON_SPD_DEBUG3,
+	S2MPG10_COMMON_SPD_DEBUG4,
+};
+
+/* For S2MPG10_COMMON_INT and S2MPG10_COMMON_INT_MASK */
+#define S2MPG10_COMMON_INT_SRC       GENMASK(7, 0)
+#define S2MPG10_COMMON_INT_SRC_PMIC  BIT(0)
+
+/* PMIC registers (type 0x100) */
+enum s2mpg10_pmic_reg {
+	S2MPG10_PMIC_INT1,
+	S2MPG10_PMIC_INT2,
+	S2MPG10_PMIC_INT3,
+	S2MPG10_PMIC_INT4,
+	S2MPG10_PMIC_INT5,
+	S2MPG10_PMIC_INT6,
+	S2MPG10_PMIC_INT1M,
+	S2MPG10_PMIC_INT2M,
+	S2MPG10_PMIC_INT3M,
+	S2MPG10_PMIC_INT4M,
+	S2MPG10_PMIC_INT5M,
+	S2MPG10_PMIC_INT6M,
+	S2MPG10_PMIC_STATUS1,
+	S2MPG10_PMIC_STATUS2,
+	S2MPG10_PMIC_PWRONSRC,
+	S2MPG10_PMIC_OFFSRC,
+	S2MPG10_PMIC_BU_CHG,
+	S2MPG10_PMIC_RTCBUF,
+	S2MPG10_PMIC_COMMON_CTRL1,
+	S2MPG10_PMIC_COMMON_CTRL2,
+	S2MPG10_PMIC_COMMON_CTRL3,
+	S2MPG10_PMIC_COMMON_CTRL4,
+	S2MPG10_PMIC_SMPL_WARN_CTRL,
+	S2MPG10_PMIC_MIMICKING_CTRL,
+	S2MPG10_PMIC_B1M_CTRL,
+	S2MPG10_PMIC_B1M_OUT1,
+	S2MPG10_PMIC_B1M_OUT2,
+	S2MPG10_PMIC_B2M_CTRL,
+	S2MPG10_PMIC_B2M_OUT1,
+	S2MPG10_PMIC_B2M_OUT2,
+	S2MPG10_PMIC_B3M_CTRL,
+	S2MPG10_PMIC_B3M_OUT1,
+	S2MPG10_PMIC_B3M_OUT2,
+	S2MPG10_PMIC_B4M_CTRL,
+	S2MPG10_PMIC_B4M_OUT1,
+	S2MPG10_PMIC_B4M_OUT2,
+	S2MPG10_PMIC_B5M_CTRL,
+	S2MPG10_PMIC_B5M_OUT1,
+	S2MPG10_PMIC_B5M_OUT2,
+	S2MPG10_PMIC_B6M_CTRL,
+	S2MPG10_PMIC_B6M_OUT1,
+	S2MPG10_PMIC_B6M_OUT2,
+	S2MPG10_PMIC_B7M_CTRL,
+	S2MPG10_PMIC_B7M_OUT1,
+	S2MPG10_PMIC_B7M_OUT2,
+	S2MPG10_PMIC_B8M_CTRL,
+	S2MPG10_PMIC_B8M_OUT1,
+	S2MPG10_PMIC_B8M_OUT2,
+	S2MPG10_PMIC_B9M_CTRL,
+	S2MPG10_PMIC_B9M_OUT1,
+	S2MPG10_PMIC_B9M_OUT2,
+	S2MPG10_PMIC_B10M_CTRL,
+	S2MPG10_PMIC_B10M_OUT1,
+	S2MPG10_PMIC_B10M_OUT2,
+	S2MPG10_PMIC_BUCK1M_USONIC,
+	S2MPG10_PMIC_BUCK2M_USONIC,
+	S2MPG10_PMIC_BUCK3M_USONIC,
+	S2MPG10_PMIC_BUCK4M_USONIC,
+	S2MPG10_PMIC_BUCK5M_USONIC,
+	S2MPG10_PMIC_BUCK6M_USONIC,
+	S2MPG10_PMIC_BUCK7M_USONIC,
+	S2MPG10_PMIC_BUCK8M_USONIC,
+	S2MPG10_PMIC_BUCK9M_USONIC,
+	S2MPG10_PMIC_BUCK10M_USONIC,
+	S2MPG10_PMIC_L1M_CTRL,
+	S2MPG10_PMIC_L2M_CTRL,
+	S2MPG10_PMIC_L3M_CTRL,
+	S2MPG10_PMIC_L4M_CTRL,
+	S2MPG10_PMIC_L5M_CTRL,
+	S2MPG10_PMIC_L6M_CTRL,
+	S2MPG10_PMIC_L7M_CTRL,
+	S2MPG10_PMIC_L8M_CTRL,
+	S2MPG10_PMIC_L9M_CTRL,
+	S2MPG10_PMIC_L10M_CTRL,
+	S2MPG10_PMIC_L11M_CTRL1,
+	S2MPG10_PMIC_L11M_CTRL2,
+	S2MPG10_PMIC_L12M_CTRL1,
+	S2MPG10_PMIC_L12M_CTRL2,
+	S2MPG10_PMIC_L13M_CTRL1,
+	S2MPG10_PMIC_L13M_CTRL2,
+	S2MPG10_PMIC_L14M_CTRL,
+	S2MPG10_PMIC_L15M_CTRL1,
+	S2MPG10_PMIC_L15M_CTRL2,
+	S2MPG10_PMIC_L16M_CTRL,
+	S2MPG10_PMIC_L17M_CTRL,
+	S2MPG10_PMIC_L18M_CTRL,
+	S2MPG10_PMIC_L19M_CTRL,
+	S2MPG10_PMIC_L20M_CTRL,
+	S2MPG10_PMIC_L21M_CTRL,
+	S2MPG10_PMIC_L22M_CTRL,
+	S2MPG10_PMIC_L23M_CTRL,
+	S2MPG10_PMIC_L24M_CTRL,
+	S2MPG10_PMIC_L25M_CTRL,
+	S2MPG10_PMIC_L26M_CTRL,
+	S2MPG10_PMIC_L27M_CTRL,
+	S2MPG10_PMIC_L28M_CTRL,
+	S2MPG10_PMIC_L29M_CTRL,
+	S2MPG10_PMIC_L30M_CTRL,
+	S2MPG10_PMIC_L31M_CTRL,
+	S2MPG10_PMIC_LDO_CTRL1,
+	S2MPG10_PMIC_LDO_CTRL2,
+	S2MPG10_PMIC_LDO_DSCH1,
+	S2MPG10_PMIC_LDO_DSCH2,
+	S2MPG10_PMIC_LDO_DSCH3,
+	S2MPG10_PMIC_LDO_DSCH4,
+	S2MPG10_PMIC_LDO_BUCK7M_HLIMIT,
+	S2MPG10_PMIC_LDO_BUCK7M_LLIMIT,
+	S2MPG10_PMIC_LDO_LDO21M_HLIMIT,
+	S2MPG10_PMIC_LDO_LDO21M_LLIMIT,
+	S2MPG10_PMIC_LDO_LDO11M_HLIMIT,
+	S2MPG10_PMIC_DVS_RAMP1,
+	S2MPG10_PMIC_DVS_RAMP2,
+	S2MPG10_PMIC_DVS_RAMP3,
+	S2MPG10_PMIC_DVS_RAMP4,
+	S2MPG10_PMIC_DVS_RAMP5,
+	S2MPG10_PMIC_DVS_RAMP6,
+	S2MPG10_PMIC_DVS_SYNC_CTRL1,
+	S2MPG10_PMIC_DVS_SYNC_CTRL2,
+	S2MPG10_PMIC_DVS_SYNC_CTRL3,
+	S2MPG10_PMIC_DVS_SYNC_CTRL4,
+	S2MPG10_PMIC_DVS_SYNC_CTRL5,
+	S2MPG10_PMIC_DVS_SYNC_CTRL6,
+	S2MPG10_PMIC_OFF_CTRL1,
+	S2MPG10_PMIC_OFF_CTRL2,
+	S2MPG10_PMIC_OFF_CTRL3,
+	S2MPG10_PMIC_OFF_CTRL4,
+	S2MPG10_PMIC_SEQ_CTRL1,
+	S2MPG10_PMIC_SEQ_CTRL2,
+	S2MPG10_PMIC_SEQ_CTRL3,
+	S2MPG10_PMIC_SEQ_CTRL4,
+	S2MPG10_PMIC_SEQ_CTRL5,
+	S2MPG10_PMIC_SEQ_CTRL6,
+	S2MPG10_PMIC_SEQ_CTRL7,
+	S2MPG10_PMIC_SEQ_CTRL8,
+	S2MPG10_PMIC_SEQ_CTRL9,
+	S2MPG10_PMIC_SEQ_CTRL10,
+	S2MPG10_PMIC_SEQ_CTRL11,
+	S2MPG10_PMIC_SEQ_CTRL12,
+	S2MPG10_PMIC_SEQ_CTRL13,
+	S2MPG10_PMIC_SEQ_CTRL14,
+	S2MPG10_PMIC_SEQ_CTRL15,
+	S2MPG10_PMIC_SEQ_CTRL16,
+	S2MPG10_PMIC_SEQ_CTRL17,
+	S2MPG10_PMIC_SEQ_CTRL18,
+	S2MPG10_PMIC_SEQ_CTRL19,
+	S2MPG10_PMIC_SEQ_CTRL20,
+	S2MPG10_PMIC_SEQ_CTRL21,
+	S2MPG10_PMIC_SEQ_CTRL22,
+	S2MPG10_PMIC_SEQ_CTRL23,
+	S2MPG10_PMIC_SEQ_CTRL24,
+	S2MPG10_PMIC_SEQ_CTRL25,
+	S2MPG10_PMIC_SEQ_CTRL26,
+	S2MPG10_PMIC_SEQ_CTRL27,
+	S2MPG10_PMIC_SEQ_CTRL28,
+	S2MPG10_PMIC_SEQ_CTRL29,
+	S2MPG10_PMIC_SEQ_CTRL30,
+	S2MPG10_PMIC_SEQ_CTRL31,
+	S2MPG10_PMIC_SEQ_CTRL32,
+	S2MPG10_PMIC_SEQ_CTRL33,
+	S2MPG10_PMIC_SEQ_CTRL34,
+	S2MPG10_PMIC_SEQ_CTRL35,
+	S2MPG10_PMIC_OFF_SEQ_CTRL1,
+	S2MPG10_PMIC_OFF_SEQ_CTRL2,
+	S2MPG10_PMIC_OFF_SEQ_CTRL3,
+	S2MPG10_PMIC_OFF_SEQ_CTRL4,
+	S2MPG10_PMIC_OFF_SEQ_CTRL5,
+	S2MPG10_PMIC_OFF_SEQ_CTRL6,
+	S2MPG10_PMIC_OFF_SEQ_CTRL7,
+	S2MPG10_PMIC_OFF_SEQ_CTRL8,
+	S2MPG10_PMIC_OFF_SEQ_CTRL9,
+	S2MPG10_PMIC_OFF_SEQ_CTRL10,
+	S2MPG10_PMIC_OFF_SEQ_CTRL11,
+	S2MPG10_PMIC_OFF_SEQ_CTRL12,
+	S2MPG10_PMIC_OFF_SEQ_CTRL13,
+	S2MPG10_PMIC_OFF_SEQ_CTRL14,
+	S2MPG10_PMIC_OFF_SEQ_CTRL15,
+	S2MPG10_PMIC_OFF_SEQ_CTRL16,
+	S2MPG10_PMIC_OFF_SEQ_CTRL17,
+	S2MPG10_PMIC_OFF_SEQ_CTRL18,
+	S2MPG10_PMIC_PCTRLSEL1,
+	S2MPG10_PMIC_PCTRLSEL2,
+	S2MPG10_PMIC_PCTRLSEL3,
+	S2MPG10_PMIC_PCTRLSEL4,
+	S2MPG10_PMIC_PCTRLSEL5,
+	S2MPG10_PMIC_PCTRLSEL6,
+	S2MPG10_PMIC_PCTRLSEL7,
+	S2MPG10_PMIC_PCTRLSEL8,
+	S2MPG10_PMIC_PCTRLSEL9,
+	S2MPG10_PMIC_PCTRLSEL10,
+	S2MPG10_PMIC_PCTRLSEL11,
+	S2MPG10_PMIC_PCTRLSEL12,
+	S2MPG10_PMIC_PCTRLSEL13,
+	S2MPG10_PMIC_DCTRLSEL1,
+	S2MPG10_PMIC_DCTRLSEL2,
+	S2MPG10_PMIC_DCTRLSEL3,
+	S2MPG10_PMIC_DCTRLSEL4,
+	S2MPG10_PMIC_DCTRLSEL5,
+	S2MPG10_PMIC_DCTRLSEL6,
+	S2MPG10_PMIC_DCTRLSEL7,
+	S2MPG10_PMIC_GPIO_CTRL1,
+	S2MPG10_PMIC_GPIO_CTRL2,
+	S2MPG10_PMIC_GPIO_CTRL3,
+	S2MPG10_PMIC_GPIO_CTRL4,
+	S2MPG10_PMIC_GPIO_CTRL5,
+	S2MPG10_PMIC_GPIO_CTRL6,
+	S2MPG10_PMIC_GPIO_CTRL7,
+	S2MPG10_PMIC_B2M_OCP_WARN,
+	S2MPG10_PMIC_B2M_OCP_WARN_X,
+	S2MPG10_PMIC_B2M_OCP_WARN_Y,
+	S2MPG10_PMIC_B2M_OCP_WARN_Z,
+	S2MPG10_PMIC_B3M_OCP_WARN,
+	S2MPG10_PMIC_B3M_OCP_WARN_X,
+	S2MPG10_PMIC_B3M_OCP_WARN_Y,
+	S2MPG10_PMIC_B3M_OCP_WARN_Z,
+	S2MPG10_PMIC_B10M_OCP_WARN,
+	S2MPG10_PMIC_B10M_OCP_WARN_X,
+	S2MPG10_PMIC_B10M_OCP_WARN_Y,
+	S2MPG10_PMIC_B10M_OCP_WARN_Z,
+	S2MPG10_PMIC_B2M_SOFT_OCP_WARN,
+	S2MPG10_PMIC_B2M_SOFT_OCP_WARN_X,
+	S2MPG10_PMIC_B2M_SOFT_OCP_WARN_Y,
+	S2MPG10_PMIC_B2M_SOFT_OCP_WARN_Z,
+	S2MPG10_PMIC_B3M_SOFT_OCP_WARN,
+	S2MPG10_PMIC_B3M_SOFT_OCP_WARN_X,
+	S2MPG10_PMIC_B3M_SOFT_OCP_WARN_Y,
+	S2MPG10_PMIC_B3M_SOFT_OCP_WARN_Z,
+	S2MPG10_PMIC_B10M_SOFT_OCP_WARN,
+	S2MPG10_PMIC_B10M_SOFT_OCP_WARN_X,
+	S2MPG10_PMIC_B10M_SOFT_OCP_WARN_Y,
+	S2MPG10_PMIC_B10M_SOFT_OCP_WARN_Z,
+	S2MPG10_PMIC_BUCK_OCP_EN1,
+	S2MPG10_PMIC_BUCK_OCP_EN2,
+	S2MPG10_PMIC_BUCK_OCP_PD_EN1,
+	S2MPG10_PMIC_BUCK_OCP_PD_EN2,
+	S2MPG10_PMIC_BUCK_OCP_CTRL1,
+	S2MPG10_PMIC_BUCK_OCP_CTRL2,
+	S2MPG10_PMIC_BUCK_OCP_CTRL3,
+	S2MPG10_PMIC_BUCK_OCP_CTRL4,
+	S2MPG10_PMIC_BUCK_OCP_CTRL5,
+	S2MPG10_PMIC_PIF_CTRL,
+	S2MPG10_PMIC_BUCK_HR_MODE1,
+	S2MPG10_PMIC_BUCK_HR_MODE2,
+	S2MPG10_PMIC_FAULTOUT_CTRL,
+	S2MPG10_PMIC_LDO_SENSE1,
+	S2MPG10_PMIC_LDO_SENSE2,
+	S2MPG10_PMIC_LDO_SENSE3,
+	S2MPG10_PMIC_LDO_SENSE4,
+};
+
+/* Meter registers (type 0xa00) */
+enum s2mpg10_meter_reg {
+	S2MPG10_METER_CTRL1,
+	S2MPG10_METER_CTRL2,
+	S2MPG10_METER_CTRL3,
+	S2MPG10_METER_CTRL4,
+	S2MPG10_METER_BUCKEN1,
+	S2MPG10_METER_BUCKEN2,
+	S2MPG10_METER_MUXSEL0,
+	S2MPG10_METER_MUXSEL1,
+	S2MPG10_METER_MUXSEL2,
+	S2MPG10_METER_MUXSEL3,
+	S2MPG10_METER_MUXSEL4,
+	S2MPG10_METER_MUXSEL5,
+	S2MPG10_METER_MUXSEL6,
+	S2MPG10_METER_MUXSEL7,
+	S2MPG10_METER_LPF_C0_0,
+	S2MPG10_METER_LPF_C0_1,
+	S2MPG10_METER_LPF_C0_2,
+	S2MPG10_METER_LPF_C0_3,
+	S2MPG10_METER_LPF_C0_4,
+	S2MPG10_METER_LPF_C0_5,
+	S2MPG10_METER_LPF_C0_6,
+	S2MPG10_METER_LPF_C0_7,
+	S2MPG10_METER_PWR_WARN0,
+	S2MPG10_METER_PWR_WARN1,
+	S2MPG10_METER_PWR_WARN2,
+	S2MPG10_METER_PWR_WARN3,
+	S2MPG10_METER_PWR_WARN4,
+	S2MPG10_METER_PWR_WARN5,
+	S2MPG10_METER_PWR_WARN6,
+	S2MPG10_METER_PWR_WARN7,
+	S2MPG10_METER_PWR_HYS1,
+	S2MPG10_METER_PWR_HYS2,
+	S2MPG10_METER_PWR_HYS3,
+	S2MPG10_METER_PWR_HYS4,
+	S2MPG10_METER_ACC_DATA_CH0_1 = 0x40,
+	S2MPG10_METER_ACC_DATA_CH0_2,
+	S2MPG10_METER_ACC_DATA_CH0_3,
+	S2MPG10_METER_ACC_DATA_CH0_4,
+	S2MPG10_METER_ACC_DATA_CH0_5,
+	S2MPG10_METER_ACC_DATA_CH0_6,
+	S2MPG10_METER_ACC_DATA_CH1_1,
+	S2MPG10_METER_ACC_DATA_CH1_2,
+	S2MPG10_METER_ACC_DATA_CH1_3,
+	S2MPG10_METER_ACC_DATA_CH1_4,
+	S2MPG10_METER_ACC_DATA_CH1_5,
+	S2MPG10_METER_ACC_DATA_CH1_6,
+	S2MPG10_METER_ACC_DATA_CH2_1,
+	S2MPG10_METER_ACC_DATA_CH2_2,
+	S2MPG10_METER_ACC_DATA_CH2_3,
+	S2MPG10_METER_ACC_DATA_CH2_4,
+	S2MPG10_METER_ACC_DATA_CH2_5,
+	S2MPG10_METER_ACC_DATA_CH2_6,
+	S2MPG10_METER_ACC_DATA_CH3_1,
+	S2MPG10_METER_ACC_DATA_CH3_2,
+	S2MPG10_METER_ACC_DATA_CH3_3,
+	S2MPG10_METER_ACC_DATA_CH3_4,
+	S2MPG10_METER_ACC_DATA_CH3_5,
+	S2MPG10_METER_ACC_DATA_CH3_6,
+	S2MPG10_METER_ACC_DATA_CH4_1,
+	S2MPG10_METER_ACC_DATA_CH4_2,
+	S2MPG10_METER_ACC_DATA_CH4_3,
+	S2MPG10_METER_ACC_DATA_CH4_4,
+	S2MPG10_METER_ACC_DATA_CH4_5,
+	S2MPG10_METER_ACC_DATA_CH4_6,
+	S2MPG10_METER_ACC_DATA_CH5_1,
+	S2MPG10_METER_ACC_DATA_CH5_2,
+	S2MPG10_METER_ACC_DATA_CH5_3,
+	S2MPG10_METER_ACC_DATA_CH5_4,
+	S2MPG10_METER_ACC_DATA_CH5_5,
+	S2MPG10_METER_ACC_DATA_CH5_6,
+	S2MPG10_METER_ACC_DATA_CH6_1,
+	S2MPG10_METER_ACC_DATA_CH6_2,
+	S2MPG10_METER_ACC_DATA_CH6_3,
+	S2MPG10_METER_ACC_DATA_CH6_4,
+	S2MPG10_METER_ACC_DATA_CH6_5,
+	S2MPG10_METER_ACC_DATA_CH6_6,
+	S2MPG10_METER_ACC_DATA_CH7_1,
+	S2MPG10_METER_ACC_DATA_CH7_2,
+	S2MPG10_METER_ACC_DATA_CH7_3,
+	S2MPG10_METER_ACC_DATA_CH7_4,
+	S2MPG10_METER_ACC_DATA_CH7_5,
+	S2MPG10_METER_ACC_DATA_CH7_6,
+	S2MPG10_METER_ACC_COUNT_1,
+	S2MPG10_METER_ACC_COUNT_2,
+	S2MPG10_METER_ACC_COUNT_3,
+	S2MPG10_METER_LPF_DATA_CH0_1,
+	S2MPG10_METER_LPF_DATA_CH0_2,
+	S2MPG10_METER_LPF_DATA_CH0_3,
+	S2MPG10_METER_LPF_DATA_CH1_1,
+	S2MPG10_METER_LPF_DATA_CH1_2,
+	S2MPG10_METER_LPF_DATA_CH1_3,
+	S2MPG10_METER_LPF_DATA_CH2_1,
+	S2MPG10_METER_LPF_DATA_CH2_2,
+	S2MPG10_METER_LPF_DATA_CH2_3,
+	S2MPG10_METER_LPF_DATA_CH3_1,
+	S2MPG10_METER_LPF_DATA_CH3_2,
+	S2MPG10_METER_LPF_DATA_CH3_3,
+	S2MPG10_METER_LPF_DATA_CH4_1,
+	S2MPG10_METER_LPF_DATA_CH4_2,
+	S2MPG10_METER_LPF_DATA_CH4_3,
+	S2MPG10_METER_LPF_DATA_CH5_1,
+	S2MPG10_METER_LPF_DATA_CH5_2,
+	S2MPG10_METER_LPF_DATA_CH5_3,
+	S2MPG10_METER_LPF_DATA_CH6_1,
+	S2MPG10_METER_LPF_DATA_CH6_2,
+	S2MPG10_METER_LPF_DATA_CH6_3,
+	S2MPG10_METER_LPF_DATA_CH7_1,
+	S2MPG10_METER_LPF_DATA_CH7_2,
+	S2MPG10_METER_LPF_DATA_CH7_3,
+	S2MPG10_METER_DSM_TRIM_OFFSET = 0xee,
+	S2MPG10_METER_BUCK_METER_TRIM3 = 0xf1,
+};
+
+/* S2MPG10 regulator IDs */
+enum s2mpg10_regulators {
+	S2MPG10_LDO1,
+	S2MPG10_LDO2,
+	S2MPG10_LDO3,
+	S2MPG10_LDO4,
+	S2MPG10_LDO5,
+	S2MPG10_LDO6,
+	S2MPG10_LDO7,
+	S2MPG10_LDO8,
+	S2MPG10_LDO9,
+	S2MPG10_LDO10,
+	S2MPG10_LDO11,
+	S2MPG10_LDO12,
+	S2MPG10_LDO13,
+	S2MPG10_LDO14,
+	S2MPG10_LDO15,
+	S2MPG10_LDO16,
+	S2MPG10_LDO17,
+	S2MPG10_LDO18,
+	S2MPG10_LDO19,
+	S2MPG10_LDO20,
+	S2MPG10_LDO21,
+	S2MPG10_LDO22,
+	S2MPG10_LDO23,
+	S2MPG10_LDO24,
+	S2MPG10_LDO25,
+	S2MPG10_LDO26,
+	S2MPG10_LDO27,
+	S2MPG10_LDO28,
+	S2MPG10_LDO29,
+	S2MPG10_LDO30,
+	S2MPG10_LDO31,
+	S2MPG10_BUCK1,
+	S2MPG10_BUCK2,
+	S2MPG10_BUCK3,
+	S2MPG10_BUCK4,
+	S2MPG10_BUCK5,
+	S2MPG10_BUCK6,
+	S2MPG10_BUCK7,
+	S2MPG10_BUCK8,
+	S2MPG10_BUCK9,
+	S2MPG10_BUCK10,
+	S2MPG10_REGULATOR_MAX,
+};
+
+#endif /* __LINUX_MFD_S2MPG10_H */
-- 
cgit v1.2.3


From adf91d9e1983dec3d5fabc273e8ff89918b852c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
Date: Wed, 9 Apr 2025 21:37:39 +0100
Subject: mfd: sec: Change device_type to int
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that sec-i2c doesn't match device type by pointer casting anymore,
we can switch the device type from unsigned long to int easily.

This saves a few bytes in struct sec_pmic_dev due to member alignment.

Signed-off-by: André Draszik <andre.draszik@linaro.org>
Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-18-d66d5f39b6bf@linaro.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/samsung/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index c1102324172a..d785e101fe79 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -67,7 +67,7 @@ struct sec_pmic_dev {
 	struct regmap *regmap_pmic;
 	struct i2c_client *i2c;
 
-	unsigned long device_type;
+	int device_type;
 	int irq;
 	struct regmap_irq_chip_data *irq_data;
 };
-- 
cgit v1.2.3


From 34c5f34df95f71fc500ff0e942210ebc97d2ef65 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Fri, 9 May 2025 18:35:20 +0100
Subject: mfd: sm501: Remove unused sm501_find_clock

sm501_find_clock() was added in 2007 as part of
commit b6d6454fdb66 ("[PATCH] mfd: SM501 core driver")
but hasn't been used.

Remove it.

Signed-off-by: "Dr. David Alan Gilbert" <linux@treblig.org>
Link: https://lore.kernel.org/r/20250509173521.49596-1-linux@treblig.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/sm501.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 2f3488b2875d..bcda27a46e7a 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -12,9 +12,6 @@ extern int sm501_unit_power(struct device *dev,
 extern unsigned long sm501_set_clock(struct device *dev,
 				     int clksrc, unsigned long freq);
 
-extern unsigned long sm501_find_clock(struct device *dev,
-				      int clksrc, unsigned long req_freq);
-
 /* sm501_misc_control
  *
  * Modify the SM501's MISC_CONTROL register
-- 
cgit v1.2.3


From db26d62d79e4068934ad0dccdb92715df36352b9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 23 May 2025 08:57:52 +0100
Subject: netfs: Fix undifferentiation of DIO reads from unbuffered reads

On cifs, "DIO reads" (specified by O_DIRECT) need to be differentiated from
"unbuffered reads" (specified by cache=none in the mount parameters).  The
difference is flagged in the protocol and the server may behave
differently: Windows Server will, for example, mandate that DIO reads are
block aligned.

Fix this by adding a NETFS_UNBUFFERED_READ to differentiate this from
NETFS_DIO_READ, parallelling the write differentiation that already exists.
cifs will then do the right thing.

Fixes: 016dc8516aec ("netfs: Implement unbuffered/DIO read support")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/3444961.1747987072@warthog.procyon.org.uk
Reviewed-by: "Paulo Alcantara (Red Hat)" <pc@manguebit.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
cc: Steve French <sfrench@samba.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 7a649cfedc09..065c17385e53 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -203,6 +203,7 @@ enum netfs_io_origin {
 	NETFS_READ_GAPS,		/* This read is a synchronous read to fill gaps */
 	NETFS_READ_SINGLE,		/* This read should be treated as a single object */
 	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
+	NETFS_UNBUFFERED_READ,		/* This is an unbuffered read */
 	NETFS_DIO_READ,			/* This is a direct I/O read */
 	NETFS_WRITEBACK,		/* This write was triggered by writepages */
 	NETFS_WRITEBACK_SINGLE,		/* This monolithic write was triggered by writepages */
-- 
cgit v1.2.3


From 4e83ae6ec87dddac070ba349d3b839589b1bb957 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 23 May 2025 10:47:06 +0200
Subject: mips, net: ensure that SOCK_COREDUMP is defined

For historical reasons mips has to override the socket enum values but
the defines are all the same. So simply move the ARCH_HAS_SOCKET_TYPES
scope.

Fixes: a9194f88782a ("coredump: add coredump socket")
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/net.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 139c85d0f2ea..f60fff91e1df 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -70,6 +70,7 @@ enum sock_type {
 	SOCK_DCCP	= 6,
 	SOCK_PACKET	= 10,
 };
+#endif /* ARCH_HAS_SOCKET_TYPES */
 
 #define SOCK_MAX (SOCK_PACKET + 1)
 /* Mask which covers at least up to SOCK_MASK-1.  The
@@ -83,8 +84,6 @@ enum sock_type {
 #endif
 #define SOCK_COREDUMP	O_NOCTTY
 
-#endif /* ARCH_HAS_SOCKET_TYPES */
-
 /**
  * enum sock_shutdown_cmd - Shutdown types
  * @SHUT_RD: shutdown receptions
-- 
cgit v1.2.3


From 0e81cfd971dc4833c699dcd8924e54a5021bc4e8 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 19 May 2025 13:57:57 -0700
Subject: af_unix: Move SOCK_PASS{CRED,PIDFD,SEC} to struct sock.

As explained in the next patch, SO_PASSRIGHTS would have a problem
if we assigned a corresponding bit to socket->flags, so it must be
managed in struct sock.

Mixing socket->flags and sk->sk_flags for similar options will look
confusing, and sk->sk_flags does not have enough space on 32bit system.

Also, as mentioned in commit 16e572626961 ("af_unix: dont send
SCM_CREDENTIALS by default"), SOCK_PASSCRED and SOCK_PASSPID handling
is known to be slow, and managing the flags in struct socket cannot
avoid that for embryo sockets.

Let's move SOCK_PASS{CRED,PIDFD,SEC} to struct sock.

While at it, other SOCK_XXX flags in net.h are grouped as enum.

Note that assign_bit() was atomic, so the writer side is moved down
after lock_sock() in setsockopt(), but the bit is only read once
in sendmsg() and recvmsg(), so lock_sock() is not needed there.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 0ff950eecc6b..f8418d6e33e0 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -36,14 +36,13 @@ struct net;
  * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
  * Eventually all flags will be in sk->sk_wq->flags.
  */
-#define SOCKWQ_ASYNC_NOSPACE	0
-#define SOCKWQ_ASYNC_WAITDATA	1
-#define SOCK_NOSPACE		2
-#define SOCK_PASSCRED		3
-#define SOCK_PASSSEC		4
-#define SOCK_SUPPORT_ZC		5
-#define SOCK_CUSTOM_SOCKOPT	6
-#define SOCK_PASSPIDFD		7
+enum socket_flags {
+	SOCKWQ_ASYNC_NOSPACE,
+	SOCKWQ_ASYNC_WAITDATA,
+	SOCK_NOSPACE,
+	SOCK_SUPPORT_ZC,
+	SOCK_CUSTOM_SOCKOPT,
+};
 
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
-- 
cgit v1.2.3


From d1d89e8eee6f0e91cfad2a0375ad679fd5c1ae83 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 21 May 2025 15:41:46 +0200
Subject: USB: gadget: fix up const issue with struct usb_function_instance

In struct usb_function, the struct usb_function_instance pointer
variable "fi" is listed as const, but it is written to in numerous
places, making the const marking of it a total lie.  Fix this up by just
removing the const pointer attribute as this is modified in numerous
places.

Link: https://lore.kernel.org/r/2025052145-undress-puma-f7cf@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/composite.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 6e38fb9d2117..d8c4e9f73839 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -237,7 +237,7 @@ struct usb_function {
 	/* internals */
 	struct list_head		list;
 	DECLARE_BITMAP(endpoints, 32);
-	const struct usb_function_instance *fi;
+	struct usb_function_instance *fi;
 
 	unsigned int		bind_deactivated:1;
 };
-- 
cgit v1.2.3


From a1f1acb9c5db9b385c9b3eb1f27f897c06df49ae Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 12:28:44 +0200
Subject: netfilter: nf_dup{4, 6}: Move duplication check to task_struct

nf_skb_duplicated is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Due to the recursion involved, the simplest change is to make it a
per-task variable.

Move the per-CPU variable nf_skb_duplicated to task_struct and name it
in_nf_duplicate. Add it to the existing bitfield so it doesn't use
additional memory.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Valentin Schneider <vschneid@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 11 -----------
 include/linux/sched.h     |  1 +
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2b8aac2c70ad..892d12823ed4 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -497,17 +497,6 @@ struct nf_defrag_hook {
 extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook;
 extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook;
 
-/*
- * nf_skb_duplicated - TEE target has sent a packet
- *
- * When a xtables target sends a packet, the OUTPUT and POSTROUTING
- * hooks are traversed again, i.e. nft and xtables are invoked recursively.
- *
- * This is used by xtables TEE target to prevent the duplicated skb from
- * being duplicated again.
- */
-DECLARE_PER_CPU(bool, nf_skb_duplicated);
-
 /*
  * Contains bitmask of ctnetlink event subscribers, if any.
  * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..52d9c52dc8f2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1044,6 +1044,7 @@ struct task_struct {
 	/* delay due to memory thrashing */
 	unsigned                        in_thrashing:1;
 #endif
+	unsigned			in_nf_duplicate:1;
 #ifdef CONFIG_PREEMPT_RT
 	struct netdev_xmit		net_xmit;
 #endif
-- 
cgit v1.2.3


From f37ad91270397a6d053e8623bdb3cf79859691d2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 12 May 2025 12:28:46 +0200
Subject: netfilter: nf_dup_netdev: Move the recursion counter struct
 netdev_xmit

nf_dup_skb_recursion is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Move nf_dup_skb_recursion to struct netdev_xmit, provide wrappers.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netdevice_xmit.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
index 848735b3a7c0..813a19122ebb 100644
--- a/include/linux/netdevice_xmit.h
+++ b/include/linux/netdevice_xmit.h
@@ -11,6 +11,9 @@ struct netdev_xmit {
 #if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
 	u8 sched_mirred_nest;
 #endif
+#if IS_ENABLED(CONFIG_NF_DUP_NETDEV)
+	u8 nf_dup_skb_recursion;
+#endif
 };
 
 #endif
-- 
cgit v1.2.3


From 90869f43d06dfc836def2f53850a878f829e443e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 22 May 2025 15:49:33 +0200
Subject: netfilter: conntrack: make nf_conntrack_id callable without a module
 dependency

While nf_conntrack_id() doesn't need any functionaliy from conntrack, it
does reside in nf_conntrack_core.c -- callers add a module
dependency on conntrack.

Followup patch will need to compute the conntrack id from nf_tables_trace.c
to include it in nf_trace messages emitted to userspace via netlink.

I don't want to introduce a module dependency between nf_tables and
conntrack for this.

Since trace is slowpath, the added indirection is ok.

One alternative is to move nf_conntrack_id to the netfilter/core.c,
but I don't see a compelling reason so far.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 892d12823ed4..20947f2c685b 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -470,6 +470,7 @@ struct nf_ct_hook {
 	void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
 	void (*set_closing)(struct nf_conntrack *nfct);
 	int (*confirm)(struct sk_buff *skb);
+	u32 (*get_id)(const struct nf_conntrack *nfct);
 };
 extern const struct nf_ct_hook __rcu *nf_ct_hook;
 
-- 
cgit v1.2.3


From 73319a8ee18b9cf0b2dac87f8521595e0381ba0c Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 21 May 2025 22:44:26 +0200
Subject: netfilter: nf_tables: Have a list of nf_hook_ops in nft_hook

Supporting a 1:n relationship between nft_hook and nf_hook_ops is
convenient since a chain's or flowtable's nft_hooks may remain in place
despite matching interfaces disappearing. This stabilizes ruleset dumps
in that regard and opens the possibility to claim newly added interfaces
which match the spec. Also it prepares for wildcard interface specs
since these will potentially match multiple interfaces.

All spots dealing with hook registration are updated to handle a list of
multiple nf_hook_ops, but nft_netdev_hook_alloc() only adds a single
item for now to retain the old behaviour. The only expected functional
change here is how vanishing interfaces are handled: Instead of dropping
the respective nft_hook, only the matching nf_hook_ops are dropped.

To safely remove individual ops from the list in netdev handlers, an
rcu_head is added to struct nf_hook_ops so kfree_rcu() may be used.
There is at least nft_flowtable_find_dev() which may be iterating
through the list at the same time.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 20947f2c685b..5f896fcc074d 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -95,6 +95,9 @@ enum nf_hook_ops_type {
 };
 
 struct nf_hook_ops {
+	struct list_head	list;
+	struct rcu_head		rcu;
+
 	/* User fills in from here down. */
 	nf_hookfn		*hook;
 	struct net_device	*dev;
-- 
cgit v1.2.3


From 101f2bbab541116ab861b9c3ac0ece07a7eaa756 Mon Sep 17 00:00:00 2001
From: Stephen Brennan <stephen.s.brennan@oracle.com>
Date: Wed, 7 May 2025 15:34:01 -0700
Subject: fs: convert mount flags to enum

In prior kernel versions (5.8-6.8), commit 9f6c61f96f2d9 ("proc/mounts:
add cursor") introduced MNT_CURSOR, a flag used by readers from
/proc/mounts to keep their place while reading the file. Later, commit
2eea9ce4310d8 ("mounts: keep list of mounts in an rbtree") removed this
flag and its value has since been repurposed.

For debuggers iterating over the list of mounts, cursors should be
skipped as they are irrelevant. Detecting whether an element is a cursor
can be difficult. Since the MNT_CURSOR flag is a preprocessor constant,
it's not present in debuginfo, and since its value is repurposed, we
cannot hard-code it. For this specific issue, cursors are possible to
detect in other ways, but ideally, we would be able to read the mount
flag definitions out of the debuginfo. For that reason, convert the
mount flags to an enum.

Link: https://github.com/osandov/drgn/pull/496
Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
Link: https://lore.kernel.org/20250507223402.2795029-1-stephen.s.brennan@oracle.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/mount.h | 87 ++++++++++++++++++++++++++-------------------------
 1 file changed, 45 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mount.h b/include/linux/mount.h
index dcc17ce8a959..6904ad33ee7a 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -22,48 +22,51 @@ struct fs_context;
 struct file;
 struct path;
 
-#define MNT_NOSUID	0x01
-#define MNT_NODEV	0x02
-#define MNT_NOEXEC	0x04
-#define MNT_NOATIME	0x08
-#define MNT_NODIRATIME	0x10
-#define MNT_RELATIME	0x20
-#define MNT_READONLY	0x40	/* does the user want this to be r/o? */
-#define MNT_NOSYMFOLLOW	0x80
-
-#define MNT_SHRINKABLE	0x100
-#define MNT_WRITE_HOLD	0x200
-
-#define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
-#define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
-/*
- * MNT_SHARED_MASK is the set of flags that should be cleared when a
- * mount becomes shared.  Currently, this is only the flag that says a
- * mount cannot be bind mounted, since this is how we create a mount
- * that shares events with another mount.  If you add a new MNT_*
- * flag, consider how it interacts with shared mounts.
- */
-#define MNT_SHARED_MASK	(MNT_UNBINDABLE)
-#define MNT_USER_SETTABLE_MASK  (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
-				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
-				 | MNT_READONLY | MNT_NOSYMFOLLOW)
-#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
-
-#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
-			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
-
-#define MNT_INTERNAL	0x4000
-
-#define MNT_LOCK_ATIME		0x040000
-#define MNT_LOCK_NOEXEC		0x080000
-#define MNT_LOCK_NOSUID		0x100000
-#define MNT_LOCK_NODEV		0x200000
-#define MNT_LOCK_READONLY	0x400000
-#define MNT_LOCKED		0x800000
-#define MNT_DOOMED		0x1000000
-#define MNT_SYNC_UMOUNT		0x2000000
-#define MNT_MARKED		0x4000000
-#define MNT_UMOUNT		0x8000000
+enum mount_flags {
+	MNT_NOSUID	= 0x01,
+	MNT_NODEV	= 0x02,
+	MNT_NOEXEC	= 0x04,
+	MNT_NOATIME	= 0x08,
+	MNT_NODIRATIME	= 0x10,
+	MNT_RELATIME	= 0x20,
+	MNT_READONLY	= 0x40, /* does the user want this to be r/o? */
+	MNT_NOSYMFOLLOW	= 0x80,
+
+	MNT_SHRINKABLE	= 0x100,
+	MNT_WRITE_HOLD	= 0x200,
+
+	MNT_SHARED	= 0x1000, /* if the vfsmount is a shared mount */
+	MNT_UNBINDABLE	= 0x2000, /* if the vfsmount is a unbindable mount */
+
+	MNT_INTERNAL	= 0x4000,
+
+	MNT_LOCK_ATIME		= 0x040000,
+	MNT_LOCK_NOEXEC		= 0x080000,
+	MNT_LOCK_NOSUID		= 0x100000,
+	MNT_LOCK_NODEV		= 0x200000,
+	MNT_LOCK_READONLY	= 0x400000,
+	MNT_LOCKED		= 0x800000,
+	MNT_DOOMED		= 0x1000000,
+	MNT_SYNC_UMOUNT		= 0x2000000,
+	MNT_MARKED		= 0x4000000,
+	MNT_UMOUNT		= 0x8000000,
+
+	/*
+	 * MNT_SHARED_MASK is the set of flags that should be cleared when a
+	 * mount becomes shared.  Currently, this is only the flag that says a
+	 * mount cannot be bind mounted, since this is how we create a mount
+	 * that shares events with another mount.  If you add a new MNT_*
+	 * flag, consider how it interacts with shared mounts.
+	 */
+	MNT_SHARED_MASK	= MNT_UNBINDABLE,
+	MNT_USER_SETTABLE_MASK  = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC
+				  | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME
+				  | MNT_READONLY | MNT_NOSYMFOLLOW,
+	MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
+
+	MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
+			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED,
+};
 
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
-- 
cgit v1.2.3


From 09683a6184ad6fd635831402316651392b56cc3a Mon Sep 17 00:00:00 2001
From: Karolina Stolarek <karolina.stolarek@oracle.com>
Date: Thu, 22 May 2025 18:21:21 -0500
Subject: PCI/AER: Rename struct aer_stats to aer_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update name to reflect the broader definition of structs/variables that are
stored (e.g. ratelimits). This is a preparatory patch for adding rate limit
support.

[bhelgaas: "aer_report" -> "aer_info"]

Signed-off-by: Karolina Stolarek <karolina.stolarek@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://patch.msgid.link/20250522232339.1525671-16-helgaas@kernel.org
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e8e3fd77e96..81a81dbfc873 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -346,7 +346,7 @@ struct pci_dev {
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
 #ifdef CONFIG_PCIEAER
 	u16		aer_cap;	/* AER capability offset */
-	struct aer_stats *aer_stats;	/* AER stats for this device */
+	struct aer_info	*aer_info;	/* AER info for this device */
 #endif
 #ifdef CONFIG_PCIEPORTBUS
 	struct rcec_ea	*rcec_ea;	/* RCEC cached endpoint association */
-- 
cgit v1.2.3


From 13423063c7cb7d1c34104a78cae85eb0281bae90 Mon Sep 17 00:00:00 2001
From: Roman Kisel <romank@linux.microsoft.com>
Date: Mon, 28 Apr 2025 14:07:32 -0700
Subject: arm64: kvm, smccc: Introduce and use API for getting hypervisor UUID

The KVM/arm64 uses SMCCC to detect hypervisor presence. That code is
private, and it follows the SMCCC specification. Other existing and
emerging hypervisor guest implementations can and should use that
standard approach as well.

Factor out a common infrastructure that the guests can use, update KVM
to employ the new API. The central notion of the SMCCC method is the
UUID of the hypervisor, and the new API follows that.

No functional changes. Validated with a KVM/arm64 guest.

Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20250428210742.435282-2-romank@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20250428210742.435282-2-romank@linux.microsoft.com>
---
 include/linux/arm-smccc.h | 64 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index a3863da1510e..784ebe4607a4 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -7,6 +7,11 @@
 
 #include <linux/args.h>
 #include <linux/init.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/uuid.h>
+#endif
+
 #include <uapi/linux/const.h>
 
 /*
@@ -107,10 +112,10 @@
 			   ARM_SMCCC_FUNC_QUERY_CALL_UID)
 
 /* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */
-#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0	0xb66fb428U
-#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1	0xe911c52eU
-#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2	0x564bcaa9U
-#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3	0x743a004dU
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM UUID_INIT(\
+	0xb66fb428, 0xc52e, 0xe911, \
+	0xa9, 0xca, 0x4b, 0x56, \
+	0x4d, 0x00, 0x3a, 0x74)
 
 /* KVM "vendor specific" services */
 #define ARM_SMCCC_KVM_FUNC_FEATURES		0
@@ -348,6 +353,57 @@ s32 arm_smccc_get_soc_id_version(void);
  */
 s32 arm_smccc_get_soc_id_revision(void);
 
+#ifndef __ASSEMBLY__
+
+/*
+ * Returns whether a specific hypervisor UUID is advertised for the
+ * Vendor Specific Hypervisor Service range.
+ */
+bool arm_smccc_hypervisor_has_uuid(const uuid_t *uuid);
+
+static inline uuid_t smccc_res_to_uuid(u32 r0, u32 r1, u32 r2, u32 r3)
+{
+	uuid_t uuid = {
+		.b = {
+			[0]  = (r0 >> 0)  & 0xff,
+			[1]  = (r0 >> 8)  & 0xff,
+			[2]  = (r0 >> 16) & 0xff,
+			[3]  = (r0 >> 24) & 0xff,
+
+			[4]  = (r1 >> 0)  & 0xff,
+			[5]  = (r1 >> 8)  & 0xff,
+			[6]  = (r1 >> 16) & 0xff,
+			[7]  = (r1 >> 24) & 0xff,
+
+			[8]  = (r2 >> 0)  & 0xff,
+			[9]  = (r2 >> 8)  & 0xff,
+			[10] = (r2 >> 16) & 0xff,
+			[11] = (r2 >> 24) & 0xff,
+
+			[12] = (r3 >> 0)  & 0xff,
+			[13] = (r3 >> 8)  & 0xff,
+			[14] = (r3 >> 16) & 0xff,
+			[15] = (r3 >> 24) & 0xff,
+		},
+	};
+
+	return uuid;
+}
+
+static inline u32 smccc_uuid_to_reg(const uuid_t *uuid, int reg)
+{
+	u32 val = 0;
+
+	val |= (u32)(uuid->b[4 * reg + 0] << 0);
+	val |= (u32)(uuid->b[4 * reg + 1] << 8);
+	val |= (u32)(uuid->b[4 * reg + 2] << 16);
+	val |= (u32)(uuid->b[4 * reg + 3] << 24);
+
+	return val;
+}
+
+#endif /* !__ASSEMBLY__ */
+
 /**
  * struct arm_smccc_res - Result from SMC/HVC call
  * @a0-a3 result values from registers 0 to 3
-- 
cgit v1.2.3


From 18a34bb5221e2b79dbcba5bb50d92beb45b68e15 Mon Sep 17 00:00:00 2001
From: Roman Kisel <romank@linux.microsoft.com>
Date: Mon, 28 Apr 2025 14:07:40 -0700
Subject: Drivers: hv: vmbus: Introduce hv_get_vmbus_root_device()

The ARM64 PCI code for hyperv needs to know the VMBus root
device, and it is private.

Provide a function that returns it. Rename it from "hv_dev"
as "hv_dev" as a symbol is very overloaded. No functional
changes.

Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20250428210742.435282-10-romank@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20250428210742.435282-10-romank@linux.microsoft.com>
---
 include/linux/hyperv.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d6ffe01962c2..23b3c3a2ed8c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1283,6 +1283,8 @@ static inline void *hv_get_drvdata(struct hv_device *dev)
 	return dev_get_drvdata(&dev->device);
 }
 
+struct device *hv_get_vmbus_root_device(void);
+
 struct hv_ring_buffer_debug_info {
 	u32 current_interrupt_mask;
 	u32 current_read_index;
-- 
cgit v1.2.3


From ab7e531a8212394608838c31e9dfac69fafa6c8a Mon Sep 17 00:00:00 2001
From: Roman Kisel <romank@linux.microsoft.com>
Date: Mon, 28 Apr 2025 14:07:41 -0700
Subject: ACPI: irq: Introduce acpi_get_gsi_dispatcher()

Using acpi_irq_create_hierarchy() in the cases where the code
also handles OF leads to code duplication as the ACPI subsystem
doesn't provide means to compute the IRQ domain parent whereas
the OF does.

Introduce acpi_get_gsi_dispatcher() so that the drivers relying
on both ACPI and OF may use irq_domain_create_hierarchy() in the
common code paths.

No functional changes.

Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20250428210742.435282-11-romank@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20250428210742.435282-11-romank@linux.microsoft.com>
---
 include/linux/acpi.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3f2e93ed9730..14e8871b5787 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -335,8 +335,11 @@ int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi);
 
+typedef struct fwnode_handle *(*acpi_gsi_domain_disp_fn)(u32);
+
 void acpi_set_irq_model(enum acpi_irq_model_id model,
-			struct fwnode_handle *(*)(u32));
+			acpi_gsi_domain_disp_fn fn);
+acpi_gsi_domain_disp_fn acpi_get_gsi_dispatcher(void);
 void acpi_set_gsi_to_irq_fallback(u32 (*)(u32));
 
 struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
-- 
cgit v1.2.3


From 059717c2ba1f745f35f4bbb8991e408c12031f1e Mon Sep 17 00:00:00 2001
From: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Date: Fri, 23 May 2025 13:20:01 -0400
Subject: ACPI: MRRM: Fix default max memory region

Per the spec, the default max memory region must be 1 covering
all system memory.

When platform does not provide ACPI MRRM table or
when CONFIG_ACPI is opted out, the acpi_mrrm_max_mem_region() function
defaults to  returning 1 region complying to RDT spec.

Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://patch.msgid.link/20250523172001.1761634-1-anil.s.keshavamurthy@intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c409f4cecb09..d9ba7b4fa2cd 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1098,7 +1098,7 @@ static inline acpi_handle acpi_get_processor_handle(int cpu)
 
 static inline int acpi_mrrm_max_mem_region(void)
 {
-	return -ENOENT;
+	return 1;
 }
 
 #endif	/* !CONFIG_ACPI */
-- 
cgit v1.2.3


From 588ca944c27729c7f950d1f44c6d6700a919969a Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Wed, 21 May 2025 13:47:45 +0100
Subject: cxl/edac: Add CXL memory device memory sparing control feature

Memory sparing is defined as a repair function that replaces a portion of
memory with a portion of functional memory at that same DPA. The subclasses
for this operation vary in terms of the scope of the sparing being
performed. The cacheline sparing subclass refers to a sparing action that
can replace a full cacheline. Row sparing is provided as an alternative to
PPR sparing functions and its scope is that of a single DDR row.
As per CXL r3.2 Table 8-125 foot note 1. Memory sparing is preferred over
PPR when possible.
Bank sparing allows an entire bank to be replaced. Rank sparing is defined
as an operation in which an entire DDR rank is replaced.

Memory sparing maintenance operations may be supported by CXL devices
that implement CXL.mem protocol. A sparing maintenance operation requests
the CXL device to perform a repair operation on its media.
For example, a CXL device with DRAM components that support memory sparing
features may implement sparing maintenance operations.

The host may issue a query command by setting query resources flag in the
input payload (CXL spec 3.2 Table 8-120) to determine availability of
sparing resources for a given address. In response to a query request,
the device shall report the resource availability by producing the memory
sparing event record (CXL spec 3.2 Table 8-60) in which the Channel, Rank,
Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields are a copy
of the values specified in the request.

During the execution of a sparing maintenance operation, a CXL memory
device:
- may not retain data
- may not be able to process CXL.mem requests correctly.
These CXL memory device capabilities are specified by restriction flags
in the memory sparing feature readable attributes.

When a CXL device identifies error on a memory component, the device
may inform the host about the need for a memory sparing maintenance
operation by using DRAM event record, where the 'maintenance needed' flag
may set. The event record contains some of the DPA, Channel, Rank,
Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields that
should be repaired. The userspace tool requests for maintenance operation
if the 'maintenance needed' flag set in the CXL DRAM error record.

CXL spec 3.2 section 8.2.10.7.1.4 describes the device's memory sparing
maintenance operation feature.

CXL spec 3.2 section 8.2.10.7.2.3 describes the memory sparing feature
discovery and configuration.

Add support for controlling CXL memory device memory sparing feature.
Register with EDAC driver, which gets the memory repair attr descriptors
from the EDAC memory repair driver and exposes sysfs repair control
attributes for memory sparing to the userspace. For example CXL memory
sparing control for the CXL mem0 device is exposed in
/sys/bus/edac/devices/cxl_mem0/mem_repairX/

Use case
========
1. CXL device identifies a failure in a memory component, report to
   userspace in a CXL DRAM trace event with DPA and other attributes of
   memory to repair such as channel, rank, nibble mask, bank Group,
   bank, row, column, sub-channel.

2. Rasdaemon process the trace event and may issue query request in sysfs
check resources available for memory sparing if either of the following
conditions met.
 - 'maintenance needed' flag set in the event record.
 - 'threshold event' flag set for CVME threshold feature.
 - When the number of corrected error reported on a CXL.mem media to the
   userspace exceeds the threshold value for corrected error count defined
   by the userspace policy.

3. Rasdaemon process the memory sparing trace event and issue repair
   request for memory sparing.

Kernel CXL driver shall report memory sparing event record to the userspace
with the resource availability in order rasdaemon to process the event
record and issue a repair request in sysfs for the memory sparing operation
in the CXL device.

Note: Based on the feedbacks from the community 'query' sysfs attribute is
removed and reporting memory sparing error record to the userspace are not
supported. Instead userspace issues sparing operation and kernel does the
same to the CXL memory device, when 'maintenance needed' flag set in the
DRAM event record.

Add checks to ensure the memory to be repaired is offline and if online,
then originates from a CXL DRAM error record reported in the current boot
before requesting a memory sparing operation on the device.

Note: Tested memory sparing feature control with QEMU patch
      "hw/cxl: Add emulation for memory sparing control feature"
      https://lore.kernel.org/linux-cxl/20250509172229.726-1-shiju.jose@huawei.com/T/#m5f38512a95670d75739f9dad3ee91b95c7f5c8d6

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20250521124749.817-8-shiju.jose@huawei.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/edac.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 451f9c152c99..fa32f2aca22f 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -745,9 +745,16 @@ static inline int edac_ecs_get_desc(struct device *ecs_dev,
 #endif /* CONFIG_EDAC_ECS */
 
 enum edac_mem_repair_type {
+	EDAC_REPAIR_PPR,
+	EDAC_REPAIR_CACHELINE_SPARING,
+	EDAC_REPAIR_ROW_SPARING,
+	EDAC_REPAIR_BANK_SPARING,
+	EDAC_REPAIR_RANK_SPARING,
 	EDAC_REPAIR_MAX
 };
 
+extern const char * const edac_repair_type[];
+
 enum edac_mem_repair_cmd {
 	EDAC_DO_MEM_REPAIR = 1,
 };
-- 
cgit v1.2.3


From e7d952cc39fca34386ec9f15f68cb2eaac01b5ae Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 24 May 2025 11:23:25 +0200
Subject: perf/headers: Clean up <linux/perf_event.h> a bit

Do a bit of readability spring cleaning:

 - Fix misaligned structure member in perf_addr_filter: the new
   struct perf_addr_filter::action member was too long, but when
   it was added it was not aligned properly. Align all fields to
   the customary column 41 alignment of most of the rest of the
   header.

 - Adjust the vertical alignment of the definition of other
   structures and definitions as well, so that the 'most of' in
   the previous paragraph changes to 'all of'. ;-)

 - Prettify the assignments in perf_clear_branch_entry_bitfields()

 - Move comments from CPP definitions to outside the macro

 - Move perf_guest_info_callbacks and related defines from the front
   of the header closer to where it's used within the header.

 - And more #endif markers for larger CPP blocks and standardize
   #if/#else/#endif blocks to the following nomenclature:

	#ifdef CONFIG_FOO
	...
	#else /* !CONFIG_FOO: */
	...
	#endif /* !CONFIG_FOO */

 - Standardize on consistently using the 'extern' storage class where
   appropriate, we had cases where method prototypes sometimes omitted
   the storage class:

	extern void perf_pmu_migrate_context(struct pmu *pmu,
					int src_cpu, int dst_cpu);
	int perf_event_read_local(struct perf_event *event, u64 *value,
				  u64 *enabled, u64 *running);
	extern u64 perf_event_read_value(struct perf_event *event,
					 u64 *enabled, u64 *running);

   Which is obviously a bit confusing and adds unnecessary noise.

 - s/__u64/u64 and similar cleanups: there's no point in using __u64
   in non-UAPI headers, and doing so only adds unnecessary visual noise.

 - Harmonize all multi-parameter function prototypes along the following
   style:

	extern struct perf_event *
	perf_event_create_kernel_counter(struct perf_event_attr *attr,
					 int cpu,
					 struct task_struct *task,
					 perf_overflow_handler_t callback,
					 void *context);
 - etc.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 282 +++++++++++++++++++++++++--------------------
 1 file changed, 155 insertions(+), 127 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a96c00e2ceca..52dc7cfab0e0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -26,18 +26,9 @@
 # include <asm/local64.h>
 #endif
 
-#define PERF_GUEST_ACTIVE	0x01
-#define PERF_GUEST_USER	0x02
-
-struct perf_guest_info_callbacks {
-	unsigned int			(*state)(void);
-	unsigned long			(*get_ip)(void);
-	unsigned int			(*handle_intel_pt_intr)(void);
-};
-
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-#include <linux/rhashtable-types.h>
-#include <asm/hw_breakpoint.h>
+# include <linux/rhashtable-types.h>
+# include <asm/hw_breakpoint.h>
 #endif
 
 #include <linux/list.h>
@@ -62,19 +53,20 @@ struct perf_guest_info_callbacks {
 #include <linux/security.h>
 #include <linux/static_call.h>
 #include <linux/lockdep.h>
+
 #include <asm/local.h>
 
 struct perf_callchain_entry {
-	__u64				nr;
-	__u64				ip[]; /* /proc/sys/kernel/perf_event_max_stack */
+	u64				nr;
+	u64				ip[]; /* /proc/sys/kernel/perf_event_max_stack */
 };
 
 struct perf_callchain_entry_ctx {
-	struct perf_callchain_entry *entry;
-	u32			    max_stack;
-	u32			    nr;
-	short			    contexts;
-	bool			    contexts_maxed;
+	struct perf_callchain_entry	*entry;
+	u32				max_stack;
+	u32				nr;
+	short				contexts;
+	bool				contexts_maxed;
 };
 
 typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
@@ -121,8 +113,8 @@ static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
  * already stored in age order, the hw_idx should be 0.
  */
 struct perf_branch_stack {
-	__u64				nr;
-	__u64				hw_idx;
+	u64				nr;
+	u64				hw_idx;
 	struct perf_branch_entry	entries[];
 };
 
@@ -132,10 +124,10 @@ struct task_struct;
  * extra PMU register associated with an event
  */
 struct hw_perf_event_extra {
-	u64		config;	/* register value */
-	unsigned int	reg;	/* register address or index */
-	int		alloc;	/* extra register already allocated */
-	int		idx;	/* index in shared_regs->regs[] */
+	u64				config;	/* register value */
+	unsigned int			reg;	/* register address or index */
+	int				alloc;	/* extra register already allocated */
+	int				idx;	/* index in shared_regs->regs[] */
 };
 
 /**
@@ -144,8 +136,8 @@ struct hw_perf_event_extra {
  * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
  * usage.
  */
-#define PERF_EVENT_FLAG_ARCH			0x0fffffff
-#define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000
+#define PERF_EVENT_FLAG_ARCH		0x0fffffff
+#define PERF_EVENT_FLAG_USER_READ_CNT	0x80000000
 
 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
 
@@ -227,9 +219,14 @@ struct hw_perf_event {
 /*
  * hw_perf_event::state flags; used to track the PERF_EF_* state.
  */
-#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
-#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
-#define PERF_HES_ARCH		0x04
+
+/* the counter is stopped */
+#define PERF_HES_STOPPED		0x01
+
+/* event->count up-to-date */
+#define PERF_HES_UPTODATE		0x02
+
+#define PERF_HES_ARCH			0x04
 
 	int				state;
 
@@ -278,7 +275,7 @@ struct hw_perf_event {
 	 */
 	u64				freq_time_stamp;
 	u64				freq_count_stamp;
-#endif
+#endif /* CONFIG_PERF_EVENTS */
 };
 
 struct perf_event;
@@ -287,29 +284,33 @@ struct perf_event_pmu_context;
 /*
  * Common implementation detail of pmu::{start,commit,cancel}_txn
  */
-#define PERF_PMU_TXN_ADD  0x1		/* txn to add/schedule event on PMU */
-#define PERF_PMU_TXN_READ 0x2		/* txn to read event group from PMU */
+
+/* txn to add/schedule event on PMU */
+#define PERF_PMU_TXN_ADD		0x1
+
+/* txn to read event group from PMU */
+#define PERF_PMU_TXN_READ		0x2
 
 /**
  * pmu::capabilities flags
  */
-#define PERF_PMU_CAP_NO_INTERRUPT		0x0001
-#define PERF_PMU_CAP_NO_NMI			0x0002
-#define PERF_PMU_CAP_AUX_NO_SG			0x0004
-#define PERF_PMU_CAP_EXTENDED_REGS		0x0008
-#define PERF_PMU_CAP_EXCLUSIVE			0x0010
-#define PERF_PMU_CAP_ITRACE			0x0020
-#define PERF_PMU_CAP_NO_EXCLUDE			0x0040
-#define PERF_PMU_CAP_AUX_OUTPUT			0x0080
-#define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0100
-#define PERF_PMU_CAP_AUX_PAUSE			0x0200
-#define PERF_PMU_CAP_AUX_PREFER_LARGE		0x0400
+#define PERF_PMU_CAP_NO_INTERRUPT	0x0001
+#define PERF_PMU_CAP_NO_NMI		0x0002
+#define PERF_PMU_CAP_AUX_NO_SG		0x0004
+#define PERF_PMU_CAP_EXTENDED_REGS	0x0008
+#define PERF_PMU_CAP_EXCLUSIVE		0x0010
+#define PERF_PMU_CAP_ITRACE		0x0020
+#define PERF_PMU_CAP_NO_EXCLUDE		0x0040
+#define PERF_PMU_CAP_AUX_OUTPUT		0x0080
+#define PERF_PMU_CAP_EXTENDED_HW_TYPE	0x0100
+#define PERF_PMU_CAP_AUX_PAUSE		0x0200
+#define PERF_PMU_CAP_AUX_PREFER_LARGE	0x0400
 
 /**
  * pmu::scope
  */
 enum perf_pmu_scope {
-	PERF_PMU_SCOPE_NONE	= 0,
+	PERF_PMU_SCOPE_NONE = 0,
 	PERF_PMU_SCOPE_CORE,
 	PERF_PMU_SCOPE_DIE,
 	PERF_PMU_SCOPE_CLUSTER,
@@ -393,11 +394,21 @@ struct pmu {
 	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
 	 * matching hw_perf_event::state flags.
 	 */
-#define PERF_EF_START	0x01		/* start the counter when adding    */
-#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
-#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
-#define PERF_EF_PAUSE	0x08		/* AUX area event, pause tracing */
-#define PERF_EF_RESUME	0x10		/* AUX area event, resume tracing */
+
+/* start the counter when adding    */
+#define PERF_EF_START			0x01
+
+/* reload the counter when starting */
+#define PERF_EF_RELOAD			0x02
+
+/* update the counter when stopping */
+#define PERF_EF_UPDATE			0x04
+
+/* AUX area event, pause tracing */
+#define PERF_EF_PAUSE			0x08
+
+/* AUX area event, resume tracing */
+#define PERF_EF_RESUME			0x10
 
 	/*
 	 * Adds/Removes a counter to/from the PMU, can be done inside a
@@ -596,10 +607,10 @@ enum perf_addr_filter_action_t {
  * This is a hardware-agnostic filter configuration as specified by the user.
  */
 struct perf_addr_filter {
-	struct list_head	entry;
-	struct path		path;
-	unsigned long		offset;
-	unsigned long		size;
+	struct list_head		entry;
+	struct path			path;
+	unsigned long			offset;
+	unsigned long			size;
 	enum perf_addr_filter_action_t	action;
 };
 
@@ -614,14 +625,14 @@ struct perf_addr_filter {
  * bundled together; see perf_event_addr_filters().
  */
 struct perf_addr_filters_head {
-	struct list_head	list;
-	raw_spinlock_t		lock;
-	unsigned int		nr_file_filters;
+	struct list_head		list;
+	raw_spinlock_t			lock;
+	unsigned int			nr_file_filters;
 };
 
 struct perf_addr_filter_range {
-	unsigned long		start;
-	unsigned long		size;
+	unsigned long			start;
+	unsigned long			size;
 };
 
 /**
@@ -669,24 +680,24 @@ struct swevent_hlist {
 	struct rcu_head			rcu_head;
 };
 
-#define PERF_ATTACH_CONTEXT	0x0001
-#define PERF_ATTACH_GROUP	0x0002
-#define PERF_ATTACH_TASK	0x0004
-#define PERF_ATTACH_TASK_DATA	0x0008
-#define PERF_ATTACH_GLOBAL_DATA	0x0010
-#define PERF_ATTACH_SCHED_CB	0x0020
-#define PERF_ATTACH_CHILD	0x0040
-#define PERF_ATTACH_EXCLUSIVE	0x0080
-#define PERF_ATTACH_CALLCHAIN	0x0100
-#define PERF_ATTACH_ITRACE	0x0200
+#define PERF_ATTACH_CONTEXT		0x0001
+#define PERF_ATTACH_GROUP		0x0002
+#define PERF_ATTACH_TASK		0x0004
+#define PERF_ATTACH_TASK_DATA		0x0008
+#define PERF_ATTACH_GLOBAL_DATA		0x0010
+#define PERF_ATTACH_SCHED_CB		0x0020
+#define PERF_ATTACH_CHILD		0x0040
+#define PERF_ATTACH_EXCLUSIVE		0x0080
+#define PERF_ATTACH_CALLCHAIN		0x0100
+#define PERF_ATTACH_ITRACE		0x0200
 
 struct bpf_prog;
 struct perf_cgroup;
 struct perf_buffer;
 
 struct pmu_event_list {
-	raw_spinlock_t		lock;
-	struct list_head	list;
+	raw_spinlock_t			lock;
+	struct list_head		list;
 };
 
 /*
@@ -696,12 +707,12 @@ struct pmu_event_list {
  * disabled is sufficient since it will hold-off the IPIs.
  */
 #ifdef CONFIG_PROVE_LOCKING
-#define lockdep_assert_event_ctx(event)				\
+# define lockdep_assert_event_ctx(event)			\
 	WARN_ON_ONCE(__lockdep_enabled &&			\
 		     (this_cpu_read(hardirqs_enabled) &&	\
 		      lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
 #else
-#define lockdep_assert_event_ctx(event)
+# define lockdep_assert_event_ctx(event)
 #endif
 
 #define for_each_sibling_event(sibling, event)			\
@@ -859,9 +870,9 @@ struct perf_event {
 #ifdef CONFIG_EVENT_TRACING
 	struct trace_event_call		*tp_event;
 	struct event_filter		*filter;
-#ifdef CONFIG_FUNCTION_TRACER
+# ifdef CONFIG_FUNCTION_TRACER
 	struct ftrace_ops               ftrace_ops;
-#endif
+# endif
 #endif
 
 #ifdef CONFIG_CGROUP_PERF
@@ -880,7 +891,7 @@ struct perf_event {
 	 * of it. event->orig_type contains original 'type' requested by
 	 * user.
 	 */
-	__u32				orig_type;
+	u32				orig_type;
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -945,8 +956,8 @@ static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
 }
 
 struct perf_event_groups {
-	struct rb_root	tree;
-	u64		index;
+	struct rb_root			tree;
+	u64				index;
 };
 
 
@@ -1189,16 +1200,18 @@ extern void perf_pmu_resched(struct pmu *pmu);
 extern int perf_event_refresh(struct perf_event *event, int refresh);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
+
 extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
-				int cpu,
-				struct task_struct *task,
-				perf_overflow_handler_t callback,
-				void *context);
+				 int cpu,
+				 struct task_struct *task,
+				 perf_overflow_handler_t callback,
+				 void *context);
+
 extern void perf_pmu_migrate_context(struct pmu *pmu,
-				int src_cpu, int dst_cpu);
-int perf_event_read_local(struct perf_event *event, u64 *value,
-			  u64 *enabled, u64 *running);
+				     int src_cpu, int dst_cpu);
+extern int perf_event_read_local(struct perf_event *event, u64 *value,
+				 u64 *enabled, u64 *running);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
@@ -1415,14 +1428,14 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data,
  */
 static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
 {
-	br->mispred = 0;
-	br->predicted = 0;
-	br->in_tx = 0;
-	br->abort = 0;
-	br->cycles = 0;
-	br->type = 0;
-	br->spec = PERF_BR_SPEC_NA;
-	br->reserved = 0;
+	br->mispred	= 0;
+	br->predicted	= 0;
+	br->in_tx	= 0;
+	br->abort	= 0;
+	br->cycles	= 0;
+	br->type	= 0;
+	br->spec	= PERF_BR_SPEC_NA;
+	br->reserved	= 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1611,7 +1624,17 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 				 enum perf_bpf_event_type type,
 				 u16 flags);
 
+#define PERF_GUEST_ACTIVE		0x01
+#define PERF_GUEST_USER			0x02
+
+struct perf_guest_info_callbacks {
+	unsigned int			(*state)(void);
+	unsigned long			(*get_ip)(void);
+	unsigned int			(*handle_intel_pt_intr)(void);
+};
+
 #ifdef CONFIG_GUEST_PERF_EVENTS
+
 extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
 
 DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
@@ -1622,21 +1645,27 @@ static inline unsigned int perf_guest_state(void)
 {
 	return static_call(__perf_guest_state)();
 }
+
 static inline unsigned long perf_guest_get_ip(void)
 {
 	return static_call(__perf_guest_get_ip)();
 }
+
 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 {
 	return static_call(__perf_guest_handle_intel_pt_intr)();
 }
+
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
-#else
+
+#else /* !CONFIG_GUEST_PERF_EVENTS: */
+
 static inline unsigned int perf_guest_state(void)		 { return 0; }
 static inline unsigned long perf_guest_get_ip(void)		 { return 0; }
 static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
-#endif /* CONFIG_GUEST_PERF_EVENTS */
+
+#endif /* !CONFIG_GUEST_PERF_EVENTS */
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
@@ -1666,6 +1695,7 @@ static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *
 {
 	if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
 		struct perf_callchain_entry *entry = ctx->entry;
+
 		entry->ip[entry->nr++] = ip;
 		++ctx->contexts;
 		return 0;
@@ -1679,6 +1709,7 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64
 {
 	if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
 		struct perf_callchain_entry *entry = ctx->entry;
+
 		entry->ip[entry->nr++] = ip;
 		++ctx->nr;
 		return 0;
@@ -1705,7 +1736,7 @@ static inline int perf_is_paranoid(void)
 	return sysctl_perf_event_paranoid > -1;
 }
 
-int perf_allow_kernel(void);
+extern int perf_allow_kernel(void);
 
 static inline int perf_allow_cpu(void)
 {
@@ -1827,7 +1858,7 @@ extern int perf_output_begin_backward(struct perf_output_handle *handle,
 
 extern void perf_output_end(struct perf_output_handle *handle);
 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
-			     const void *buf, unsigned int len);
+				     const void *buf, unsigned int len);
 extern unsigned int perf_output_skip(struct perf_output_handle *handle,
 				     unsigned int len);
 extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
@@ -1844,7 +1875,9 @@ extern void perf_event_task_tick(void);
 extern int perf_event_account_interrupt(struct perf_event *event);
 extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
+
 #else /* !CONFIG_PERF_EVENTS: */
+
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event)				{ return NULL; }
@@ -1922,19 +1955,14 @@ static inline void perf_event_disable(struct perf_event *event)		{ }
 static inline int __perf_event_disable(void *info)			{ return -1; }
 static inline void perf_event_task_tick(void)				{ }
 static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
-static inline int perf_event_period(struct perf_event *event, u64 value)
-{
-	return -EINVAL;
-}
-static inline u64 perf_event_pause(struct perf_event *event, bool reset)
-{
-	return 0;
-}
-static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
-{
-	return 0;
-}
-#endif
+static inline int
+perf_event_period(struct perf_event *event, u64 value)			{ return -EINVAL; }
+static inline u64
+perf_event_pause(struct perf_event *event, bool reset)			{ return 0; }
+static inline int
+perf_exclude_event(struct perf_event *event, struct pt_regs *regs)	{ return 0; }
+
+#endif /* !CONFIG_PERF_EVENTS */
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
@@ -1942,31 +1970,31 @@ extern void perf_restore_debug_store(void);
 static inline void perf_restore_debug_store(void)			{ }
 #endif
 
-#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
+#define perf_output_put(handle, x)	perf_output_copy((handle), &(x), sizeof(x))
 
 struct perf_pmu_events_attr {
-	struct device_attribute attr;
-	u64 id;
-	const char *event_str;
+	struct device_attribute		attr;
+	u64				id;
+	const char			*event_str;
 };
 
 struct perf_pmu_events_ht_attr {
-	struct device_attribute			attr;
-	u64					id;
-	const char				*event_str_ht;
-	const char				*event_str_noht;
+	struct device_attribute		attr;
+	u64				id;
+	const char			*event_str_ht;
+	const char			*event_str_noht;
 };
 
 struct perf_pmu_events_hybrid_attr {
-	struct device_attribute			attr;
-	u64					id;
-	const char				*event_str;
-	u64					pmu_type;
+	struct device_attribute		attr;
+	u64				id;
+	const char			*event_str;
+	u64				pmu_type;
 };
 
 struct perf_pmu_format_hybrid_attr {
-	struct device_attribute			attr;
-	u64					pmu_type;
+	struct device_attribute		attr;
+	u64				pmu_type;
 };
 
 ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
@@ -2008,11 +2036,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
 
 /* Performance counter hotplug functions */
 #ifdef CONFIG_PERF_EVENTS
-int perf_event_init_cpu(unsigned int cpu);
-int perf_event_exit_cpu(unsigned int cpu);
+extern int perf_event_init_cpu(unsigned int cpu);
+extern int perf_event_exit_cpu(unsigned int cpu);
 #else
-#define perf_event_init_cpu	NULL
-#define perf_event_exit_cpu	NULL
+# define perf_event_init_cpu		NULL
+# define perf_event_exit_cpu		NULL
 #endif
 
 extern void arch_perf_update_userpage(struct perf_event *event,
-- 
cgit v1.2.3


From 12ca42c237756182aad8ab04654c952765cb9061 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Fri, 16 May 2025 17:07:39 -0700
Subject: alloc_tag: allocate percpu counters for module tags dynamically

When a module gets unloaded it checks whether any of its tags are still in
use and if so, we keep the memory containing module's allocation tags
alive until all tags are unused.  However percpu counters referenced by
the tags are freed by free_module().  This will lead to UAF if the memory
allocated by a module is accessed after module was unloaded.

To fix this we allocate percpu counters for module allocation tags
dynamically and we keep it alive for tags which are still in use after
module unloading.  This also removes the requirement of a larger
PERCPU_MODULE_RESERVE when memory allocation profiling is enabled because
percpu memory for counters does not need to be reserved anymore.

Link: https://lkml.kernel.org/r/20250517000739.5930-1-surenb@google.com
Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reported-by: David Wang <00107082@163.com>
Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/
Tested-by: David Wang <00107082@163.com>
Cc: Christoph Lameter (Ampere) <cl@gentwo.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/alloc_tag.h | 12 ++++++++++++
 include/linux/codetag.h   |  8 ++++----
 include/linux/percpu.h    |  4 ----
 3 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index a946e0203e6d..8f7931eb7d16 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
 
 #else /* ARCH_NEEDS_WEAK_PER_CPU */
 
+#ifdef MODULE
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)						\
+	static struct alloc_tag _alloc_tag __used __aligned(8)			\
+	__section(ALLOC_TAG_SECTION_NAME) = {					\
+		.ct = CODE_TAG_INIT,						\
+		.counters = NULL };
+
+#else  /* MODULE */
+
 #define DEFINE_ALLOC_TAG(_alloc_tag)						\
 	static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr);	\
 	static struct alloc_tag _alloc_tag __used __aligned(8)			\
@@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
 		.ct = CODE_TAG_INIT,						\
 		.counters = &_alloc_tag_cntr };
 
+#endif /* MODULE */
+
 #endif /* ARCH_NEEDS_WEAK_PER_CPU */
 
 DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index d14dbd26b370..0ee4c21c6dbc 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -36,10 +36,10 @@ union codetag_ref {
 struct codetag_type_desc {
 	const char *section;
 	size_t tag_size;
-	void (*module_load)(struct codetag_type *cttype,
-			    struct codetag_module *cmod);
-	void (*module_unload)(struct codetag_type *cttype,
-			      struct codetag_module *cmod);
+	void (*module_load)(struct module *mod,
+			    struct codetag *start, struct codetag *end);
+	void (*module_unload)(struct module *mod,
+			      struct codetag *start, struct codetag *end);
 #ifdef CONFIG_MODULES
 	void (*module_replaced)(struct module *mod, struct module *new_mod);
 	bool (*needs_section_mem)(struct module *mod, unsigned long size);
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 52b5ea663b9f..85bf8dd9f087 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -15,11 +15,7 @@
 
 /* enough to cover all DEFINE_PER_CPUs in modules */
 #ifdef CONFIG_MODULES
-#ifdef CONFIG_MEM_ALLOC_PROFILING
-#define PERCPU_MODULE_RESERVE		(8 << 13)
-#else
 #define PERCPU_MODULE_RESERVE		(8 << 10)
-#endif
 #else
 #define PERCPU_MODULE_RESERVE		0
 #endif
-- 
cgit v1.2.3


From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn@igalia.com>
Date: Fri, 23 May 2025 14:19:10 +0200
Subject: mm: fix copy_vma() error handling for hugetlb mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If, during a mremap() operation for a hugetlb-backed memory mapping,
copy_vma() fails after the source vma has been duplicated and opened (ie.
vma_link() fails), the error is handled by closing the new vma.  This
updates the hugetlbfs reservation counter of the reservation map which at
this point is referenced by both the source vma and the new copy.  As a
result, once the new vma has been freed and copy_vma() returns, the
reservation counter for the source vma will be incorrect.

This patch addresses this corner case by clearing the hugetlb private page
reservation reference for the new vma and decrementing the reference
before closing the vma, so that vma_close() won't update the reservation
counter.  This is also what copy_vma_and_data() does with the source vma
if copy_vma() succeeds, so a helper function has been added to do the
fixup in both functions.

The issue was reported by a private syzbot instance and can be reproduced
using the C reproducer in [1].  It's also a possible duplicate of public
syzbot report [2].  The WARNING report is:

============================================================
page_counter underflow: -1024 nr_pages=1024
WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120
Modules linked in:
CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
RIP: 0010:page_counter_cancel+0xf6/0x120
Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81
RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246
RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a
R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000
R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400
FS:  0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 page_counter_uncharge+0x33/0x80
 hugetlb_cgroup_uncharge_counter+0xcb/0x120
 hugetlb_vm_op_close+0x579/0x960
 ? __pfx_hugetlb_vm_op_close+0x10/0x10
 remove_vma+0x88/0x130
 exit_mmap+0x71e/0xe00
 ? __pfx_exit_mmap+0x10/0x10
 ? __mutex_unlock_slowpath+0x22e/0x7f0
 ? __pfx_exit_aio+0x10/0x10
 ? __up_read+0x256/0x690
 ? uprobe_clear_state+0x274/0x290
 ? mm_update_next_owner+0xa9/0x810
 __mmput+0xc9/0x370
 exit_mm+0x203/0x2f0
 ? __pfx_exit_mm+0x10/0x10
 ? taskstats_exit+0x32b/0xa60
 do_exit+0x921/0x2740
 ? do_raw_spin_lock+0x155/0x3b0
 ? __pfx_do_exit+0x10/0x10
 ? __pfx_do_raw_spin_lock+0x10/0x10
 ? _raw_spin_lock_irq+0xc5/0x100
 do_group_exit+0x20c/0x2c0
 get_signal+0x168c/0x1720
 ? __pfx_get_signal+0x10/0x10
 ? schedule+0x165/0x360
 arch_do_signal_or_restart+0x8e/0x7d0
 ? __pfx_arch_do_signal_or_restart+0x10/0x10
 ? __pfx___se_sys_futex+0x10/0x10
 syscall_exit_to_user_mode+0xb8/0x2c0
 do_syscall_64+0x75/0x120
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x422dcd
Code: Unable to access opcode bytes at 0x422da3.
RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd
RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec
RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720
R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0
R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000
 </TASK>
============================================================

Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df1a8cfefd@igalia.com
Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter_cancel__repro.c [1]
Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2]
Signed-off-by: Ricardo Cañuelo Navarro <rcn@igalia.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Florent Revest <revest@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8f3ac832ee7f..4861a7e304bb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 bool is_hugetlb_entry_migration(pte_t pte);
 bool is_hugetlb_entry_hwpoisoned(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
@@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 
 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
 
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 
 #ifndef pgd_write
-- 
cgit v1.2.3


From 707f853d7fa3ce323a6875487890c213e34d81a0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 2 May 2025 16:12:09 +0200
Subject: module: Provide EXPORT_SYMBOL_GPL_FOR_MODULES() helper

Helper macro to more easily limit the export of a symbol to a given
list of modules.

Eg:

  EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm");

will limit the use of said function to kvm.ko, any other module trying
to use this symbol will refure to load (and get modpost build
failures).

Requested-by: Masahiro Yamada <masahiroy@kernel.org>
Requested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/linux/export.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/export.h b/include/linux/export.h
index a8c23d945634..f35d03b4113b 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -24,11 +24,17 @@
 	.long sym
 #endif
 
-#define ___EXPORT_SYMBOL(sym, license, ns)		\
+/*
+ * LLVM integrated assembler cam merge adjacent string literals (like
+ * C and GNU-as) passed to '.ascii', but not to '.asciz' and chokes on:
+ *
+ *   .asciz "MODULE_" "kvm" ;
+ */
+#define ___EXPORT_SYMBOL(sym, license, ns...)		\
 	.section ".export_symbol","a"		ASM_NL	\
 	__export_symbol_##sym:			ASM_NL	\
 		.asciz license			ASM_NL	\
-		.asciz ns			ASM_NL	\
+		.ascii ns "\0"			ASM_NL	\
 		__EXPORT_SYMBOL_REF(sym)	ASM_NL	\
 	.previous
 
@@ -85,4 +91,6 @@
 #define EXPORT_SYMBOL_NS(sym, ns)	__EXPORT_SYMBOL(sym, "", ns)
 #define EXPORT_SYMBOL_NS_GPL(sym, ns)	__EXPORT_SYMBOL(sym, "GPL", ns)
 
+#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods)
+
 #endif /* _LINUX_EXPORT_H */
-- 
cgit v1.2.3


From 478ad02d6844217cc7568619aeb0809d93ade43d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 25 May 2025 15:43:36 -0700
Subject: Disable FOP_DONTCACHE for now due to bugs

This is kind of last-minute, but Al Viro reported that the new
FOP_DONTCACHE flag causes memory corruption due to use-after-free
issues.

This was triggered by commit 974c5e6139db ("xfs: flag as supporting
FOP_DONTCACHE"), but that is not the underlying bug - it is just the
first user of the flag.

Vlastimil Babka suspects the underlying problem stems from the
folio_end_writeback() logic introduced in commit fb7d3bc414939
("mm/filemap: drop streaming/uncached pages when writeback completes").

The most straightforward fix would be to just revert the commit that
exposed this, but Matthew Wilcox points out that other filesystems are
also starting to enable the FOP_DONTCACHE logic, so this instead
disables that bit globally for now.

The fix will hopefully end up being trivial and we can just re-enable
this logic after more testing, but until such a time we'll have to
disable the new FOP_DONTCACHE flag.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lore.kernel.org/all/20250525083209.GS2023217@ZenIV/
Triggered-by: 974c5e6139db ("xfs: flag as supporting FOP_DONTCACHE")
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..a4fd649e2c3f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2186,7 +2186,7 @@ struct file_operations {
 /* Supports asynchronous lock callbacks */
 #define FOP_ASYNC_LOCK		((__force fop_flags_t)(1 << 6))
 /* File system supports uncached read/write buffered IO */
-#define FOP_DONTCACHE		((__force fop_flags_t)(1 << 7))
+#define FOP_DONTCACHE		0 /* ((__force fop_flags_t)(1 << 7)) */
 
 /* Wrap a directory iterator that needs exclusive inode access */
 int wrap_directory_iterator(struct file *, struct dir_context *,
-- 
cgit v1.2.3


From 384492c48e6a88c9a7f0376d8e8ac7f557988e92 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <stfomichev@gmail.com>
Date: Tue, 20 May 2025 13:30:42 -0700
Subject: net: devmem: support single IOV with sendmsg

sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple
iovs becomes ITER_IOVEC. iter_iov_len does not return correct
value for UBUF, so teach to treat UBUF differently.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: Mina Almasry <almasrymina@google.com>
Fixes: bd61848900bf ("net: devmem: Implement TX path")
Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
Acked-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/uio.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 49ece9e1888f..393d0622cc28 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter)
 }
 
 #define iter_iov_addr(iter)	(iter_iov(iter)->iov_base + (iter)->iov_offset)
-#define iter_iov_len(iter)	(iter_iov(iter)->iov_len - (iter)->iov_offset)
+
+static inline size_t iter_iov_len(const struct iov_iter *i)
+{
+	if (i->iter_type == ITER_UBUF)
+		return i->count;
+	return iter_iov(i)->iov_len - i->iov_offset;
+}
 
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 {
-- 
cgit v1.2.3


From 45ca7e9f0730ae36fc610e675b990e9cc9ca0714 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Wed, 21 May 2025 14:17:05 +0200
Subject: vsock/virtio: fix `rx_bytes` accounting for stream sockets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In `struct virtio_vsock_sock`, we maintain two counters:
- `rx_bytes`: used internally to track how many bytes have been read.
  This supports mechanisms like .stream_has_data() and sock_rcvlowat().
- `fwd_cnt`: used for the credit mechanism to inform available receive
  buffer space to the remote peer.

These counters are updated via virtio_transport_inc_rx_pkt() and
virtio_transport_dec_rx_pkt().

Since the beginning with commit 06a8fc78367d ("VSOCK: Introduce
virtio_vsock_common.ko"), we call virtio_transport_dec_rx_pkt() in
virtio_transport_stream_do_dequeue() only when we consume the entire
packet, so partial reads, do not update `rx_bytes` and `fwd_cnt`.

This is fine for `fwd_cnt`, because we still have space used for the
entire packet, and we don't want to update the credit for the other
peer until we free the space of the entire packet. However, this
causes `rx_bytes` to be stale on partial reads.

Previously, this didn’t cause issues because `rx_bytes` was used only by
.stream_has_data(), and any unread portion of a packet implied data was
still available. However, since commit 93b808876682
("virtio/vsock: fix logic which reduces credit update messages"), we now
rely on `rx_bytes` to determine if a credit update should be sent when
the data in the RX queue drops below SO_RCVLOWAT value.

This patch fixes the accounting by updating `rx_bytes` with the number
of bytes actually read, even on partial reads, while leaving `fwd_cnt`
untouched until the packet is fully consumed. Also introduce a new
`buf_used` counter to check that the remote peer is honoring the given
credit; this was previously done via `rx_bytes`.

Fixes: 93b808876682 ("virtio/vsock: fix logic which reduces credit update messages")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20250521121705.196379-1-sgarzare@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/virtio_vsock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 0387d64e2c66..36fb3edfa403 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -140,6 +140,7 @@ struct virtio_vsock_sock {
 	u32 last_fwd_cnt;
 	u32 rx_bytes;
 	u32 buf_alloc;
+	u32 buf_used;
 	struct sk_buff_head rx_queue;
 	u32 msg_count;
 };
-- 
cgit v1.2.3


From 9be022476fea62b32aa05d9d370e0175155731e6 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 11 Apr 2025 21:14:12 +0800
Subject: mailbox: Remove devm_mbox_controller_unregister

Commit e898d9cdd3a9("mailbox: Add device-managed registration functions")
introduced device-managed API for mailbox, but in the past 7 years,
there is no user for devm_mbox_controller_unregister. So remove it.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Jassi Brar <jassisinghbrar@gmail.com>
---
 include/linux/mailbox_controller.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
index 5fb0b65f45a2..ad01c4082358 100644
--- a/include/linux/mailbox_controller.h
+++ b/include/linux/mailbox_controller.h
@@ -134,7 +134,4 @@ void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */
 
 int devm_mbox_controller_register(struct device *dev,
 				  struct mbox_controller *mbox);
-void devm_mbox_controller_unregister(struct device *dev,
-				     struct mbox_controller *mbox);
-
 #endif /* __MAILBOX_CONTROLLER_H */
-- 
cgit v1.2.3


From ed449ddbd867f2cc02d6890c231431f264a876eb Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:09 -0700
Subject: net: core: Convert inet_addr_is_any() to sockaddr_storage

All the callers of inet_addr_is_any() have a sockaddr_storage-backed
sockaddr. Avoid casts and switch prototype to the actual object being
used.

Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> # SCSI
Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-1-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/inet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inet.h b/include/linux/inet.h
index bd8276e96e60..9158772f3559 100644
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -55,6 +55,6 @@ extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char
 
 extern int inet_pton_with_scope(struct net *net, unsigned short af,
 		const char *src, const char *port, struct sockaddr_storage *addr);
-extern bool inet_addr_is_any(struct sockaddr *addr);
+bool inet_addr_is_any(struct sockaddr_storage *addr);
 
 #endif	/* _LINUX_INET_H */
-- 
cgit v1.2.3


From 161972650d6795ea00f8b72557cf3c3e593ed250 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:10 -0700
Subject: net: core: Switch netif_set_mac_address() to struct sockaddr_storage

In order to avoid passing around struct sockaddr that has a size the
compiler cannot reason about (nor track at runtime), convert
netif_set_mac_address() to take struct sockaddr_storage. This is just a
cast conversion, so there is are no binary changes. Following patches
will make actual allocation changes.

Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-2-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ea9d335de130..47200a394a02 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4212,7 +4212,7 @@ int netif_set_mtu(struct net_device *dev, int new_mtu);
 int dev_set_mtu(struct net_device *, int);
 int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 			      struct netlink_ext_ack *extack);
-int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			  struct netlink_ext_ack *extack);
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 			struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From 9ca6804ab7c34f65fcf2e29333a39e7807c30b60 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:14 -0700
Subject: net: core: Convert dev_set_mac_address() to struct sockaddr_storage

All users of dev_set_mac_address() are now using a struct sockaddr_storage.
Convert the internal data type to struct sockaddr_storage, drop the casts,
and update pointer types.

Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-6-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 47200a394a02..b4242b997373 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4214,7 +4214,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 			      struct netlink_ext_ack *extack);
 int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			  struct netlink_ext_ack *extack);
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			struct netlink_ext_ack *extack);
 int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
 			     struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From ae9fcd5a0f8ab7e12619e1c66312a03b842935c3 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 21 May 2025 13:46:16 -0700
Subject: net: core: Convert dev_set_mac_address_user() to use struct
 sockaddr_storage

Convert callers of dev_set_mac_address_user() to use struct
sockaddr_storage. Add sanity checks on dev->addr_len usage.

Signed-off-by: Kees Cook <kees@kernel.org>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20250521204619.2301870-8-kees@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4242b997373..adb14db25798 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4216,7 +4216,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			  struct netlink_ext_ack *extack);
 int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
 			struct netlink_ext_ack *extack);
-int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
+int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss,
 			     struct netlink_ext_ack *extack);
 int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
 int dev_get_port_parent_id(struct net_device *dev,
-- 
cgit v1.2.3


From c5b6ababd21ab6bb251e5a2b4db110e76fb00a26 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 12 May 2025 14:04:02 -0400
Subject: locking/mutex: implement mutex_trylock_nested

Despite the fact that several lockdep-related checks are skipped when
calling trylock* versions of the locking primitives, for example
mutex_trylock, each time the mutex is acquired, a held_lock is still
placed onto the lockdep stack by __lock_acquire() which is called
regardless of whether the trylock* or regular locking API was used.

This means that if the caller successfully acquires more than
MAX_LOCK_DEPTH locks of the same class, even when using mutex_trylock,
lockdep will still complain that the maximum depth of the held lock stack
has been reached and disable itself.

For example, the following error currently occurs in the ARM version
of KVM, once the code tries to lock all vCPUs of a VM configured with more
than MAX_LOCK_DEPTH vCPUs, a situation that can easily happen on modern
systems, where having more than 48 CPUs is common, and it's also common to
run VMs that have vCPU counts approaching that number:

[  328.171264] BUG: MAX_LOCK_DEPTH too low!
[  328.175227] turning off the locking correctness validator.
[  328.180726] Please attach the output of /proc/lock_stat to the bug report
[  328.187531] depth: 48  max: 48!
[  328.190678] 48 locks held by qemu-kvm/11664:
[  328.194957]  #0: ffff800086de5ba0 (&kvm->lock){+.+.}-{3:3}, at: kvm_ioctl_create_device+0x174/0x5b0
[  328.204048]  #1: ffff0800e78800b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0
[  328.212521]  #2: ffff07ffeee51e98 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0
[  328.220991]  #3: ffff0800dc7d80b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0
[  328.229463]  #4: ffff07ffe0c980b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0
[  328.237934]  #5: ffff0800a3883c78 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0
[  328.246405]  #6: ffff07fffbe480b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0

Luckily, in all instances that require locking all vCPUs, the
'kvm->lock' is taken a priori, and that fact makes it possible to use
the little known feature of lockdep, called a 'nest_lock', to avoid this
warning and subsequent lockdep self-disablement.

The action of 'nested lock' being provided to lockdep's lock_acquire(),
causes the lockdep to detect that the top of the held lock stack contains
a lock of the same class and then increment its reference counter instead
of pushing a new held_lock item onto that stack.

See __lock_acquire for more information.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Message-ID: <20250512180407.659015-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/mutex.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2143d05116be..da4518cfd59c 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -193,7 +193,22 @@ extern void mutex_lock_io(struct mutex *lock);
  *
  * Returns 1 if the mutex has been acquired successfully, and 0 on contention.
  */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
+
+#define mutex_trylock_nest_lock(lock, nest_lock)		\
+(								\
+	typecheck(struct lockdep_map *, &(nest_lock)->dep_map),	\
+	_mutex_trylock_nest_lock(lock, &(nest_lock)->dep_map)	\
+)
+
+#define mutex_trylock(lock) _mutex_trylock_nest_lock(lock, NULL)
+#else
 extern int mutex_trylock(struct mutex *lock);
+#define mutex_trylock_nest_lock(lock, nest_lock) mutex_trylock(lock)
+#endif
+
 extern void mutex_unlock(struct mutex *lock);
 
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
-- 
cgit v1.2.3


From fb49f07ba1d9d8c9bd8854878ae6b5b21ff9ac45 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 12 May 2025 14:04:03 -0400
Subject: locking/mutex: implement mutex_lock_killable_nest_lock

KVM's SEV intra-host migration code needs to lock all vCPUs
of the source and the target VM, before it proceeds with the migration.

The number of vCPUs that belong to each VM is not bounded by anything
except a self-imposed KVM limit of CONFIG_KVM_MAX_NR_VCPUS vCPUs which is
significantly larger than the depth of lockdep's lock stack.

Luckily, the locks in both of the cases mentioned above, are held under
the 'kvm->lock' of each VM, which means that we can use the little
known lockdep feature called a "nest_lock" to support this use case in
a cleaner way, compared to the way it's currently done.

Implement and expose 'mutex_lock_killable_nest_lock' for this
purpose.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Message-ID: <20250512180407.659015-3-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/mutex.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index da4518cfd59c..a039fa8c1780 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -156,16 +156,15 @@ static inline int __devm_mutex_init(struct device *dev, struct mutex *lock)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
 extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
-
 extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass);
-extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
-					unsigned int subclass);
+extern int __must_check _mutex_lock_killable(struct mutex *lock,
+		unsigned int subclass, struct lockdep_map *nest_lock);
 extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass);
 
 #define mutex_lock(lock) mutex_lock_nested(lock, 0)
 #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
-#define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0)
+#define mutex_lock_killable(lock) _mutex_lock_killable(lock, 0, NULL)
 #define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0)
 
 #define mutex_lock_nest_lock(lock, nest_lock)				\
@@ -174,6 +173,15 @@ do {									\
 	_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);		\
 } while (0)
 
+#define mutex_lock_killable_nest_lock(lock, nest_lock)			\
+(									\
+	typecheck(struct lockdep_map *, &(nest_lock)->dep_map),		\
+	_mutex_lock_killable(lock, 0, &(nest_lock)->dep_map)		\
+)
+
+#define mutex_lock_killable_nested(lock, subclass) \
+	_mutex_lock_killable(lock, subclass, NULL)
+
 #else
 extern void mutex_lock(struct mutex *lock);
 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
@@ -183,6 +191,7 @@ extern void mutex_lock_io(struct mutex *lock);
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
+# define mutex_lock_killable_nest_lock(lock, nest_lock) mutex_lock_killable(lock)
 # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
 # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock)
 #endif
-- 
cgit v1.2.3


From e4a454ced74c0ac97c8bd32f086ee3ad74528780 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 12 May 2025 14:04:04 -0400
Subject: KVM: add kvm_lock_all_vcpus and kvm_trylock_all_vcpus

In a few cases, usually in the initialization code, KVM locks all vCPUs
of a VM to ensure that userspace doesn't do funny things while KVM performs
an operation that affects the whole VM.

Until now, all these operations were implemented using custom code,
and all of them share the same problem:

Lockdep can't cope with simultaneous locking of a large number of locks of
the same class.

However if these locks are taken while another lock is already held,
which is luckily the case, it is possible to take advantage of little known
_nest_lock feature of lockdep which allows in this case to have an
unlimited number of locks of same class to be taken.

To implement this, create two functions:
kvm_lock_all_vcpus() and kvm_trylock_all_vcpus()

Both functions are needed because some code that will be replaced in
the subsequent patches, uses mutex_trylock, instead of regular mutex_lock.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Message-ID: <20250512180407.659015-4-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1dedc421b3e3..a6140415c693 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1015,6 +1015,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 
 void kvm_destroy_vcpus(struct kvm *kvm);
 
+int kvm_trylock_all_vcpus(struct kvm *kvm);
+int kvm_lock_all_vcpus(struct kvm *kvm);
+void kvm_unlock_all_vcpus(struct kvm *kvm);
+
 void vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
-- 
cgit v1.2.3


From 89f9dba365e1b85caef556d28654c6d336629eef Mon Sep 17 00:00:00 2001
From: "T.J. Mercier" <tjmercier@google.com>
Date: Thu, 22 May 2025 23:04:25 +0000
Subject: dma-buf: Rename debugfs symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the debugfs list and mutex so it's clear they are now usable
without the need for CONFIG_DEBUG_FS. The list will always be populated
to support the creation of a BPF iterator for dmabufs.

Signed-off-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20250522230429.941193-2-tjmercier@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/dma-buf.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 36216d28d8bd..8ff4add71f88 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -370,10 +370,8 @@ struct dma_buf {
 	 */
 	struct module *owner;
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
 	/** @list_node: node for dma_buf accounting and debugging. */
 	struct list_head list_node;
-#endif
 
 	/** @priv: exporter specific private data for this buffer object. */
 	void *priv;
-- 
cgit v1.2.3


From 76ea95534995adde1aa3cb1aa97ef33f50a617a9 Mon Sep 17 00:00:00 2001
From: "T.J. Mercier" <tjmercier@google.com>
Date: Thu, 22 May 2025 23:04:26 +0000
Subject: bpf: Add dmabuf iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dmabuf iterator traverses the list of all DMA buffers.

DMA buffers are refcounted through their associated struct file. A
reference is taken on each buffer as the list is iterated to ensure each
buffer persists for the duration of the bpf program execution without
holding the list mutex.

Signed-off-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20250522230429.941193-3-tjmercier@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/dma-buf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 8ff4add71f88..7af2ea839f58 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -634,4 +634,6 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map);
 void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map);
 int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
 void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
+struct dma_buf *dma_buf_iter_begin(void);
+struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf);
 #endif /* __DMA_BUF_H__ */
-- 
cgit v1.2.3


From 7b2b67dbd449afd00fc7279b1ab7ffa3d26fe0c9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:54 -0600
Subject: Revert "Disable FOP_DONTCACHE for now due to bugs"

This reverts commit 478ad02d6844217cc7568619aeb0809d93ade43d.

Both the read and write side dirty && writeback races should be resolved
now, revert the commit that disabled FOP_DONTCACHE for filesystems.

Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-4-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0db87f8e676c..57c3db3ef6ad 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2207,7 +2207,7 @@ struct file_operations {
 /* Supports asynchronous lock callbacks */
 #define FOP_ASYNC_LOCK		((__force fop_flags_t)(1 << 6))
 /* File system supports uncached read/write buffered IO */
-#define FOP_DONTCACHE		0 /* ((__force fop_flags_t)(1 << 7)) */
+#define FOP_DONTCACHE		((__force fop_flags_t)(1 << 7))
 
 /* Wrap a directory iterator that needs exclusive inode access */
 int wrap_directory_iterator(struct file *, struct dir_context *,
-- 
cgit v1.2.3


From e2d2115e56c4a02377189bfc3a9a7933552a7b0f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Fri, 23 May 2025 21:13:35 -0700
Subject: bpf: Do not include stack ptr register in precision backtracking
 bookkeeping

Yi Lai reported an issue ([1]) where the following warning appears
in kernel dmesg:
  [   60.643604] verifier backtracking bug
  [   60.643635] WARNING: CPU: 10 PID: 2315 at kernel/bpf/verifier.c:4302 __mark_chain_precision+0x3a6c/0x3e10
  [   60.648428] Modules linked in: bpf_testmod(OE)
  [   60.650471] CPU: 10 UID: 0 PID: 2315 Comm: test_progs Tainted: G           OE       6.15.0-rc4-gef11287f8289-dirty #327 PREEMPT(full)
  [   60.654385] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
  [   60.656682] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
  [   60.660475] RIP: 0010:__mark_chain_precision+0x3a6c/0x3e10
  [   60.662814] Code: 5a 30 84 89 ea e8 c4 d9 01 00 80 3d 3e 7d d8 04 00 0f 85 60 fa ff ff c6 05 31 7d d8 04
                       01 48 c7 c7 00 58 30 84 e8 c4 06 a5 ff <0f> 0b e9 46 fa ff ff 48 ...
  [   60.668720] RSP: 0018:ffff888116cc7298 EFLAGS: 00010246
  [   60.671075] RAX: 54d70e82dfd31900 RBX: ffff888115b65e20 RCX: 0000000000000000
  [   60.673659] RDX: 0000000000000001 RSI: 0000000000000004 RDI: 00000000ffffffff
  [   60.676241] RBP: 0000000000000400 R08: ffff8881f6f23bd3 R09: 1ffff1103ede477a
  [   60.678787] R10: dffffc0000000000 R11: ffffed103ede477b R12: ffff888115b60ae8
  [   60.681420] R13: 1ffff11022b6cbc4 R14: 00000000fffffff2 R15: 0000000000000001
  [   60.684030] FS:  00007fc2aedd80c0(0000) GS:ffff88826fa8a000(0000) knlGS:0000000000000000
  [   60.686837] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [   60.689027] CR2: 000056325369e000 CR3: 000000011088b002 CR4: 0000000000370ef0
  [   60.691623] Call Trace:
  [   60.692821]  <TASK>
  [   60.693960]  ? __pfx_verbose+0x10/0x10
  [   60.695656]  ? __pfx_disasm_kfunc_name+0x10/0x10
  [   60.697495]  check_cond_jmp_op+0x16f7/0x39b0
  [   60.699237]  do_check+0x58fa/0xab10
  ...

Further analysis shows the warning is at line 4302 as below:

  4294                 /* static subprog call instruction, which
  4295                  * means that we are exiting current subprog,
  4296                  * so only r1-r5 could be still requested as
  4297                  * precise, r0 and r6-r10 or any stack slot in
  4298                  * the current frame should be zero by now
  4299                  */
  4300                 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
  4301                         verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
  4302                         WARN_ONCE(1, "verifier backtracking bug");
  4303                         return -EFAULT;
  4304                 }

With the below test (also in the next patch):
  __used __naked static void __bpf_jmp_r10(void)
  {
	asm volatile (
	"r2 = 2314885393468386424 ll;"
	"goto +0;"
	"if r2 <= r10 goto +3;"
	"if r1 >= -1835016 goto +0;"
	"if r2 <= 8 goto +0;"
	"if r3 <= 0 goto +0;"
	"exit;"
	::: __clobber_all);
  }

  SEC("?raw_tp")
  __naked void bpf_jmp_r10(void)
  {
	asm volatile (
	"r3 = 0 ll;"
	"call __bpf_jmp_r10;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
  }

The following is the verifier failure log:
  0: (18) r3 = 0x0                      ; R3_w=0
  2: (85) call pc+2
  caller:
   R10=fp0
  callee:
   frame1: R1=ctx() R3_w=0 R10=fp0
  5: frame1: R1=ctx() R3_w=0 R10=fp0
  ; asm volatile ("                                 \ @ verifier_precision.c:184
  5: (18) r2 = 0x20202000256c6c78       ; frame1: R2_w=0x20202000256c6c78
  7: (05) goto pc+0
  8: (bd) if r2 <= r10 goto pc+3        ; frame1: R2_w=0x20202000256c6c78 R10=fp0
  9: (35) if r1 >= 0xffe3fff8 goto pc+0         ; frame1: R1=ctx()
  10: (b5) if r2 <= 0x8 goto pc+0
  mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1
  mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0
  mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3
  mark_precise: frame1: regs=r2,r10 stack= before 7: (05) goto pc+0
  mark_precise: frame1: regs=r2,r10 stack= before 5: (18) r2 = 0x20202000256c6c78
  mark_precise: frame1: regs=r10 stack= before 2: (85) call pc+2
  BUG regs 400

The main failure reason is due to r10 in precision backtracking bookkeeping.
Actually r10 is always precise and there is no need to add it for the precision
backtracking bookkeeping.

One way to fix the issue is to prevent bt_set_reg() if any src/dst reg is
r10. Andrii suggested to go with push_insn_history() approach to avoid
explicitly checking r10 in backtrack_insn().

This patch added push_insn_history() support for cond_jmp like 'rX <op> rY'
operations. In check_cond_jmp_op(), if any of rX or rY is a stack pointer,
push_insn_history() will record such information, and later backtrack_insn()
will do bt_set_reg() properly for those register(s).

  [1] https://lore.kernel.org/bpf/Z%2F8q3xzpU59CIYQE@ly-workstation/

Reported by: Yi Lai <yi1.lai@linux.intel.com>

Fixes: 407958a0e980 ("bpf: encapsulate precision backtracking bookkeeping")
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20250524041335.4046126-1-yonghong.song@linux.dev
---
 include/linux/bpf_verifier.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 78c97e12ea4e..256274acb1d8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -356,7 +356,11 @@ enum {
 	INSN_F_SPI_MASK = 0x3f, /* 6 bits */
 	INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
 
-	INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
+	INSN_F_STACK_ACCESS = BIT(9),
+
+	INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */
+	INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */
+	/* total 12 bits are used now. */
 };
 
 static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
@@ -365,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
 struct bpf_insn_hist_entry {
 	u32 idx;
 	/* insn idx can't be bigger than 1 million */
-	u32 prev_idx : 22;
-	/* special flags, e.g., whether insn is doing register stack spill/load */
-	u32 flags : 10;
+	u32 prev_idx : 20;
+	/* special INSN_F_xxx flags */
+	u32 flags : 12;
 	/* additional registers that need precision tracking when this
 	 * jump is backtracked, vector of six 10-bit records
 	 */
-- 
cgit v1.2.3


From e9cb929670a1e98b592b30f03f06e9e20110f318 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 22 May 2025 13:21:47 +0200
Subject: net: phy: fix up const issues in to_mdio_device() and to_phy_device()

Both to_mdio_device() and to_phy_device() "throw away" the const pointer
attribute passed to them and return a non-const pointer, which generally
is not a good thing overall.  Fix this up by using container_of_const()
which was designed for this very problem.

Cc: Alexander Lobakin <alobakin@pm.me>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Fixes: 7eab14de73a8 ("mdio, phy: fix -Wshadow warnings triggered by nested container_of()")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2025052246-conduit-glory-8fc9@gregkh
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mdio.h | 5 +----
 include/linux/phy.h  | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 3c3deac57894..e43ff9f980a4 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -45,10 +45,7 @@ struct mdio_device {
 	unsigned int reset_deassert_delay;
 };
 
-static inline struct mdio_device *to_mdio_device(const struct device *dev)
-{
-	return container_of(dev, struct mdio_device, dev);
-}
+#define to_mdio_device(__dev)	container_of_const(__dev, struct mdio_device, dev)
 
 /* struct mdio_driver_common: Common to all MDIO drivers */
 struct mdio_driver_common {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 32b9da274115..e194dad1623d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -744,10 +744,7 @@ struct phy_device {
 #define PHY_F_NO_IRQ		0x80000000
 #define PHY_F_RXC_ALWAYS_ON	0x40000000
 
-static inline struct phy_device *to_phy_device(const struct device *dev)
-{
-	return container_of(to_mdio_device(dev), struct phy_device, mdio);
-}
+#define to_phy_device(__dev)	container_of_const(to_mdio_device(__dev), struct phy_device, mdio)
 
 /**
  * struct phy_tdr_config - Configuration of a TDR raw test
-- 
cgit v1.2.3


From 81edb1ba3232afd45ae7f3f492a91019571b18c9 Mon Sep 17 00:00:00 2001
From: Fan Ni <fan.ni@samsung.com>
Date: Mon, 5 May 2025 11:22:42 -0700
Subject: mm/hugetlb: refactor unmap_hugepage_range() to take folio instead of
 page

The function unmap_hugepage_range() has two kinds of users:
1) unmap_ref_private(), which passes in the head page of a folio.  Since
   unmap_ref_private() already takes folio and there are no other uses
   of the folio struct in the function, it is natural for
   unmap_hugepage_range() to take folio also.
2) All other uses, which pass in NULL pointer.

In both cases, we can pass in folio. Refactor unmap_hugepage_range() to
take folio.

Link: https://lkml.kernel.org/r/20250505182345.506888-4-nifan.cxl@gmail.com
Signed-off-by: Fan Ni <fan.ni@samsung.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 23ebf49c5d6a..f6d5f24e793c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -129,8 +129,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
 			    struct vm_area_struct *, struct vm_area_struct *);
 void unmap_hugepage_range(struct vm_area_struct *,
-			  unsigned long, unsigned long, struct page *,
-			  zap_flags_t);
+			  unsigned long start, unsigned long end,
+			  struct folio *, zap_flags_t);
 void __unmap_hugepage_range(struct mmu_gather *tlb,
 			  struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end,
-- 
cgit v1.2.3


From 7f4b6065d9a842721a04632fc219aa453d1b2f5c Mon Sep 17 00:00:00 2001
From: Fan Ni <fan.ni@samsung.com>
Date: Mon, 5 May 2025 11:22:43 -0700
Subject: mm/hugetlb: refactor __unmap_hugepage_range() to take folio instead
 of page

The function __unmap_hugepage_range() has two kinds of users:
1) unmap_hugepage_range(), which passes in the head page of a folio.
   Since unmap_hugepage_range() already takes folio and there are no other
   uses of the folio struct in the function, it is natural for
   __unmap_hugepage_range() to take folio also.
2) All other uses, which pass in NULL pointer.

In both cases, we can pass in folio. Refactor __unmap_hugepage_range() to
take folio.

Link: https://lkml.kernel.org/r/20250505182345.506888-5-nifan.cxl@gmail.com
Signed-off-by: Fan Ni <fan.ni@samsung.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f6d5f24e793c..eb21619206af 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -134,7 +134,7 @@ void unmap_hugepage_range(struct vm_area_struct *,
 void __unmap_hugepage_range(struct mmu_gather *tlb,
 			  struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end,
-			  struct page *ref_page, zap_flags_t zap_flags);
+			  struct folio *, zap_flags_t zap_flags);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(char *buf, int len, int nid);
 void hugetlb_show_meminfo_node(int nid);
@@ -455,7 +455,7 @@ static inline long hugetlb_change_protection(
 
 static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
-			unsigned long end, struct page *ref_page,
+			unsigned long end, struct folio *folio,
 			zap_flags_t zap_flags)
 {
 	BUG();
-- 
cgit v1.2.3


From 375700bab5b150e876e42d894a9a7470881f8a61 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 23 May 2025 13:13:11 -0600
Subject: llist: make llist_add_batch() a static inline

The function is small enough that it should be, and it's a (very) hot path
for io_uring.  Doing this actually reduces my vmlinux text size for my
standard build/test box.

Before:
axboe@r7625 ~/g/linux (test)> size vmlinux
   text	   data	    bss	    dec	    hex		filename
19892174 5938310 2470432 28300916 1afd674	vmlinux

After:
axboe@r7625 ~/g/linux (test)> size vmlinux
   text	   data	    bss	    dec	    hex		filename
19891878 5938310 2470436 28300624 1afd550	vmlinux

Link: https://lkml.kernel.org/r/f1d104c6-7ac8-457a-a53d-6bb741421b2f@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/llist.h | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/llist.h b/include/linux/llist.h
index 2c982ff7475a..27b17f64bcee 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -223,9 +223,26 @@ static inline struct llist_node *llist_next(struct llist_node *node)
 	return node->next;
 }
 
-extern bool llist_add_batch(struct llist_node *new_first,
-			    struct llist_node *new_last,
-			    struct llist_head *head);
+/**
+ * llist_add_batch - add several linked entries in batch
+ * @new_first:	first entry in batch to be added
+ * @new_last:	last entry in batch to be added
+ * @head:	the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
+ */
+static inline bool llist_add_batch(struct llist_node *new_first,
+				   struct llist_node *new_last,
+				   struct llist_head *head)
+{
+	struct llist_node *first = READ_ONCE(head->first);
+
+	do {
+		new_last->next = first;
+	} while (!try_cmpxchg(&head->first, &first, new_first));
+
+	return !first;
+}
 
 static inline bool __llist_add_batch(struct llist_node *new_first,
 				     struct llist_node *new_last,
-- 
cgit v1.2.3


From 206cc44588f72b49ad4d7e21a7472ab2a72a83df Mon Sep 17 00:00:00 2001
From: Sami Uddin <sami.md.ko@gmail.com>
Date: Mon, 12 May 2025 07:51:53 +0930
Subject: virtio: reject shm region if length is zero

Prevent usage of shared memory regions where the length is zero,
as such configurations are not valid and may lead to unexpected behavior.

Signed-off-by: Sami Uddin <sami.md.ko@gmail.com>
Message-Id: <20250511222153.2332-1-sami.md.ko@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 169c7d367fac..b3e1d30c765b 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -329,6 +329,8 @@ static inline
 bool virtio_get_shm_region(struct virtio_device *vdev,
 			   struct virtio_shm_region *region, u8 id)
 {
+	if (!region->len)
+		return false;
 	if (!vdev->config->get_shm_region)
 		return false;
 	return vdev->config->get_shm_region(vdev, region, id);
-- 
cgit v1.2.3


From 34ecde3c56066ba79e5ec3d93c5b14ea83e3603e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 17:01:31 -0600
Subject: iomap: don't lose folio dropbehind state for overwrites

DONTCACHE I/O must have the completion punted to a workqueue, just like
what is done for unwritten extents, as the completion needs task context
to perform the invalidation of the folio(s). However, if writeback is
started off filemap_fdatawrite_range() off generic_sync() and it's an
overwrite, then the DONTCACHE marking gets lost as iomap_add_to_ioend()
don't look at the folio being added and no further state is passed down
to help it know that this is a dropbehind/DONTCACHE write.

Check if the folio being added is marked as dropbehind, and set
IOMAP_IOEND_DONTCACHE if that is the case. Then XFS can factor this into
the decision making of completion context in xfs_submit_ioend().
Additionally include this ioend flag in the NOMERGE flags, to avoid
mixing it with unrelated IO.

Since this is the 3rd flag that will cause XFS to punt the completion to
a workqueue, add a helper so that each one of them can get appropriately
commented.

This fixes extra page cache being instantiated when the write performed
is an overwrite, rather than newly instantiated blocks.

Fixes: b2cd5ae693a3 ("iomap: make buffered writes work with RWF_DONTCACHE")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/5153f6e8-274d-4546-bf55-30a5018e0d03@kernel.dk
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/iomap.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 68416b135151..522644d62f30 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -377,13 +377,16 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
 #define IOMAP_IOEND_BOUNDARY		(1U << 2)
 /* is direct I/O */
 #define IOMAP_IOEND_DIRECT		(1U << 3)
+/* is DONTCACHE I/O */
+#define IOMAP_IOEND_DONTCACHE		(1U << 4)
 
 /*
  * Flags that if set on either ioend prevent the merge of two ioends.
  * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
  */
 #define IOMAP_IOEND_NOMERGE_FLAGS \
-	(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
+	(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \
+	 IOMAP_IOEND_DONTCACHE)
 
 /*
  * Structure for writeback I/O completions.
-- 
cgit v1.2.3


From 793908d60b8745c386b9f4e29eb702f74ceb0886 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 24 Apr 2025 10:34:04 +0200
Subject: PCI: endpoint: Retain fixed-size BAR size as well as aligned size

When allocating space for an endpoint function on a BAR with a fixed size,
the size saved in 'struct pci_epf_bar.size' should be the fixed size as
expected by pci_epc_set_bar().

However, if pci_epf_alloc_space() increased the allocation size to
accommodate iATU alignment requirements, it previously saved the larger
aligned size in .size, which broke pci_epc_set_bar().

To solve this, keep the fixed BAR size in .size and save the aligned size
in a new .aligned_size for use when deallocating it.

Fixes: 2a9a801620ef ("PCI: endpoint: Add support to specify alignment for buffers allocated to BARs")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
[mani: commit message fixup]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
[bhelgaas: more specific subject, commit log, wrap comment to match file]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Link: https://patch.msgid.link/20250424-pci-ep-size-alignment-v5-1-2d4ec2af23f5@baylibre.com
---
 include/linux/pci-epf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 879d19cebd4f..749cee0bcf2c 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -114,6 +114,8 @@ struct pci_epf_driver {
  * @phys_addr: physical address that should be mapped to the BAR
  * @addr: virtual address corresponding to the @phys_addr
  * @size: the size of the address space present in BAR
+ * @aligned_size: the size actually allocated to accommodate the iATU alignment
+ *                requirement
  * @barno: BAR number
  * @flags: flags that are set for the BAR
  */
@@ -121,6 +123,7 @@ struct pci_epf_bar {
 	dma_addr_t	phys_addr;
 	void		*addr;
 	size_t		size;
+	size_t		aligned_size;
 	enum pci_barno	barno;
 	int		flags;
 };
-- 
cgit v1.2.3


From d2e1d783f2c619cf9d8242b67a0ab0d2915f2920 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <anna.schumaker@oracle.com>
Date: Fri, 11 Apr 2025 15:47:32 -0400
Subject: NFS: Add support for fallocate(FALLOC_FL_ZERO_RANGE)

This implements a suggestion from Trond that we can mimic
FALLOC_FL_ZERO_RANGE by sending a compound that first does a DEALLOCATE
to punch a hole in a file, and then an ALLOCATE to fill the hole with
zeroes. There might technically be a race here, but once the DEALLOCATE
finishes any reads from the region would return zeroes anyway, so I
don't expect it to cause problems.

Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
---
 include/linux/nfs4.h      | 1 +
 include/linux/nfs_fs_sb.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index d8cad844870a..07635f87a3fd 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -678,6 +678,7 @@ enum {
 	NFSPROC4_CLNT_SEEK,
 	NFSPROC4_CLNT_ALLOCATE,
 	NFSPROC4_CLNT_DEALLOCATE,
+	NFSPROC4_CLNT_ZERO_RANGE,
 	NFSPROC4_CLNT_LAYOUTSTATS,
 	NFSPROC4_CLNT_CLONE,
 	NFSPROC4_CLNT_COPY,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e02f4c4e9cc4..63141320c2a8 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -304,6 +304,7 @@ struct nfs_server {
 #define NFS_CAP_CASE_PRESERVING	(1U << 7)
 #define NFS_CAP_REBOOT_LAYOUTRETURN	(1U << 8)
 #define NFS_CAP_OFFLOAD_STATUS	(1U << 9)
+#define NFS_CAP_ZERO_RANGE	(1U << 10)
 #define NFS_CAP_OPEN_XOR	(1U << 12)
 #define NFS_CAP_DELEGTIME	(1U << 13)
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
-- 
cgit v1.2.3


From 77be29b7a3f896bd96808ba6781481a1f6afa66c Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Thu, 1 May 2025 08:29:42 -0400
Subject: NFSv4: Allow FREE_STATEID to clean up delegations

The NFS client's list of delegations can grow quite large (well beyond the
delegation watermark) if the server is revoking or there are repeated
events that expire state.  Once this happens, the revoked delegations can
cause a performance problem for subsequent walks of the
servers->delegations list when the client tries to test and free state.

If we can determine that the FREE_STATEID operation has completed without
error, we can prune the delegation from the list.

Since the NFS client combines TEST_STATEID with FREE_STATEID in its minor
version operations, there isn't an easy way to communicate success of
FREE_STATEID.  Rather than re-arrange quite a number of calling paths to
break out the separate procedures, let's signal the success of FREE_STATEID
by setting the stateid's type.

Set NFS4_FREED_STATEID_TYPE for stateids that have been successfully
discarded from the server, and use that type to signal that the delegation
can be cleaned up.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
---
 include/linux/nfs4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 07635f87a3fd..e947af6a3684 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -72,6 +72,7 @@ struct nfs4_stateid_struct {
 		NFS4_LAYOUT_STATEID_TYPE,
 		NFS4_PNFS_DS_STATEID_TYPE,
 		NFS4_REVOKED_STATEID_TYPE,
+		NFS4_FREED_STATEID_TYPE,
 	} type;
 };
 
-- 
cgit v1.2.3


From 77e82fb2c6c27c122e785f543ae0062f7783c886 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Fri, 9 May 2025 10:46:39 +1000
Subject: nfs_localio: always hold nfsd net ref with nfsd_file ref

Having separate nfsd_file_put and nfsd_file_put_local in struct
nfsd_localio_operations doesn't make much sense.  The difference is that
nfsd_file_put doesn't drop a reference to the nfs_net which is what
keeps nfsd from shutting down.

Currently, if nfsd tries to shutdown it will invalidate the files stored
in the list from the nfs_uuid and this will drop all references to the
nfsd net that the client holds.  But the client could still hold some
references to nfsd_files for active IO.  So nfsd might think is has
completely shut down local IO, but hasn't and has no way to wait for
those active IO requests to complete.

So this patch changes nfsd_file_get to nfsd_file_get_local and has it
increase the ref count on the nfsd net and it replaces all calls to
->nfsd_put_file to ->nfsd_put_file_local.

It also changes ->nfsd_open_local_fh to return with the refcount on the
net elevated precisely when a valid nfsd_file is returned.

This means that whenever the client holds a valid nfsd_file, there will
be an associated count on the nfsd net, and so the count can only reach
zero when all nfsd_files have been returned.

nfs_local_file_put() is changed to call nfs_to_nfsd_file_put_local()
instead of replacing calls to one with calls to the other because this
will help a later patch which changes nfs_to_nfsd_file_put_local() to
take an __rcu pointer while nfs_local_file_put() doesn't.

Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client")
Signed-off-by: NeilBrown <neil@brown.name>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
---
 include/linux/nfslocalio.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index 9aa8a43843d7..c3f34bae60e1 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -66,8 +66,7 @@ struct nfsd_localio_operations {
 						const struct nfs_fh *,
 						const fmode_t);
 	struct net *(*nfsd_file_put_local)(struct nfsd_file *);
-	struct nfsd_file *(*nfsd_file_get)(struct nfsd_file *);
-	void (*nfsd_file_put)(struct nfsd_file *);
+	struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *);
 	struct file *(*nfsd_file_file)(struct nfsd_file *);
 } ____cacheline_aligned;
 
-- 
cgit v1.2.3


From c25a89770d1f216dcedfc2d25d56b604f62ce0bd Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Fri, 9 May 2025 10:46:43 +1000
Subject: nfs_localio: change nfsd_file_put_local() to take a pointer to __rcu
 pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of calling xchg() and unrcu_pointer() before
nfsd_file_put_local(), we now pass pointer to the __rcu pointer and call
xchg() and unrcu_pointer() inside that function.

Where unrcu_pointer() is currently called the internals of "struct
nfsd_file" are not known and that causes older compilers such as gcc-8
to complain.

In some cases we have a __kernel (aka normal) pointer not an __rcu
pointer so we need to cast it to __rcu first.  This is strictly a
weakening so no information is lost.  Somewhat surprisingly, this cast
is accepted by gcc-8.

This has the pleasing result that the cmpxchg() which sets ro_file and
rw_file, and also the xchg() which clears them, are both now in the nfsd
code.

Reported-by: Pali Rohár <pali@kernel.org>
Reported-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client")
Signed-off-by: NeilBrown <neil@brown.name>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
---
 include/linux/nfslocalio.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index c3f34bae60e1..5c7c92659e73 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -50,10 +50,6 @@ void nfs_localio_invalidate_clients(struct list_head *nn_local_clients,
 				    spinlock_t *nn_local_clients_lock);
 
 /* localio needs to map filehandle -> struct nfsd_file */
-extern struct nfsd_file *
-nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *,
-		   const struct cred *, const struct nfs_fh *,
-		   const fmode_t) __must_hold(rcu);
 void nfs_close_local_fh(struct nfs_file_localio *);
 
 struct nfsd_localio_operations {
@@ -64,8 +60,9 @@ struct nfsd_localio_operations {
 						struct rpc_clnt *,
 						const struct cred *,
 						const struct nfs_fh *,
+						struct nfsd_file __rcu **pnf,
 						const fmode_t);
-	struct net *(*nfsd_file_put_local)(struct nfsd_file *);
+	struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **);
 	struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *);
 	struct file *(*nfsd_file_file)(struct nfsd_file *);
 } ____cacheline_aligned;
@@ -76,6 +73,7 @@ extern const struct nfsd_localio_operations *nfs_to;
 struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *,
 		   struct rpc_clnt *, const struct cred *,
 		   const struct nfs_fh *, struct nfs_file_localio *,
+		   struct nfsd_file __rcu **pnf,
 		   const fmode_t);
 
 static inline void nfs_to_nfsd_net_put(struct net *net)
@@ -90,16 +88,19 @@ static inline void nfs_to_nfsd_net_put(struct net *net)
 	rcu_read_unlock();
 }
 
-static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
+static inline void nfs_to_nfsd_file_put_local(struct nfsd_file __rcu **localio)
 {
 	/*
-	 * Must not hold RCU otherwise nfsd_file_put() can easily trigger:
-	 * "Voluntary context switch within RCU read-side critical section!"
-	 * by scheduling deep in underlying filesystem (e.g. XFS).
+	 * Either *localio must be guaranteed to be non-NULL, or caller
+	 * must prevent nfsd shutdown from completing as nfs_close_local_fh()
+	 * does by blocking the nfs_uuid from being finally put.
 	 */
-	struct net *net = nfs_to->nfsd_file_put_local(localio);
+	struct net *net;
 
-	nfs_to_nfsd_net_put(net);
+	net = nfs_to->nfsd_file_put_local(localio);
+
+	if (net)
+		nfs_to_nfsd_net_put(net);
 }
 
 #else   /* CONFIG_NFS_LOCALIO */
-- 
cgit v1.2.3


From f62da6e7270c2db5aef8a8b14f465896961a9372 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Wed, 14 May 2025 09:43:18 +0200
Subject: PCI: endpoint: Align pci_epc_set_msi(), pci_epc_ops::set_msi()
 nr_irqs encoding

The kdoc for pci_epc_set_msi() says:
"Invoke to set the required number of MSI interrupts."

The kdoc for the callback pci_epc_ops::set_msi() says:
"ops to set the requested number of MSI interrupts in the MSI capability
register"

pci_epc_ops::set_msi() does however expect the parameter 'interrupts' to be
in the encoding as defined by the Multiple Message Capable (MMC) field of
the MSI capability structure. Nowhere in the kdoc does it say that the
number of interrupts should be in MMC encoding.

It is very confusing that the API pci_epc_set_msi() and the callback
function pci_epc_ops::set_msi() both take a parameter named 'interrupts',
but they expect completely different encodings.

Clean up the API and the callback function to have the same semantics,
i.e. the parameter represents the number of interrupts, regardless of the
internal encoding of that value.

Also rename the parameter 'interrupts' to 'nr_irqs', in both the wrapper
function and the callback function, such that the name is unambiguous.

[bhelgaas: more specific subject]

Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable+noautosel@kernel.org # this is simply a cleanup
Link: https://patch.msgid.link/20250514074313.283156-13-cassel@kernel.org
---
 include/linux/pci-epc.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 82837008b56f..15d10c07c9f1 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -100,7 +100,7 @@ struct pci_epc_ops {
 	void	(*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			      phys_addr_t addr);
 	int	(*set_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
-			   u8 interrupts);
+			   u8 nr_irqs);
 	int	(*get_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
 	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			    u16 interrupts, enum pci_barno, u32 offset);
@@ -286,8 +286,7 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		     u64 pci_addr, size_t size);
 void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			phys_addr_t phys_addr);
-int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
-		    u8 interrupts);
+int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 nr_irqs);
 int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
 int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		     u16 interrupts, enum pci_barno, u32 offset);
-- 
cgit v1.2.3


From de0321bcc5fdd83631f0c2a6fdebfe0ad4e23449 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Wed, 14 May 2025 09:43:19 +0200
Subject: PCI: endpoint: Align pci_epc_set_msix(), pci_epc_ops::set_msix()
 nr_irqs encoding

The kdoc for pci_epc_set_msix() says:
"Invoke to set the required number of MSI-X interrupts."

The kdoc for the callback pci_epc_ops->set_msix() says:
"ops to set the requested number of MSI-X interrupts in the MSI-X
capability register"

pci_epc_ops::set_msix() does however expect the parameter 'interrupts' to
be in the encoding as defined by the Table Size field. Nowhere in the
kdoc does it say that the number of interrupts should be in Table Size
encoding.

It is very confusing that the API pci_epc_set_msix() and the callback
function pci_epc_ops::set_msix() both take a parameter named 'interrupts',
but they expect completely different encodings.

Clean up the API and the callback function to have the same semantics,
i.e. the parameter represents the number of interrupts, regardless of the
internal encoding of that value.

Also rename the parameter 'interrupts' to 'nr_irqs', in both the wrapper
function and the callback function, such that the name is unambiguous.

[bhelgaas: more specific subject]

Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable+noautosel@kernel.org # this is simply a cleanup
Link: https://patch.msgid.link/20250514074313.283156-14-cassel@kernel.org
---
 include/linux/pci-epc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 15d10c07c9f1..4286bfdbfdfa 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -103,7 +103,7 @@ struct pci_epc_ops {
 			   u8 nr_irqs);
 	int	(*get_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
 	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
-			    u16 interrupts, enum pci_barno, u32 offset);
+			    u16 nr_irqs, enum pci_barno, u32 offset);
 	int	(*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
 	int	(*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			     unsigned int type, u16 interrupt_num);
@@ -288,8 +288,8 @@ void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			phys_addr_t phys_addr);
 int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 nr_irqs);
 int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
-int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
-		     u16 interrupts, enum pci_barno, u32 offset);
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u16 nr_irqs,
+		     enum pci_barno, u32 offset);
 int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
 int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			phys_addr_t phys_addr, u8 interrupt_num,
-- 
cgit v1.2.3


From c31f91c6af96a5eb0632f4aee8d4e39cad7d7559 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 16 Apr 2025 10:53:58 +0200
Subject: fuse: don't allow signals to interrupt getdents copying

When getting the directory contents, the entries are first fetched to a
kernel buffer, then they are copied to userspace with dir_emit().  This
second phase is non-blocking as long as the userspace buffer is not paged
out, making it interruptible makes zero sense.

Overload d_type as flags, since it only uses 4 bits from 32.

Reviewed-by: Bernd Schubert <bschubert@ddn.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..0f2a1a572e3a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2073,6 +2073,9 @@ struct dir_context {
 	loff_t pos;
 };
 
+/* If OR-ed with d_type, pending signals are not checked */
+#define FILLDIR_FLAG_NOINTR	0x1000
+
 /*
  * These flags let !MMU mmap() govern direct device mapping vs immediate
  * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
-- 
cgit v1.2.3


From 467e245d47e6662a4cbf4184d9e6d0b1b120c0bf Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 16 Apr 2025 12:44:55 +0200
Subject: readdir: supply dir_context.count as readdir buffer size hint

This is a preparation for large readdir buffers in fuse.

Simply setting the fuse buffer size to the userspace buffer size should
work, the record sizes are similar (fuse's is slightly larger than libc's,
so no overflow should ever happen).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Jaco Kroon <jaco@uls.co.za>
---
 include/linux/fs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0f2a1a572e3a..dfc5a3327124 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2071,6 +2071,13 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
 struct dir_context {
 	filldir_t actor;
 	loff_t pos;
+	/*
+	 * Filesystems MUST NOT MODIFY count, but may use as a hint:
+	 * 0	    unknown
+	 * > 0      space in buffer (assume at least one entry)
+	 * INT_MAX  unlimited
+	 */
+	int count;
 };
 
 /* If OR-ed with d_type, pending signals are not checked */
-- 
cgit v1.2.3


From 5402c4d4d2000a9baa30c1157c97152ec6383733 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 25 May 2025 12:47:31 +0200
Subject: exportfs: require ->fh_to_parent() to encode connectable file handles

When user requests a connectable file handle explicitly with the
AT_HANDLE_CONNECTABLE flag, fail the request if filesystem (e.g. nfs)
does not know how to decode a connected non-dir dentry.

Fixes: c374196b2b9f ("fs: name_to_handle_at() support for "explicit connectable" file handles")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/20250525104731.1461704-1-amir73il@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/exportfs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index fc93f0abf513..25c4a5afbd44 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -314,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop)
 static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
 					  int fh_flags)
 {
+	if (!nop)
+		return false;
+
 	/*
 	 * If a non-decodeable file handle was requested, we only need to make
 	 * sure that filesystem did not opt-out of encoding fid.
@@ -321,6 +324,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
 	if (fh_flags & EXPORT_FH_FID)
 		return exportfs_can_encode_fid(nop);
 
+	/*
+	 * If a connectable file handle was requested, we need to make sure that
+	 * filesystem can also decode connected file handles.
+	 */
+	if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent)
+		return false;
+
 	/*
 	 * If a decodeable file handle was requested, we need to make sure that
 	 * filesystem can also decode file handles.
-- 
cgit v1.2.3


From d1c696dba120624256ab335ab8247f535b872309 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 8 May 2025 12:40:32 +0530
Subject: PCI: host-common: Convert to library for host controller drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This common library will be used as a placeholder for helper functions
shared by the host controller drivers. This avoids placing the host
controller drivers specific helpers in drivers/pci/*.c, to avoid enlarging
the kernel image on platforms that do not use host controller drivers at
all (like x86/ACPI platforms).

Suggested-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20250508-pcie-reset-slot-v4-3-7050093e2b50@linaro.org
---
 include/linux/pci-ecam.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 3a10f8cfc3ad..d930651473b4 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -93,10 +93,4 @@ extern const struct pci_ecam_ops al_pcie_ops;	/* Amazon Annapurna Labs PCIe */
 extern const struct pci_ecam_ops tegra194_pcie_ops; /* Tegra194 PCIe */
 extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */
 #endif
-
-#if IS_ENABLED(CONFIG_PCI_HOST_COMMON)
-/* for DT-based PCI controllers that support ECAM */
-int pci_host_common_probe(struct platform_device *pdev);
-void pci_host_common_remove(struct platform_device *pdev);
-#endif
 #endif
-- 
cgit v1.2.3


From acc53a0b4c156877773da6e9eea4113dc7e770ae Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 14 May 2025 19:15:07 +0100
Subject: mm: rename page->index to page->__folio_index

All users of page->index have been converted to not refer to it any more.
Update a few pieces of documentation that were missed and prevent new
users from appearing (or at least make them easy to grep for).

Link: https://lkml.kernel.org/r/20250514181508.3019795-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h       | 6 +++---
 include/linux/mm_types.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cd2e513189d6..5009c53ff1fe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1276,9 +1276,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
  * the page's disk buffers. PG_private must be set to tell the VM to call
  * into the filesystem to release these pages.
  *
- * A page may belong to an inode's memory mapping. In this case, page->mapping
- * is the pointer to the inode, and page->index is the file offset of the page,
- * in units of PAGE_SIZE.
+ * A folio may belong to an inode's memory mapping. In this case,
+ * folio->mapping points to the inode, and folio->index is the file
+ * offset of the folio, in units of PAGE_SIZE.
  *
  * If pagecache pages are not associated with an inode, they are said to be
  * anonymous pages. These may become associated with the swapcache, and in that
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3e934dc6057c..17e0dcb87aae 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -107,7 +107,7 @@ struct page {
 			/* See page-flags.h for PAGE_MAPPING_FLAGS */
 			struct address_space *mapping;
 			union {
-				pgoff_t index;		/* Our offset within mapping. */
+				pgoff_t __folio_index;		/* Our offset within mapping. */
 				unsigned long share;	/* share count for fsdax */
 			};
 			/**
@@ -488,7 +488,7 @@ FOLIO_MATCH(flags, flags);
 FOLIO_MATCH(lru, lru);
 FOLIO_MATCH(mapping, mapping);
 FOLIO_MATCH(compound_head, lru);
-FOLIO_MATCH(index, index);
+FOLIO_MATCH(__folio_index, index);
 FOLIO_MATCH(private, private);
 FOLIO_MATCH(_mapcount, _mapcount);
 FOLIO_MATCH(_refcount, _refcount);
@@ -589,7 +589,7 @@ TABLE_MATCH(flags, __page_flags);
 TABLE_MATCH(compound_head, pt_list);
 TABLE_MATCH(compound_head, _pt_pad_1);
 TABLE_MATCH(mapping, __page_mapping);
-TABLE_MATCH(index, pt_index);
+TABLE_MATCH(__folio_index, pt_index);
 TABLE_MATCH(rcu_head, pt_rcu_head);
 TABLE_MATCH(page_type, __page_type);
 TABLE_MATCH(_refcount, __page_refcount);
-- 
cgit v1.2.3


From d9736929445e7f4c60f0093af61ff0b52e2d4412 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 14 May 2025 18:06:04 +0100
Subject: iov: remove copy_page_from_iter_atomic()

All callers now use copy_folio_from_iter_atomic(), so convert
copy_page_from_iter_atomic().  While I'm in there, use kmap_local_folio()
and pagefault_disable() instead of kmap_atomic().  That allows preemption
and/or task migration to happen during the copy_from_user().  Also use the
new folio_test_partial_kmap() predicate instead of open-coding it.

Link: https://lkml.kernel.org/r/20250514170607.3000994-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/uio.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 49ece9e1888f..e46477482663 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -176,8 +176,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
 	return ret;
 }
 
-size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
-				  size_t bytes, struct iov_iter *i);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
@@ -187,6 +185,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset,
+		size_t bytes, struct iov_iter *i);
 
 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
@@ -204,12 +204,6 @@ static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset,
 	return copy_page_from_iter(&folio->page, offset, bytes, i);
 }
 
-static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
-		size_t offset, size_t bytes, struct iov_iter *i)
-{
-	return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
-}
-
 size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
 				 size_t bytes, struct iov_iter *i);
 
-- 
cgit v1.2.3


From 940b01fc8dc1aead398819215650727cb9e7335e Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Sun, 18 May 2025 23:31:39 -0700
Subject: memcg: nmi safe memcg stats for specific archs

There are archs which have NMI but does not support this_cpu_* ops safely
in the nmi context but they support safe atomic ops in nmi context.  For
such archs, let's add infra to use atomic ops for the memcg stats which
can be updated in nmi.

At the moment, the memcg stats which get updated in the objcg charging
path are MEMCG_KMEM, NR_SLAB_RECLAIMABLE_B & NR_SLAB_UNRECLAIMABLE_B.
Rather than adding support for all memcg stats to be nmi safe, let's just
add infra to make these three stats nmi safe which this patch is doing.

Link: https://lkml.kernel.org/r/20250519063142.111219-3-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f7848f73f41c..87b6688f124a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -113,6 +113,12 @@ struct mem_cgroup_per_node {
 	CACHELINE_PADDING(_pad2_);
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 	struct mem_cgroup_reclaim_iter	iter;
+
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+	/* slab stats for nmi context */
+	atomic_t		slab_reclaimable;
+	atomic_t		slab_unreclaimable;
+#endif
 };
 
 struct mem_cgroup_threshold {
@@ -236,6 +242,10 @@ struct mem_cgroup {
 	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
 	atomic_long_t		memory_events_local[MEMCG_NR_MEMORY_EVENTS];
 
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+	/* MEMCG_KMEM for nmi context */
+	atomic_t		kmem_stat;
+#endif
 	/*
 	 * Hint of reclaim pressure for socket memroy management. Note
 	 * that this indicator should NOT be used in legacy cgroup mode
-- 
cgit v1.2.3


From 49c69504f4d340d870f2c3f3d2f404c118ff7b23 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 23 May 2025 00:30:17 +0200
Subject: mmu_notifiers: remove leftover stub macros

Commit ec8832d007cb ("mmu_notifiers: don't invalidate secondary TLBs as
part of mmu_notifier_invalidate_range_end()") removed the main definitions
of {ptep,pmdp_huge,pudp_huge}_clear_flush_notify; just their
!CONFIG_MMU_NOTIFIER stubs are left behind, remove them.

Link: https://lkml.kernel.org/r/20250523-mmu-notifier-cleanup-unused-v1-1-cc1f47ebec33@google.com
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index bc2402a45741..d1094c2d5fb6 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -654,9 +654,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
 #define pmdp_clear_flush_young_notify pmdp_clear_flush_young
 #define ptep_clear_young_notify ptep_test_and_clear_young
 #define pmdp_clear_young_notify pmdp_test_and_clear_young
-#define	ptep_clear_flush_notify ptep_clear_flush
-#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
-#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 
 static inline void mmu_notifier_synchronize(void)
 {
-- 
cgit v1.2.3


From e13e7922d03439e374c263049af5f740ceae6346 Mon Sep 17 00:00:00 2001
From: Juan Yescas <jyescas@google.com>
Date: Wed, 21 May 2025 14:57:45 -0700
Subject: mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order

Problem: On large page size configurations (16KiB, 64KiB), the CMA
alignment requirement (CMA_MIN_ALIGNMENT_BYTES) increases considerably,
and this causes the CMA reservations to be larger than necessary.  This
means that system will have less available MIGRATE_UNMOVABLE and
MIGRATE_RECLAIMABLE page blocks since MIGRATE_CMA can't fallback to them.

The CMA_MIN_ALIGNMENT_BYTES increases because it depends on MAX_PAGE_ORDER
which depends on ARCH_FORCE_MAX_ORDER.  The value of ARCH_FORCE_MAX_ORDER
increases on 16k and 64k kernels.

For example, in ARM, the CMA alignment requirement when:

- CONFIG_ARCH_FORCE_MAX_ORDER default value is used
- CONFIG_TRANSPARENT_HUGEPAGE is set:

PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order | CMA_MIN_ALIGNMENT_BYTES
-----------------------------------------------------------------------
   4KiB   |      10        |       9         |  4KiB * (2 ^  9) =   2MiB
  16Kib   |      11        |      11         | 16KiB * (2 ^ 11) =  32MiB
  64KiB   |      13        |      13         | 64KiB * (2 ^ 13) = 512MiB

There are some extreme cases for the CMA alignment requirement when:

- CONFIG_ARCH_FORCE_MAX_ORDER maximum value is set
- CONFIG_TRANSPARENT_HUGEPAGE is NOT set:
- CONFIG_HUGETLB_PAGE is NOT set

PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order |  CMA_MIN_ALIGNMENT_BYTES
------------------------------------------------------------------------
   4KiB   |      15        |      15         |  4KiB * (2 ^ 15) = 128MiB
  16Kib   |      13        |      13         | 16KiB * (2 ^ 13) = 128MiB
  64KiB   |      13        |      13         | 64KiB * (2 ^ 13) = 512MiB

This affects the CMA reservations for the drivers. If a driver in a
4KiB kernel needs 4MiB of CMA memory, in a 16KiB kernel, the minimal
reservation has to be 32MiB due to the alignment requirements:

reserved-memory {
    ...
    cma_test_reserve: cma_test_reserve {
        compatible = "shared-dma-pool";
        size = <0x0 0x400000>; /* 4 MiB */
        ...
    };
};

reserved-memory {
    ...
    cma_test_reserve: cma_test_reserve {
        compatible = "shared-dma-pool";
        size = <0x0 0x2000000>; /* 32 MiB */
        ...
    };
};

Solution: Add a new config CONFIG_PAGE_BLOCK_ORDER that allows to set the
page block order in all the architectures.  The maximum page block order
will be given by ARCH_FORCE_MAX_ORDER.

By default, CONFIG_PAGE_BLOCK_ORDER will have the same value that
ARCH_FORCE_MAX_ORDER.  This will make sure that current kernel
configurations won't be affected by this change.  It is a opt-in change.

This patch will allow to have the same CMA alignment requirements for
large page sizes (16KiB, 64KiB) as that in 4kb kernels by setting a lower
pageblock_order.

Tests:

- Verified that HugeTLB pages work when pageblock_order is 1, 7, 10 on
  4k and 16k kernels.

- Verified that Transparent Huge Pages work when pageblock_order is 1,
  7, 10 on 4k and 16k kernels.

- Verified that dma-buf heaps allocations work when pageblock_order is
  1, 7, 10 on 4k and 16k kernels.

Benchmarks:

The benchmarks compare 16kb kernels with pageblock_order 10 and 7.  The
reason for the pageblock_order 7 is because this value makes the min CMA
alignment requirement the same as that in 4kb kernels (2MB).

- Perform 100K dma-buf heaps (/dev/dma_heap/system) allocations of
  SZ_8M, SZ_4M, SZ_2M, SZ_1M, SZ_64, SZ_8, SZ_4.  Use simpleperf
  (https://developer.android.com/ndk/guides/simpleperf) to measure the #
  of instructions and page-faults on 16k kernels.  The benchmark was
  executed 10 times.  The averages are below:

           # instructions         |     #page-faults
    order 10     |  order 7       | order 10 | order 7
--------------------------------------------------------
 13,891,765,770	 | 11,425,777,314 |    220   |   217
 14,456,293,487	 | 12,660,819,302 |    224   |   219
 13,924,261,018	 | 13,243,970,736 |    217   |   221
 13,910,886,504	 | 13,845,519,630 |    217   |   221
 14,388,071,190	 | 13,498,583,098 |    223   |   224
 13,656,442,167	 | 12,915,831,681 |    216   |   218
 13,300,268,343	 | 12,930,484,776 |    222   |   218
 13,625,470,223	 | 14,234,092,777 |    219   |   218
 13,508,964,965	 | 13,432,689,094 |    225   |   219
 13,368,950,667	 | 13,683,587,37  |    219   |   225
-------------------------------------------------------------------
 13,803,137,433  | 13,131,974,268 |    220   |   220    Averages

There were 4.85% #instructions when order was 7, in comparison with order
10.

     13,803,137,433 - 13,131,974,268 = -671,163,166 (-4.86%)

The number of page faults in order 7 and 10 were the same.

These results didn't show any significant regression when the
pageblock_order is set to 7 on 16kb kernels.

- Run speedometer 3.1 (https://browserbench.org/Speedometer3.1/) 5 times
  on the 16k kernels with pageblock_order 7 and 10.

order 10 | order 7  | order 7 - order 10 | (order 7 - order 10) %
-------------------------------------------------------------------
  15.8	 |  16.4    |         0.6        |     3.80%
  16.4	 |  16.2    |        -0.2        |    -1.22%
  16.6	 |  16.3    |        -0.3        |    -1.81%
  16.8	 |  16.3    |        -0.5        |    -2.98%
  16.6	 |  16.8    |         0.2        |     1.20%
-------------------------------------------------------------------
  16.44     16.4            -0.04	          -0.24%   Averages

The results didn't show any significant regression when the
pageblock_order is set to 7 on 16kb kernels.

Link: https://lkml.kernel.org/r/20250521215807.1860663-1-jyescas@google.com
Signed-off-by: Juan Yescas <jyescas@google.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h          | 16 ++++++++++++++++
 include/linux/pageblock-flags.h |  8 ++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b19a98c20de8..87a667533d6d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -37,6 +37,22 @@
 
 #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1)
 
+/* Defines the order for the number of pages that have a migrate type. */
+#ifndef CONFIG_PAGE_BLOCK_ORDER
+#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER
+#else
+#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
+#endif /* CONFIG_PAGE_BLOCK_ORDER */
+
+/*
+ * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated
+ * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER,
+ * which defines the order for the number of pages that can have a migrate type
+ */
+#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER)
+#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER
+#endif
+
 /*
  * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
  * costly to service.  That is between allocation orders which should
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fc6b9c87cb0a..e73a4292ef02 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -41,18 +41,18 @@ extern unsigned int pageblock_order;
  * Huge pages are a constant size, but don't exceed the maximum allocation
  * granularity.
  */
-#define pageblock_order		MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER)
+#define pageblock_order		MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER)
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
 #elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
 
-#define pageblock_order		MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER)
+#define pageblock_order		MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER)
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order		MAX_PAGE_ORDER
+/* If huge pages are not used, group by PAGE_BLOCK_ORDER */
+#define pageblock_order		PAGE_BLOCK_ORDER
 
 #endif /* CONFIG_HUGETLB_PAGE */
 
-- 
cgit v1.2.3


From ad6b26b6a0a79166b53209df2ca1cf8636296382 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 23 May 2025 20:51:15 +0800
Subject: sched/numa: add statistics of numa balance task
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On systems with NUMA balancing enabled, it has been found that tracking
task activities resulting from NUMA balancing is beneficial.  NUMA
balancing employs two mechanisms for task migration: one is to migrate
a task to an idle CPU within its preferred node, and the other is to
swap tasks located on different nodes when they are on each other's
preferred nodes.

The kernel already provides NUMA page migration statistics in
/sys/fs/cgroup/mytest/memory.stat and /proc/{PID}/sched.  However, it
lacks statistics regarding task migration and swapping.  Therefore,
relevant counts for task migration and swapping should be added.

The following two new fields:

numa_task_migrated
numa_task_swapped

will be shown in /sys/fs/cgroup/{GROUP}/memory.stat, /proc/{PID}/sched
and /proc/vmstat.

Introducing both per-task and per-memory cgroup (memcg) NUMA balancing
statistics facilitates a rapid evaluation of the performance and
resource utilization of the target workload.  For instance, users can
first identify the container with high NUMA balancing activity and then
further pinpoint a specific task within that group, and subsequently
adjust the memory policy for that task.  In short, although it is
possible to iterate through /proc/$pid/sched to locate the problematic
task, the introduction of aggregated NUMA balancing activity for tasks
within each memcg can assist users in identifying the task more
efficiently through a divide-and-conquer approach.

As Libo Chen pointed out, the memcg event relies on the text names in
vmstat_text, and /proc/vmstat generates corresponding items based on
vmstat_text.  Thus, the relevant task migration and swapping events
introduced in vmstat_text also need to be populated by
count_vm_numa_event(), otherwise these values are zero in /proc/vmstat.

In theory, task migration and swap events are part of the scheduler's
activities.  The reason for exposing them through the
memory.stat/vmstat interface is that we already have NUMA balancing
statistics in memory.stat/vmstat, and these events are closely related
to each other.  Following Shakeel's suggestion, we describe the
end-to-end flow/story of all these events occurring on a timeline for
future reference:

The goal of NUMA balancing is to co-locate a task and its memory pages
on the same NUMA node.  There are two strategies: migrate the pages to
the task's node, or migrate the task to the node where its pages
reside.

Suppose a task p1 is running on Node 0, but its pages are located on
Node 1.  NUMA page fault statistics for p1 reveal its "page footprint"
across nodes.  If NUMA balancing detects that most of p1's pages are on
Node 1:

1.Page Migration Attempt:
The Numa balance first tries to migrate p1's pages to Node 0.
The numa_page_migrate counter increments.

2.Task Migration Strategies:
After the page migration finishes, Numa balance checks every
1 second to see if p1 can be migrated to Node 1.

Case 2.1: Idle CPU Available

  If Node 1 has an idle CPU, p1 is directly scheduled there.  This
  event is logged as numa_task_migrated.

Case 2.2: No Idle CPU (Task Swap)

  If all CPUs on Node1 are busy, direct migration could cause CPU
  contention or load imbalance.  Instead: The Numa balance selects a
  candidate task p2 on Node 1 that prefers Node 0 (e.g., due to its own
  page footprint).  p1 and p2 are swapped.  This cross-node swap is
  recorded as numa_task_swapped.

Link: https://lkml.kernel.org/r/d00edb12ba0f0de3c5222f61487e65f2ac58f5b1.1748493462.git.yu.c.chen@intel.com
Link: https://lkml.kernel.org/r/7ef90a88602ed536be46eba7152ed0d33bad5790.1748002400.git.yu.c.chen@intel.com
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Cc: Aubrey Li <aubrey.li@intel.com>
Cc: Ayush Jain <Ayush.jain3@amd.com>
Cc: "Chen, Tim C" <tim.c.chen@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Libo Chen <libo.chen@oracle.com>
Cc: Mel Gorman <mgorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched.h         | 4 ++++
 include/linux/vm_event_item.h | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..1c50e30b5c01 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -549,6 +549,10 @@ struct sched_statistics {
 	u64				nr_failed_migrations_running;
 	u64				nr_failed_migrations_hot;
 	u64				nr_forced_migrations;
+#ifdef CONFIG_NUMA_BALANCING
+	u64				numa_task_migrated;
+	u64				numa_task_swapped;
+#endif
 
 	u64				nr_wakeups;
 	u64				nr_wakeups_sync;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 9e15a088ba38..91a3ce9a2687 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -66,6 +66,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		NUMA_HINT_FAULTS,
 		NUMA_HINT_FAULTS_LOCAL,
 		NUMA_PAGE_MIGRATE,
+		NUMA_TASK_MIGRATE,
+		NUMA_TASK_SWAP,
 #endif
 #ifdef CONFIG_MIGRATION
 		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
-- 
cgit v1.2.3


From 5c78e793f78732b60276401f75cc1a101f9ad121 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Fri, 30 May 2025 12:06:47 -0700
Subject: overflow: Introduce __DEFINE_FLEX for having no initializer

While not yet in the tree, there is a proposed patch[1] that was
depending on the prior behavior of _DEFINE_FLEX, which did not have an
explicit initializer. Provide this via __DEFINE_FLEX now, which can also
have attributes applied (e.g. __uninitialized).

Examples of the resulting initializer behaviors can be seen here:
https://godbolt.org/z/P7Go8Tr33

Link: https://lore.kernel.org/netdev/20250520205920.2134829-9-anthony.l.nguyen@intel.com [1]
Fixes: 47e36ed78406 ("overflow: Fix direct struct member initialization in _DEFINE_FLEX()")
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/overflow.h | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 7b7be27ca113..154ed0dbb43f 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -389,24 +389,37 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 	struct_size((type *)NULL, member, count)
 
 /**
- * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
- * Enables caller macro to pass (different) initializer.
+ * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass arbitrary trailing expressions
  *
  * @type: structure type name, including "struct" keyword.
  * @name: Name for a variable to define.
  * @member: Name of the array member.
  * @count: Number of elements in the array; must be compile-time const.
- * @initializer: Initializer expression (e.g., pass `= { }` at minimum).
+ * @trailer: Trailing expressions for attributes and/or initializers.
  */
-#define _DEFINE_FLEX(type, name, member, count, initializer...)			\
+#define __DEFINE_FLEX(type, name, member, count, trailer...)			\
 	_Static_assert(__builtin_constant_p(count),				\
 		       "onstack flex array members require compile-time const count"); \
 	union {									\
 		u8 bytes[struct_size_t(type, member, count)];			\
 		type obj;							\
-	} name##_u = { .obj initializer };					\
+	} name##_u trailer;							\
 	type *name = (type *)&name##_u
 
+/**
+ * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass (different) initializer.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ * @initializer: Initializer expression (e.g., pass `= { }` at minimum).
+ */
+#define _DEFINE_FLEX(type, name, member, count, initializer...)			\
+	__DEFINE_FLEX(type, name, member, count, = { .obj initializer })
+
 /**
  * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing
  * flexible array member, when it does not have a __counted_by annotation.
@@ -424,7 +437,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * elements in array @member.
  */
 #define DEFINE_RAW_FLEX(type, name, member, count)	\
-	_DEFINE_FLEX(type, name, member, count, = {})
+	__DEFINE_FLEX(type, name, member, count, = { })
 
 /**
  * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
-- 
cgit v1.2.3


From 1c8a0ed2043c30cee97facd1eb8cff88b6c7ea4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 7 Apr 2025 13:12:14 +0300
Subject: PCI: Remove unused pci_printk()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

include/linux/pci.h provides low-level pci_printk() interface that is
not used since the commits fab874e12593 ("PCI/AER: Descope pci_printk()
to aer_printk()") and 588021b28642 ("PCI: shpchp: Remove 'shpchp_debug'
module parameter"). PCI logging should not use pci_printk() but pci_*()
wrappers that follow the usual logging wrapper patterns.

Remove pci_printk().

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20250407101215.1376-1-ilpo.jarvinen@linux.intel.com
---
 include/linux/pci.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e8e3fd77e96..e293ad5d840d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2694,9 +2694,6 @@ void pci_uevent_ers(struct pci_dev *pdev, enum  pci_ers_result err_type);
 
 #include <linux/dma-mapping.h>
 
-#define pci_printk(level, pdev, fmt, arg...) \
-	dev_printk(level, &(pdev)->dev, fmt, ##arg)
-
 #define pci_emerg(pdev, fmt, arg...)	dev_emerg(&(pdev)->dev, fmt, ##arg)
 #define pci_alert(pdev, fmt, arg...)	dev_alert(&(pdev)->dev, fmt, ##arg)
 #define pci_crit(pdev, fmt, arg...)	dev_crit(&(pdev)->dev, fmt, ##arg)
-- 
cgit v1.2.3


From b8628379957d0c8c057782ec1a8512de6d4ba29c Mon Sep 17 00:00:00 2001
From: Sumanth Gavini <sumanth.gavini@yahoo.com>
Date: Mon, 19 May 2025 19:08:05 -0700
Subject: mfd: maxim: Correct Samsung "Electronics" spelling in headers

Fix the misspelling of 'Electronics' in MFD driver headers.

Link: https://lore.kernel.org/lkml/3aa30119-60e5-4dcb-b13a-1753966ca775@sirena.org.uk/#t
Link: https://lore.kernel.org/r/20250520020808.159586-1-sumanth.gavini@yahoo.com
Signed-off-by: Sumanth Gavini <sumanth.gavini@yahoo.com>
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/max8997-private.h | 2 +-
 include/linux/mfd/max8998-private.h | 2 +-
 include/linux/mfd/max8998.h         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index f70eea0f2264..261c0aae7d00 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -2,7 +2,7 @@
 /*
  * max8997-private.h - Voltage regulator driver for the Maxim 8997
  *
- *  Copyright (C) 2010 Samsung Electrnoics
+ *  Copyright (C) 2010 Samsung Electronics
  *  MyungJoo Ham <myungjoo.ham@samsung.com>
  */
 
diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h
index 6deb5f577602..d77dc18db6eb 100644
--- a/include/linux/mfd/max8998-private.h
+++ b/include/linux/mfd/max8998-private.h
@@ -2,7 +2,7 @@
 /*
  * max8998-private.h - Voltage regulator driver for the Maxim 8998
  *
- *  Copyright (C) 2009-2010 Samsung Electrnoics
+ *  Copyright (C) 2009-2010 Samsung Electronics
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *  Marek Szyprowski <m.szyprowski@samsung.com>
  */
diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h
index a054e55c8646..5473f1983e31 100644
--- a/include/linux/mfd/max8998.h
+++ b/include/linux/mfd/max8998.h
@@ -2,7 +2,7 @@
 /*
  * max8998.h - Voltage regulator driver for the Maxim 8998
  *
- *  Copyright (C) 2009-2010 Samsung Electrnoics
+ *  Copyright (C) 2009-2010 Samsung Electronics
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *  Marek Szyprowski <m.szyprowski@samsung.com>
  */
-- 
cgit v1.2.3


From ffb006aa433e8109ec79320c344fb69947997ba1 Mon Sep 17 00:00:00 2001
From: Sumanth Gavini <sumanth.gavini@yahoo.com>
Date: Mon, 19 May 2025 16:20:23 -0700
Subject: mfd: maxim: Correct Samsung "Electronics" spelling in copyright
 headers

Fix the misspelling of 'Electronics' in MFD driver copyright headers.

Link: https://lore.kernel.org/lkml/3aa30119-60e5-4dcb-b13a-1753966ca775@sirena.org.uk/#t
Link: https://lore.kernel.org/r/20250519232025.152769-1-sumanth.gavini@yahoo.com
Signed-off-by: Sumanth Gavini <sumanth.gavini@yahoo.com>
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/max14577-private.h | 2 +-
 include/linux/mfd/max14577.h         | 2 +-
 include/linux/mfd/max77686-private.h | 2 +-
 include/linux/mfd/max77686.h         | 2 +-
 include/linux/mfd/max77693-private.h | 2 +-
 include/linux/mfd/max77693.h         | 2 +-
 include/linux/mfd/max8997.h          | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index a21374f8ad26..dd51a37fa37f 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -2,7 +2,7 @@
 /*
  * max14577-private.h - Common API for the Maxim 14577/77836 internal sub chip
  *
- * Copyright (C) 2014 Samsung Electrnoics
+ * Copyright (C) 2014 Samsung Electronics
  * Chanwoo Choi <cw00.choi@samsung.com>
  * Krzysztof Kozlowski <krzk@kernel.org>
  */
diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h
index 8b3ef891ba42..0fda5c2e745a 100644
--- a/include/linux/mfd/max14577.h
+++ b/include/linux/mfd/max14577.h
@@ -2,7 +2,7 @@
 /*
  * max14577.h - Driver for the Maxim 14577/77836
  *
- * Copyright (C) 2014 Samsung Electrnoics
+ * Copyright (C) 2014 Samsung Electronics
  * Chanwoo Choi <cw00.choi@samsung.com>
  * Krzysztof Kozlowski <krzk@kernel.org>
  *
diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h
index ea635d12a741..e6b8b4014dc0 100644
--- a/include/linux/mfd/max77686-private.h
+++ b/include/linux/mfd/max77686-private.h
@@ -2,7 +2,7 @@
 /*
  * max77686-private.h - Voltage regulator driver for the Maxim 77686/802
  *
- *  Copyright (C) 2012 Samsung Electrnoics
+ *  Copyright (C) 2012 Samsung Electronics
  *  Chiwoong Byun <woong.byun@samsung.com>
  */
 
diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h
index d0fb510875e6..7c4624acd1db 100644
--- a/include/linux/mfd/max77686.h
+++ b/include/linux/mfd/max77686.h
@@ -2,7 +2,7 @@
 /*
  * max77686.h - Driver for the Maxim 77686/802
  *
- *  Copyright (C) 2012 Samsung Electrnoics
+ *  Copyright (C) 2012 Samsung Electronics
  *  Chiwoong Byun <woong.byun@samsung.com>
  *
  * This driver is based on max8997.h
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index c324d548619e..8e7c35b5ea1c 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -2,7 +2,7 @@
 /*
  * max77693-private.h - Voltage regulator driver for the Maxim 77693
  *
- *  Copyright (C) 2012 Samsung Electrnoics
+ *  Copyright (C) 2012 Samsung Electronics
  *  SangYoung Son <hello.son@samsung.com>
  *
  * This program is not provided / owned by Maxim Integrated Products.
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index c67c16ba8649..8e77ebeb7cf1 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -2,7 +2,7 @@
 /*
  * max77693.h - Driver for the Maxim 77693
  *
- *  Copyright (C) 2012 Samsung Electrnoics
+ *  Copyright (C) 2012 Samsung Electronics
  *  SangYoung Son <hello.son@samsung.com>
  *
  * This program is not provided / owned by Maxim Integrated Products.
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 5c2cc1103437..fb36e1386069 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -2,7 +2,7 @@
 /*
  * max8997.h - Driver for the Maxim 8997/8966
  *
- *  Copyright (C) 2009-2010 Samsung Electrnoics
+ *  Copyright (C) 2009-2010 Samsung Electronics
  *  MyungJoo Ham <myungjoo.ham@samsung.com>
  *
  * This driver is based on max8998.h
-- 
cgit v1.2.3


From 10f4a7cd724e34b7a6ff96e57ac49dc0cadececc Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 20 May 2025 13:20:37 -0700
Subject: nvme: fix command limits status code

The command specific status code, 0x183, was introduced in the NVMe 2.0
specification defined to "Command Size Limits Exceeded" and only ever
applied to DSM and Copy commands.  Fix the name and, remove the
incorrect translation to error codes and special treatment in the
target code for it.

Fixes: 3b7c33b28a44d4 ("nvme.h: add Write Zeroes definitions")
Cc: Chaitanya Kulkarni <chaitanyak@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 51308f65b72f..b65a1b9f2116 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -2171,7 +2171,7 @@ enum {
 	NVME_SC_BAD_ATTRIBUTES		= 0x180,
 	NVME_SC_INVALID_PI		= 0x181,
 	NVME_SC_READ_ONLY		= 0x182,
-	NVME_SC_ONCS_NOT_SUPPORTED	= 0x183,
+	NVME_SC_CMD_SIZE_LIM_EXCEEDED	= 0x183,
 
 	/*
 	 * I/O Command Set Specific - Fabrics commands:
-- 
cgit v1.2.3


From 434d7f9b0e24e1f0166d05f10881a8ab386845b7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 30 May 2025 16:30:11 +0800
Subject: timens: Add struct seq_file forward declaration

Add forward declaration of struct seq_file before using it in a
function prototype.

Fixes: 04a8682a71be ("fs/proc: Introduce /proc/pid/timens_offsets")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andrei Vagin <avagin@gmail.com>
Link: https://lore.kernel.org/all/aDlskzKIAULMlwPj@gondor.apana.org.au
---
 include/linux/time_namespace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 0b8b32bf0655..bb2c52f4fc94 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -12,6 +12,7 @@
 struct user_namespace;
 extern struct user_namespace init_user_ns;
 
+struct seq_file;
 struct vm_area_struct;
 
 struct timens_offsets {
-- 
cgit v1.2.3


From 1e1f706fc2ce90eaaf3480b3d5f27885960d751c Mon Sep 17 00:00:00 2001
From: Lachlan Hodges <lachlan.hodges@morsemicro.com>
Date: Tue, 3 Jun 2025 15:35:38 +1000
Subject: wifi: cfg80211/mac80211: correctly parse S1G beacon optional elements

S1G beacons are not traditional beacons but a type of extension frame.
Extension frames contain the frame control and duration fields, followed
by zero or more optional fields before the frame body. These optional
fields are distinct from the variable length elements.

The presence of optional fields is indicated in the frame control field.
To correctly locate the elements offset, the frame control must be parsed
to identify which optional fields are present. Currently, mac80211 parses
S1G beacons based on fixed assumptions about the frame layout, without
inspecting the frame control field. This can result in incorrect offsets
to the "variable" portion of the frame.

Properly parse S1G beacon frames by using the field lengths defined in
IEEE 802.11-2024, section 9.3.4.3, ensuring that the elements offset is
calculated accurately.

Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results")
Fixes: cd418ba63f0c ("mac80211: convert S1G beacon to scan results")
Signed-off-by: Lachlan Hodges <lachlan.hodges@morsemicro.com>
Link: https://patch.msgid.link/20250603053538.468562-1-lachlan.hodges@morsemicro.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 79 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 420c7f9aa6ee..ce377f7fb912 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -111,6 +111,8 @@
 
 /* bits unique to S1G beacon */
 #define IEEE80211_S1G_BCN_NEXT_TBTT	0x100
+#define IEEE80211_S1G_BCN_CSSID		0x200
+#define IEEE80211_S1G_BCN_ANO		0x400
 
 /* see 802.11ah-2016 9.9 NDP CMAC frames */
 #define IEEE80211_S1G_1MHZ_NDP_BITS	25
@@ -153,9 +155,6 @@
 
 #define IEEE80211_ANO_NETTYPE_WILD              15
 
-/* bits unique to S1G beacon */
-#define IEEE80211_S1G_BCN_NEXT_TBTT    0x100
-
 /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */
 #define IEEE80211_CTL_EXT_POLL		0x2000
 #define IEEE80211_CTL_EXT_SPR		0x3000
@@ -627,6 +626,42 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc)
 	       cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON);
 }
 
+/**
+ * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame contains the variable-length
+ *	next TBTT field
+ */
+static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc)
+{
+	return ieee80211_is_s1g_beacon(fc) &&
+		(fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT));
+}
+
+/**
+ * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame contains the variable-length
+ *	ANO field
+ */
+static inline bool ieee80211_s1g_has_ano(__le16 fc)
+{
+	return ieee80211_is_s1g_beacon(fc) &&
+		(fc & cpu_to_le16(IEEE80211_S1G_BCN_ANO));
+}
+
+/**
+ * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame contains the variable-length
+ *	compressed SSID field
+ */
+static inline bool ieee80211_s1g_has_cssid(__le16 fc)
+{
+	return ieee80211_is_s1g_beacon(fc) &&
+		(fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID));
+}
+
 /**
  * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon
  * @fc: frame control bytes in little-endian byteorder
@@ -1245,16 +1280,40 @@ struct ieee80211_ext {
 			u8 change_seq;
 			u8 variable[0];
 		} __packed s1g_beacon;
-		struct {
-			u8 sa[ETH_ALEN];
-			__le32 timestamp;
-			u8 change_seq;
-			u8 next_tbtt[3];
-			u8 variable[0];
-		} __packed s1g_short_beacon;
 	} u;
 } __packed __aligned(2);
 
+/**
+ * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields
+ * @fc: frame control bytes in little-endian byteorder
+ * Return: total length in bytes of the optional fixed-length fields
+ *
+ * S1G beacons may contain up to three optional fixed-length fields that
+ * precede the variable-length elements. Whether these fields are present
+ * is indicated by flags in the frame control field.
+ *
+ * From IEEE 802.11-2024 section 9.3.4.3:
+ *  - Next TBTT field may be 0 or 3 bytes
+ *  - Short SSID field may be 0 or 4 bytes
+ *  - Access Network Options (ANO) field may be 0 or 1 byte
+ */
+static inline size_t
+ieee80211_s1g_optional_len(__le16 fc)
+{
+	size_t len = 0;
+
+	if (ieee80211_s1g_has_next_tbtt(fc))
+		len += 3;
+
+	if (ieee80211_s1g_has_cssid(fc))
+		len += 4;
+
+	if (ieee80211_s1g_has_ano(fc))
+		len += 1;
+
+	return len;
+}
+
 #define IEEE80211_TWT_CONTROL_NDP			BIT(0)
 #define IEEE80211_TWT_CONTROL_RESP_MODE			BIT(1)
 #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST	BIT(3)
-- 
cgit v1.2.3


From 500e626c4a5bf2fa7cc6f1cd6dd86c77b5b127f2 Mon Sep 17 00:00:00 2001
From: Andy Chiu <andybnac@gmail.com>
Date: Tue, 8 Apr 2025 02:08:28 +0800
Subject: kernel: ftrace: export ftrace_sync_ipi

The following ftrace patch for riscv uses a data store to update ftrace
function. Therefore, a romote fence is required to order it against
function_trace_op updates. The mechanism is similar to the fence between
function_trace_op and update_ftrace_func in the generic ftrace, so we
leverage the same ftrace_sync_ipi function.

[ alex: Fix build warning when !CONFIG_DYNAMIC_FTRACE ]

Signed-off-by: Andy Chiu <andybnac@gmail.com>
Link: https://lore.kernel.org/r/20250407180838.42877-4-andybnac@gmail.com
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
---
 include/linux/ftrace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fbabc3d848b3..30374478cb07 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -635,6 +635,8 @@ enum {
 #define ftrace_get_symaddr(fentry_ip) (0)
 #endif
 
+void ftrace_sync_ipi(void *data);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 void ftrace_arch_code_modify_prepare(void);
-- 
cgit v1.2.3


From 8c21c4111128365f81a88573eeb2844fa696b299 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 2 Jun 2025 19:55:36 +0900
Subject: module: make __mod_device_table__* symbols static

The __mod_device_table__* symbols are only parsed by modpost to generate
MODULE_ALIAS() entries from MODULE_DEVICE_TABLE().

Therefore, these symbols do not need to be globally visible, or globally
unique.

If they are in the global scope, we would worry about the symbol
uniqueness, but modpost is fine with parsing multiple symbols with the
same name.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Petr Pavlu <petr.pavlu@suse.com>
---
 include/linux/module.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 8050f77c3b64..92e1420fccdf 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -249,8 +249,8 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name);
 #ifdef MODULE
 /* Creates an alias so file2alias.c can find device table. */
 #define MODULE_DEVICE_TABLE(type, name)					\
-extern typeof(name) __mod_device_table__##type##__##name		\
-  __attribute__ ((unused, alias(__stringify(name))))
+static typeof(name) __mod_device_table__##type##__##name		\
+  __attribute__ ((used, alias(__stringify(name))))
 #else  /* !MODULE */
 #define MODULE_DEVICE_TABLE(type, name)
 #endif
-- 
cgit v1.2.3


From 6093faaf9593fca92f96f165c95ff4b53353b1f4 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
Date: Wed, 5 Mar 2025 16:37:06 +0800
Subject: raid6: Add RISC-V SIMD syndrome and recovery calculations

The assembly is originally based on the ARM NEON and int.uc, but uses
RISC-V vector instructions to implement the RAID6 syndrome and
recovery calculations.

The functions are tested on QEMU running with the option "-icount shift=0":

  raid6: rvvx1    gen()  1008 MB/s
  raid6: rvvx2    gen()  1395 MB/s
  raid6: rvvx4    gen()  1584 MB/s
  raid6: rvvx8    gen()  1694 MB/s
  raid6: int64x8  gen()   113 MB/s
  raid6: int64x4  gen()   116 MB/s
  raid6: int64x2  gen()   272 MB/s
  raid6: int64x1  gen()   229 MB/s
  raid6: using algorithm rvvx8 gen() 1694 MB/s
  raid6: .... xor() 1000 MB/s, rmw enabled
  raid6: using rvv recovery algorithm

[Charlie: - Fixup vector options]

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Tested-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20250305083707.74218-1-zhangchunyan@iscas.ac.cn
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
---
 include/linux/raid/pq.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 98030accf641..72ff44cca864 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -108,6 +108,10 @@ extern const struct raid6_calls raid6_vpermxor4;
 extern const struct raid6_calls raid6_vpermxor8;
 extern const struct raid6_calls raid6_lsx;
 extern const struct raid6_calls raid6_lasx;
+extern const struct raid6_calls raid6_rvvx1;
+extern const struct raid6_calls raid6_rvvx2;
+extern const struct raid6_calls raid6_rvvx4;
+extern const struct raid6_calls raid6_rvvx8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
@@ -125,6 +129,7 @@ extern const struct raid6_recov_calls raid6_recov_s390xc;
 extern const struct raid6_recov_calls raid6_recov_neon;
 extern const struct raid6_recov_calls raid6_recov_lsx;
 extern const struct raid6_recov_calls raid6_recov_lasx;
+extern const struct raid6_recov_calls raid6_recov_rvv;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
-- 
cgit v1.2.3


From 044d2aee6c575231ed4a24fb3d119bad0937488b Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 21 May 2025 09:06:02 -0700
Subject: alloc_tag: handle module codetag load errors as module load failures

Failures inside codetag_load_module() are currently ignored.  As a result
an error there would not cause a module load failure and freeing of the
associated resources.  Correct this behavior by propagating the error code
to the caller and handling possible errors.  With this change, error to
allocate percpu counters, which happens at this stage, will not be ignored
and will cause a module load failure and freeing of resources.  With this
change we also do not need to disable memory allocation profiling when
this error happens, instead we fail to load the module.

Link: https://lkml.kernel.org/r/20250521160602.1940771-1-surenb@google.com
Fixes: 10075262888b ("alloc_tag: allocate percpu counters for module tags dynamically")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reported-by: Casey Chen <cachen@purestorage.com>
Closes: https://lore.kernel.org/all/20250520231620.15259-1-cachen@purestorage.com/
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Wang <00107082@163.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Luis Chamberalin <mcgrof@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/codetag.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index 0ee4c21c6dbc..5f2b9a1f722c 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -36,8 +36,8 @@ union codetag_ref {
 struct codetag_type_desc {
 	const char *section;
 	size_t tag_size;
-	void (*module_load)(struct module *mod,
-			    struct codetag *start, struct codetag *end);
+	int (*module_load)(struct module *mod,
+			   struct codetag *start, struct codetag *end);
 	void (*module_unload)(struct module *mod,
 			      struct codetag *start, struct codetag *end);
 #ifdef CONFIG_MODULES
@@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struct module *mod, const char *name,
 				   unsigned long align);
 void codetag_free_module_sections(struct module *mod);
 void codetag_module_replaced(struct module *mod, struct module *new_mod);
-void codetag_load_module(struct module *mod);
+int codetag_load_module(struct module *mod);
 void codetag_unload_module(struct module *mod);
 
 #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */
@@ -103,7 +103,7 @@ codetag_alloc_module_section(struct module *mod, const char *name,
 			     unsigned long align) { return NULL; }
 static inline void codetag_free_module_sections(struct module *mod) {}
 static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {}
-static inline void codetag_load_module(struct module *mod) {}
+static inline int codetag_load_module(struct module *mod) { return 0; }
 static inline void codetag_unload_module(struct module *mod) {}
 
 #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */
-- 
cgit v1.2.3


From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: mm/hugetlb: unshare page tables during VMA split, not before

Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split().  This happens before the VMA lock and rmap locks are
taken - which is too early, it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.

Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens.  At that
point, both the VMA and the rmap(s) are write-locked.

An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:

1. from hugetlb_split(), holding:
    - mmap lock (exclusively)
    - VMA lock
    - file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
   call us with only the mmap lock held (in shared mode), but currently
   only runs while holding mmap lock (exclusively) and VMA lock

Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.

[jannh@google.com: v2]
  Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>	[b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0598f36931de..42f374e828a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte);
 bool is_hugetlb_entry_hwpoisoned(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
@@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
 {
 }
 
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 
 #ifndef pgd_write
-- 
cgit v1.2.3


From bab77c0d191e241d2d59a845c7ed68bfa6e1b257 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 May 2025 13:28:37 -0400
Subject: finish_automount(): don't leak MNT_LOCKED from parent to child

Intention for MNT_LOCKED had always been to protect the internal
mountpoints within a subtree that got copied across the userns boundary,
not the mountpoint that tree got attached to - after all, it _was_
exposed before the copying.

For roots of secondary copies that is enforced in attach_recursive_mnt() -
MNT_LOCKED is explicitly stripped for those.  For the root of primary
copy we are almost always guaranteed that MNT_LOCKED won't be there,
so attach_recursive_mnt() doesn't bother.  Unfortunately, one call
chain got overlooked - triggering e.g. NFS referral will have the
submount inherit the public flags from parent; that's fine for such
things as read-only, nosuid, etc., but not for MNT_LOCKED.

This is particularly pointless since the mount attached by finish_automount()
is usually expirable, which makes any protection granted by MNT_LOCKED
null and void; just wait for a while and that mount will go away on its own.

Include MNT_LOCKED into the set of flags to be ignored by do_add_mount() - it
really is an internal flag.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users")
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mount.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mount.h b/include/linux/mount.h
index 6904ad33ee7a..1a3136e53eaa 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -65,7 +65,8 @@ enum mount_flags {
 	MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
 
 	MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
-			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED,
+			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED |
+			     MNT_LOCKED,
 };
 
 struct vfsmount {
-- 
cgit v1.2.3


From 41cb08555c4164996d67c78b3bf1c658075b75f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 9 May 2025 07:51:14 +0200
Subject: treewide, timers: Rename from_timer() to timer_container_of()

Move this API to the canonical timer_*() namespace.

[ tglx: Redone against pre rc1 ]

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/aB2X0jCKQO56WdMt@gmail.com
---
 include/linux/timer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index f636f55c427d..0414d9e6b4fc 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -129,7 +129,7 @@ extern void timer_destroy_on_stack(struct timer_list *timer);
 static inline void timer_destroy_on_stack(struct timer_list *timer) { }
 #endif
 
-#define from_timer(var, callback_timer, timer_fieldname) \
+#define timer_container_of(var, callback_timer, timer_fieldname)	\
 	container_of(callback_timer, typeof(*var), timer_fieldname)
 
 /**
-- 
cgit v1.2.3


From 1a4eabf662543c62ae1e71a26d1c8e6643c66388 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:01 +0100
Subject: mfd: adp5585: Refactor how regmap defaults are handled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only thing changing between variants is the regmap default
registers. Hence, instead of having a regmap configuration for every
variant (duplicating lots of fields), add a chip info type of structure
with a regmap ID to identify which defaults to use and populate
regmap_config at runtime given a template plus the id. Also note that
between variants, the defaults can be the same which means the chip info
structure can be used in more than one compatible.

This will also make it simpler adding new chips with more variants.

Also note that the chip info structures are deliberately not const as
they will also contain lots of members that are the same between the
different devices variants and so we will fill those at runtime.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-6-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index 016033cd68e4..c56af8d8d76c 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -119,8 +119,19 @@
 
 struct regmap;
 
+enum adp5585_variant {
+	ADP5585_00 = 1,
+	ADP5585_01,
+	ADP5585_02,
+	ADP5585_03,
+	ADP5585_04,
+	ADP5585_MAX
+};
+
 struct adp5585_dev {
+	struct device *dev;
 	struct regmap *regmap;
+	enum adp5585_variant variant;
 };
 
 #endif
-- 
cgit v1.2.3


From 0190a72f28ee0995c546fd4fcf80ed25a0fc4b28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:02 +0100
Subject: mfd: adp5585: Add support for adp5589
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ADP5589 is a 19 I/O port expander with built-in keypad matrix decoder,
programmable logic, reset generator, and PWM generator.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-7-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index c56af8d8d76c..70e58122a36a 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -117,6 +117,12 @@
 #define ADP5585_BANK(n)			((n) >= 6 ? 1 : 0)
 #define ADP5585_BIT(n)			((n) >= 6 ? BIT((n) - 6) : BIT(n))
 
+/* ADP5589 */
+#define		ADP5589_MAN_ID_VALUE		0x10
+#define ADP5589_GPI_STATUS_C		0x18
+#define ADP5589_INT_EN			0x4e
+#define ADP5589_MAX_REG			ADP5589_INT_EN
+
 struct regmap;
 
 enum adp5585_variant {
@@ -125,6 +131,9 @@ enum adp5585_variant {
 	ADP5585_02,
 	ADP5585_03,
 	ADP5585_04,
+	ADP5589_00,
+	ADP5589_01,
+	ADP5589_02,
 	ADP5585_MAX
 };
 
@@ -132,6 +141,7 @@ struct adp5585_dev {
 	struct device *dev;
 	struct regmap *regmap;
 	enum adp5585_variant variant;
+	unsigned int id;
 };
 
 #endif
-- 
cgit v1.2.3


From 7077fb501b95360c7fe35553f2bdb1ccf34edd16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:03 +0100
Subject: mfd: adp5585: Add a per chip reg struture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are some differences in the register map between the devices.
Hence, add a register structure per device. This will be needed in
following patches.

On top of that adp5585_fill_regmap_config() is renamed and reworked so
that the current struct adp5585_info act as template (they indeed
contain all the different data between variants) which can then be
complemented depending on the device (as identified by the id register).
This is done like this since a lot of the data is pretty much the same
between variants of the same device.

Reviewed-by: Lee Jones <lee@kernel.org>
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-8-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index 70e58122a36a..6ecb90a6276c 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -120,6 +120,7 @@
 /* ADP5589 */
 #define		ADP5589_MAN_ID_VALUE		0x10
 #define ADP5589_GPI_STATUS_C		0x18
+#define ADP5589_PIN_CONFIG_D		0x4C
 #define ADP5589_INT_EN			0x4e
 #define ADP5589_MAX_REG			ADP5589_INT_EN
 
@@ -137,9 +138,14 @@ enum adp5585_variant {
 	ADP5585_MAX
 };
 
+struct adp5585_regs {
+	unsigned int ext_cfg;
+};
+
 struct adp5585_dev {
 	struct device *dev;
 	struct regmap *regmap;
+	const struct adp5585_regs *regs;
 	enum adp5585_variant variant;
 	unsigned int id;
 };
-- 
cgit v1.2.3


From 9f425bf713b511b1078e0fea5a88c497e13dbb64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:04 +0100
Subject: gpio: adp5585: add support for the adp5589 expander
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Support the adp5589 I/O expander which supports up to 19 pins. We need
to add a chip_info based struct since accessing register "banks"
and "bits" differs between devices.

Also some register addresses are different.

While at it move ADP558X_GPIO_MAX defines to the main header file and
rename them. That information will be needed by the top level device in
a following change.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-9-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index 6ecb90a6276c..d26f722cf31a 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -107,23 +107,23 @@
 
 #define ADP5585_MAX_REG			ADP5585_INT_EN
 
-/*
- * Bank 0 covers pins "GPIO 1/R0" to "GPIO 6/R5", numbered 0 to 5 by the
- * driver, and bank 1 covers pins "GPIO 7/C0" to "GPIO 11/C4", numbered 6 to
- * 10. Some variants of the ADP5585 don't support "GPIO 6/R5". As the driver
- * uses identical GPIO numbering for all variants to avoid confusion, GPIO 5 is
- * marked as reserved in the device tree for variants that don't support it.
- */
-#define ADP5585_BANK(n)			((n) >= 6 ? 1 : 0)
-#define ADP5585_BIT(n)			((n) >= 6 ? BIT((n) - 6) : BIT(n))
+#define ADP5585_PIN_MAX			11
 
 /* ADP5589 */
 #define		ADP5589_MAN_ID_VALUE		0x10
+#define ADP5589_GPI_STATUS_A		0x16
 #define ADP5589_GPI_STATUS_C		0x18
+#define ADP5589_RPULL_CONFIG_A		0x19
+#define ADP5589_DEBOUNCE_DIS_A		0x27
+#define ADP5589_GPO_DATA_OUT_A		0x2a
+#define ADP5589_GPO_OUT_MODE_A		0x2d
+#define		ADP5589_GPIO_DIRECTION_A	0x30
 #define ADP5589_PIN_CONFIG_D		0x4C
 #define ADP5589_INT_EN			0x4e
 #define ADP5589_MAX_REG			ADP5589_INT_EN
 
+#define ADP5589_PIN_MAX			19
+
 struct regmap;
 
 enum adp5585_variant {
-- 
cgit v1.2.3


From 75024f97e82e63d02b0743500efb1e264a1c2dd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:05 +0100
Subject: pwm: adp5585: add support for adp5589
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the adp5589 I/O expander. From a PWM point of view it is
pretty similar to adp5585. Main difference is the address
of registers meaningful for configuring the PWM.

Acked-by: Uwe Kleine-König <ukleinek@kernel.org>
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-10-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index d26f722cf31a..77f7c74f084d 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -118,6 +118,9 @@
 #define ADP5589_GPO_DATA_OUT_A		0x2a
 #define ADP5589_GPO_OUT_MODE_A		0x2d
 #define		ADP5589_GPIO_DIRECTION_A	0x30
+#define ADP5589_PWM_OFFT_LOW		0x3e
+#define ADP5589_PWM_ONT_LOW		0x40
+#define ADP5589_PWM_CFG			0x42
 #define ADP5589_PIN_CONFIG_D		0x4C
 #define ADP5589_INT_EN			0x4e
 #define ADP5589_MAX_REG			ADP5589_INT_EN
-- 
cgit v1.2.3


From 47a1f759b776ec9287f675f5d4fbf60b94cc566d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:07 +0100
Subject: mfd: adp5585: Add support for event handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These devices are capable of generate FIFO based events based on KEY or
GPI presses. Add support for handling these events. This is in
preparation of adding full support for keymap and gpis based events.

Reviewed-by: Lee Jones <lee@kernel.org>
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-12-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index 77f7c74f084d..43a33a3d3f5a 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -10,13 +10,20 @@
 #define __MFD_ADP5585_H_
 
 #include <linux/bits.h>
+#include <linux/notifier.h>
 
 #define ADP5585_ID			0x00
 #define		ADP5585_MAN_ID_VALUE		0x20
 #define		ADP5585_MAN_ID_MASK		GENMASK(7, 4)
+#define		ADP5585_REV_ID_MASK		GENMASK(3, 0)
 #define ADP5585_INT_STATUS		0x01
+#define		ADP5585_OVRFLOW_INT		BIT(2)
+#define		ADP5585_EVENT_INT		BIT(0)
 #define ADP5585_STATUS			0x02
+#define		ADP5585_EC_MASK			GENMASK(4, 0)
 #define ADP5585_FIFO_1			0x03
+#define		ADP5585_KEV_EV_PRESS_MASK	BIT(7)
+#define		ADP5585_KEY_EVENT_MASK		GENMASK(6, 0)
 #define ADP5585_FIFO_2			0x04
 #define ADP5585_FIFO_3			0x05
 #define ADP5585_FIFO_4			0x06
@@ -32,6 +39,7 @@
 #define ADP5585_FIFO_14			0x10
 #define ADP5585_FIFO_15			0x11
 #define ADP5585_FIFO_16			0x12
+#define		ADP5585_EV_MAX			(ADP5585_FIFO_16 - ADP5585_FIFO_1 + 1)
 #define ADP5585_GPI_INT_STAT_A		0x13
 #define ADP5585_GPI_INT_STAT_B		0x14
 #define ADP5585_GPI_STATUS_A		0x15
@@ -104,6 +112,8 @@
 #define		ADP5585_INT_CFG			BIT(1)
 #define		ADP5585_RST_CFG			BIT(0)
 #define ADP5585_INT_EN			0x3c
+#define		ADP5585_OVRFLOW_IEN		BIT(2)
+#define		ADP5585_EVENT_IEN		BIT(0)
 
 #define ADP5585_MAX_REG			ADP5585_INT_EN
 
@@ -121,7 +131,9 @@
 #define ADP5589_PWM_OFFT_LOW		0x3e
 #define ADP5589_PWM_ONT_LOW		0x40
 #define ADP5589_PWM_CFG			0x42
+#define ADP5589_POLL_PTIME_CFG		0x48
 #define ADP5589_PIN_CONFIG_D		0x4C
+#define ADP5589_GENERAL_CFG		0x4d
 #define ADP5589_INT_EN			0x4e
 #define ADP5589_MAX_REG			ADP5589_INT_EN
 
@@ -142,15 +154,21 @@ enum adp5585_variant {
 };
 
 struct adp5585_regs {
+	unsigned int gen_cfg;
 	unsigned int ext_cfg;
+	unsigned int int_en;
+	unsigned int poll_ptime_cfg;
 };
 
 struct adp5585_dev {
 	struct device *dev;
 	struct regmap *regmap;
 	const struct adp5585_regs *regs;
+	struct blocking_notifier_head event_notifier;
 	enum adp5585_variant variant;
 	unsigned int id;
+	int irq;
+	unsigned int ev_poll_time;
 };
 
 #endif
-- 
cgit v1.2.3


From 333812da70d5f71bf5e176f6d55a5f716301b5fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:08 +0100
Subject: mfd: adp5585: Support reset and unlock events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ADP558x family of devices can be programmed to respond to some
especial events, In case of the unlock events, one can lock the keypad
and use KEYS or GPIs events to unlock it. For the reset events, one can
again use a combinations of GPIs/KEYs in order to generate an event that
will trigger the device to generate an output reset pulse.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-13-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index 43a33a3d3f5a..db483ef9693a 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -68,6 +68,7 @@
 #define ADP5585_GPIO_DIRECTION_A	0x27
 #define ADP5585_GPIO_DIRECTION_B	0x28
 #define ADP5585_RESET1_EVENT_A		0x29
+#define		ADP5585_RESET_EV_PRESS		BIT(7)
 #define ADP5585_RESET1_EVENT_B		0x2a
 #define ADP5585_RESET1_EVENT_C		0x2b
 #define ADP5585_RESET2_EVENT_A		0x2c
@@ -118,6 +119,13 @@
 #define ADP5585_MAX_REG			ADP5585_INT_EN
 
 #define ADP5585_PIN_MAX			11
+#define ADP5585_MAX_UNLOCK_TIME_SEC	7
+#define ADP5585_KEY_EVENT_START		1
+#define ADP5585_KEY_EVENT_END		25
+#define ADP5585_GPI_EVENT_START		37
+#define ADP5585_GPI_EVENT_END		47
+#define ADP5585_ROW5_KEY_EVENT_START	1
+#define ADP5585_ROW5_KEY_EVENT_END	30
 
 /* ADP5589 */
 #define		ADP5589_MAN_ID_VALUE		0x10
@@ -128,6 +136,20 @@
 #define ADP5589_GPO_DATA_OUT_A		0x2a
 #define ADP5589_GPO_OUT_MODE_A		0x2d
 #define		ADP5589_GPIO_DIRECTION_A	0x30
+#define ADP5589_UNLOCK1			0x33
+#define		ADP5589_UNLOCK_EV_PRESS		BIT(7)
+#define ADP5589_UNLOCK_TIMERS		0x36
+#define		ADP5589_UNLOCK_TIMER		GENMASK(2, 0)
+#define ADP5589_LOCK_CFG		0x37
+#define		ADP5589_LOCK_EN			BIT(0)
+#define ADP5589_RESET1_EVENT_A		0x38
+#define ADP5589_RESET2_EVENT_A		0x3B
+#define ADP5589_RESET_CFG		0x3D
+#define		ADP5585_RESET2_POL		BIT(7)
+#define		ADP5585_RESET1_POL		BIT(6)
+#define		ADP5585_RST_PASSTHRU_EN		BIT(5)
+#define		ADP5585_RESET_TRIG_TIME		GENMASK(4, 2)
+#define		ADP5585_PULSE_WIDTH		GENMASK(1, 0)
 #define ADP5589_PWM_OFFT_LOW		0x3e
 #define ADP5589_PWM_ONT_LOW		0x40
 #define ADP5589_PWM_CFG			0x42
@@ -138,6 +160,11 @@
 #define ADP5589_MAX_REG			ADP5589_INT_EN
 
 #define ADP5589_PIN_MAX			19
+#define ADP5589_KEY_EVENT_START		1
+#define ADP5589_KEY_EVENT_END		88
+#define ADP5589_GPI_EVENT_START		97
+#define ADP5589_GPI_EVENT_END		115
+#define ADP5589_UNLOCK_WILDCARD		127
 
 struct regmap;
 
@@ -158,6 +185,9 @@ struct adp5585_regs {
 	unsigned int ext_cfg;
 	unsigned int int_en;
 	unsigned int poll_ptime_cfg;
+	unsigned int reset_cfg;
+	unsigned int reset1_event_a;
+	unsigned int reset2_event_a;
 };
 
 struct adp5585_dev {
@@ -167,8 +197,18 @@ struct adp5585_dev {
 	struct blocking_notifier_head event_notifier;
 	enum adp5585_variant variant;
 	unsigned int id;
+	bool has_unlock;
+	bool has_pin6;
 	int irq;
 	unsigned int ev_poll_time;
+	unsigned int unlock_time;
+	unsigned int unlock_keys[2];
+	unsigned int nkeys_unlock;
+	unsigned int reset1_keys[3];
+	unsigned int nkeys_reset1;
+	unsigned int reset2_keys[2];
+	unsigned int nkeys_reset2;
+	u8 reset_cfg;
 };
 
 #endif
-- 
cgit v1.2.3


From bd113a13e1fa51789f55987369b80e1d8bc19389 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:09 +0100
Subject: mfd: adp5585: Add support for input devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ADP558x family supports a built in keypad matrix decoder which can
be added as an Input device. In order to both support the Input and the
GPIO device, we need to create a bitmap of the supported pins and track
their usage since they can either be used as GPIOs (GPIs) or as part of
the keymap.

We also need to mark special pins busy in case some features are being
used (ex: pwm or reset events).

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-14-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index db483ef9693a..41c5d2e1cc7c 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -126,6 +126,10 @@
 #define ADP5585_GPI_EVENT_END		47
 #define ADP5585_ROW5_KEY_EVENT_START	1
 #define ADP5585_ROW5_KEY_EVENT_END	30
+#define ADP5585_PWM_OUT			3
+#define ADP5585_RESET1_OUT		4
+#define ADP5585_RESET2_OUT		9
+#define ADP5585_ROW5			5
 
 /* ADP5589 */
 #define		ADP5589_MAN_ID_VALUE		0x10
@@ -154,6 +158,7 @@
 #define ADP5589_PWM_ONT_LOW		0x40
 #define ADP5589_PWM_CFG			0x42
 #define ADP5589_POLL_PTIME_CFG		0x48
+#define ADP5589_PIN_CONFIG_A		0x49
 #define ADP5589_PIN_CONFIG_D		0x4C
 #define ADP5589_GENERAL_CFG		0x4d
 #define ADP5589_INT_EN			0x4e
@@ -165,6 +170,7 @@
 #define ADP5589_GPI_EVENT_START		97
 #define ADP5589_GPI_EVENT_END		115
 #define ADP5589_UNLOCK_WILDCARD		127
+#define ADP5589_RESET2_OUT		12
 
 struct regmap;
 
@@ -188,6 +194,7 @@ struct adp5585_regs {
 	unsigned int reset_cfg;
 	unsigned int reset1_event_a;
 	unsigned int reset2_event_a;
+	unsigned int pin_cfg_a;
 };
 
 struct adp5585_dev {
@@ -195,6 +202,9 @@ struct adp5585_dev {
 	struct regmap *regmap;
 	const struct adp5585_regs *regs;
 	struct blocking_notifier_head event_notifier;
+	unsigned long *pin_usage;
+	unsigned int n_pins;
+	unsigned int reset2_out;
 	enum adp5585_variant variant;
 	unsigned int id;
 	bool has_unlock;
-- 
cgit v1.2.3


From 988b28a83b658137e58123f4dafc3a1588e1cb2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 1 Jul 2025 15:32:10 +0100
Subject: gpio: adp5585: support gpi events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for adding GPIs to the event FIFO. This is done by adding
irq_chip support. Like this, one can use the input gpio_keys driver as a
"frontend" device and input handler.

As part of this change, we now implement .request() and .free() as we can't
blindly consume all available pins as GPIOs (example: some pins can be
used for forming a keymap matrix).

Also note that the number of pins can now be obtained from the parent,
top level device. Hence the 'max_gpio' variable can be removed.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-15-b1fcfe9e9826@analog.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/adp5585.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h
index 41c5d2e1cc7c..5237da6b4a9f 100644
--- a/include/linux/mfd/adp5585.h
+++ b/include/linux/mfd/adp5585.h
@@ -136,6 +136,8 @@
 #define ADP5589_GPI_STATUS_A		0x16
 #define ADP5589_GPI_STATUS_C		0x18
 #define ADP5589_RPULL_CONFIG_A		0x19
+#define ADP5589_GPI_INT_LEVEL_A		0x1e
+#define ADP5589_GPI_EVENT_EN_A		0x21
 #define ADP5589_DEBOUNCE_DIS_A		0x27
 #define ADP5589_GPO_DATA_OUT_A		0x2a
 #define ADP5589_GPO_OUT_MODE_A		0x2d
-- 
cgit v1.2.3


From ea4d331050b4cd43e6a900937db88b01ef75e1f2 Mon Sep 17 00:00:00 2001
From: Javier Carrasco <javier.carrasco@wolfvision.net>
Date: Wed, 16 Oct 2024 06:02:41 +0200
Subject: Input: touch-overlay - add touchscreen overlay handling

Some touch devices provide mechanical overlays with different objects
like buttons or clipped touchscreen surfaces.

In order to support these objects, add a series of helper functions
to the input subsystem to transform them into overlay objects via
device tree nodes.

These overlay objects consume the raw touch events and report the
expected input events depending on the object properties.

Note that the current implementation allows for multiple definitions
of touchscreen areas (regions that report touch events), but only the
first one will be used for the touchscreen device that the consumers
typically provide.
Should the need for multiple touchscreen areas arise, additional
touchscreen devices would be required at the consumer side.
There is no limitation in the number of touch areas defined as buttons.

Reviewed-by: Jeff LaBundy <jeff@labundy.com>
Signed-off-by: Javier Carrasco <javier.carrasco@wolfvision.net>
Link: https://lore.kernel.org/r/20241016-feature-ts_virtobj_patch-v11-2-b292a1bbb0a1@wolfvision.net
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/touch-overlay.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 include/linux/input/touch-overlay.h

(limited to 'include/linux')

diff --git a/include/linux/input/touch-overlay.h b/include/linux/input/touch-overlay.h
new file mode 100644
index 000000000000..0253e554d3cd
--- /dev/null
+++ b/include/linux/input/touch-overlay.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023 Javier Carrasco <javier.carrasco@wolfvision.net>
+ */
+
+#ifndef _TOUCH_OVERLAY
+#define _TOUCH_OVERLAY
+
+#include <linux/types.h>
+
+struct input_dev;
+
+int touch_overlay_map(struct list_head *list, struct input_dev *input);
+
+void touch_overlay_get_touchscreen_abs(struct list_head *list, u16 *x, u16 *y);
+
+bool touch_overlay_mapped_touchscreen(struct list_head *list);
+
+bool touch_overlay_process_contact(struct list_head *list,
+				   struct input_dev *input,
+				   struct input_mt_pos *pos, int slot);
+
+void touch_overlay_sync_frame(struct list_head *list, struct input_dev *input);
+
+#endif
-- 
cgit v1.2.3