From da142f3d373a6ddaca0119615a8db2175ddc4121 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 5 Dec 2025 15:26:55 -0800
Subject: KVM: Remove subtle "struct kvm_stats_desc" pseudo-overlay

Remove KVM's internal pseudo-overlay of kvm_stats_desc, which subtly
aliases the flexible name[] in the uAPI definition with a fixed-size array
of the same name.  The unusual embedded structure results in compiler
warnings due to -Wflex-array-member-not-at-end, and also necessitates an
extra level of dereferencing in KVM.  To avoid the "overlay", define the
uAPI structure to have a fixed-size name when building for the kernel.

Opportunistically clean up the indentation for the stats macros, and
replace spaces with tabs.

No functional change intended.

Reported-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Closes: https://lore.kernel.org/all/aPfNKRpLfhmhYqfP@kspp
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
[..]
Acked-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20251205232655.445294-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 83 ++++++++++++++++++++----------------------------
 include/uapi/linux/kvm.h |  8 +++++
 2 files changed, 43 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d93f75b05ae2..7428d9949382 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1927,56 +1927,43 @@ enum kvm_stat_kind {
 
 struct kvm_stat_data {
 	struct kvm *kvm;
-	const struct _kvm_stats_desc *desc;
+	const struct kvm_stats_desc *desc;
 	enum kvm_stat_kind kind;
 };
 
-struct _kvm_stats_desc {
-	struct kvm_stats_desc desc;
-	char name[KVM_STATS_NAME_SIZE];
-};
-
-#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz)		       \
-	.flags = type | unit | base |					       \
-		 BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |	       \
-		 BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) |	       \
-		 BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK),	       \
-	.exponent = exp,						       \
-	.size = sz,							       \
+#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz)		\
+	.flags = type | unit | base |					\
+		 BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |       \
+		 BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) |	\
+		 BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK),	\
+	.exponent = exp,						\
+	.size = sz,							\
 	.bucket_size = bsz
 
-#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vm_stat, generic.stat)   \
-		},							       \
-		.name = #stat,						       \
-	}
-#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
-		},							       \
-		.name = #stat,						       \
-	}
-#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vm_stat, stat)	       \
-		},							       \
-		.name = #stat,						       \
-	}
-#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		       \
-	{								       \
-		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
-			.offset = offsetof(struct kvm_vcpu_stat, stat)	       \
-		},							       \
-		.name = #stat,						       \
-	}
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vm_stat, generic.stat),		\
+	.name = #stat,							\
+}
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vcpu_stat, generic.stat),		\
+	.name = #stat,							\
+}
+#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vm_stat, stat),			\
+	.name = #stat,							\
+}
+#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		\
+{									\
+	STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),		\
+	.offset = offsetof(struct kvm_vcpu_stat, stat),			\
+	.name = #stat,							\
+}
 /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
 #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz)		       \
 	SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz)
@@ -2053,7 +2040,7 @@ struct _kvm_stats_desc {
 	STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
 
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
-		       const struct _kvm_stats_desc *desc,
+		       const struct kvm_stats_desc *desc,
 		       void *stats, size_t size_stats,
 		       char __user *user_buffer, size_t size, loff_t *offset);
 
@@ -2098,9 +2085,9 @@ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value)
 
 
 extern const struct kvm_stats_header kvm_vm_stats_header;
-extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
+extern const struct kvm_stats_desc kvm_vm_stats_desc[];
 extern const struct kvm_stats_header kvm_vcpu_stats_header;
-extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
+extern const struct kvm_stats_desc kvm_vcpu_stats_desc[];
 
 #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index dddb781b0507..76bd54848b11 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -14,6 +14,10 @@
 #include <linux/ioctl.h>
 #include <asm/kvm.h>
 
+#ifdef __KERNEL__
+#include <linux/kvm_types.h>
+#endif
+
 #define KVM_API_VERSION 12
 
 /*
@@ -1579,7 +1583,11 @@ struct kvm_stats_desc {
 	__u16 size;
 	__u32 offset;
 	__u32 bucket_size;
+#ifdef __KERNEL__
+	char name[KVM_STATS_NAME_SIZE];
+#else
 	char name[];
+#endif
 };
 
 #define KVM_GET_STATS_FD  _IO(KVMIO,  0xce)
-- 
cgit v1.2.3


From b777b5e09eabeefc6ba80f4296366a4742701103 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Feb 2026 17:02:25 +0000
Subject: time/jiffies: Inline jiffies_to_msecs() and jiffies_to_usecs()

For common cases (HZ=100, 250 or 1000), these helpers are at most one
multiply, so there is no point calling a tiny function.

Keep them out of line for HZ=300 and others.

This saves cycles in TCP fast path, among other things.

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/8 grow/shrink: 25/89 up/down: 530/-3474 (-2944)
...
nla_put_msecs                                193       -    -193
message_stats_print                         2131     920   -1211
Total: Before=25365208, After=25362264, chg -0.01%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260210170226.57209-1-edumazet@google.com
---
 include/linux/jiffies.h | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index fdef2c155c27..d1c3d4941854 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -434,8 +434,44 @@ extern unsigned long preset_lpj;
 /*
  * Convert various time units to each other:
  */
-extern unsigned int jiffies_to_msecs(const unsigned long j);
-extern unsigned int jiffies_to_usecs(const unsigned long j);
+
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+/**
+ * jiffies_to_msecs - Convert jiffies to milliseconds
+ * @j: jiffies value
+ *
+ * This inline version takes care of HZ in {100,250,1000}.
+ *
+ * Return: milliseconds value
+ */
+static inline unsigned int jiffies_to_msecs(const unsigned long j)
+{
+	return (MSEC_PER_SEC / HZ) * j;
+}
+#else
+unsigned int jiffies_to_msecs(const unsigned long j);
+#endif
+
+#if !(USEC_PER_SEC % HZ)
+/**
+ * jiffies_to_usecs - Convert jiffies to microseconds
+ * @j: jiffies value
+ *
+ * Return: microseconds value
+ */
+static inline unsigned int jiffies_to_usecs(const unsigned long j)
+{
+	/*
+	 * Hz usually doesn't go much further than MSEC_PER_SEC.
+	 * jiffies_to_usecs() and usecs_to_jiffies() depend on that.
+	 */
+	BUILD_BUG_ON(HZ > USEC_PER_SEC);
+
+	return (USEC_PER_SEC / HZ) * j;
+}
+#else
+unsigned int jiffies_to_usecs(const unsigned long j);
+#endif
 
 /**
  * jiffies_to_nsecs - Convert jiffies to nanoseconds
-- 
cgit v1.2.3


From ce9e40a9a5e5cff0b1b0d2fa582b3d71a8ce68e8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 6 Feb 2026 15:48:16 +0000
Subject: irqchip/gic-v3-its: Limit number of per-device MSIs to the range the
 ITS supports

The ITS driver blindly assumes that EventIDs are in abundant supply, to the
point where it never checks how many the hardware actually supports.

It turns out that some pretty esoteric integrations make it so that only a
few bits are available, all the way down to a single bit.

Enforce the advertised limitation at the point of allocating the device
structure, and hope that the endpoint driver can deal with such limitation.

Fixes: 84a6a2e7fc18d ("irqchip: GICv3: ITS: device allocation and configuration")
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Zenghui Yu <zenghui.yu@linux.dev>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260206154816.3582887-1-maz@kernel.org
---
 include/linux/irqchip/arm-gic-v3.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 70c0948f978e..0225121f3013 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -394,6 +394,7 @@
 #define GITS_TYPER_VLPIS		(1UL << 1)
 #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT	4
 #define GITS_TYPER_ITT_ENTRY_SIZE	GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS		GENMASK_ULL(12, 8)
 #define GITS_TYPER_IDBITS_SHIFT		8
 #define GITS_TYPER_DEVBITS_SHIFT	13
 #define GITS_TYPER_DEVBITS		GENMASK_ULL(17, 13)
-- 
cgit v1.2.3


From 249013e673fce3506c61063c7cbedd75b4c668d8 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 18 Feb 2026 22:09:21 -0800
Subject: fsnotify: drop unused helper

Remove this helper now that all users have been converted to
fserror_report_metadata as of 7.0-rc1.

Cc: jack@suse.cz
Cc: amir73il@gmail.com
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://patch.msgid.link/177148129543.716249.980530449513340111.stgit@frogsfrogsfrogs
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fsnotify.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 28a9cb13fbfa..079c18bcdbde 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -495,19 +495,6 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		fsnotify_dentry(dentry, mask);
 }
 
-static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
-				    int error)
-{
-	struct fs_error_report report = {
-		.error = error,
-		.inode = inode,
-		.sb = sb,
-	};
-
-	return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR,
-			NULL, NULL, NULL, 0);
-}
-
 static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt)
 {
 	fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
-- 
cgit v1.2.3


From 6b3e458806e34f1142592f786d3eb0ebac209cc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=BCnther=20Noack?= <gnoack@google.com>
Date: Thu, 19 Feb 2026 16:43:35 +0100
Subject: HID: Document memory allocation properties of report_fixup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The memory pointer returned by the report_fixup() hook does not get
freed by the caller.  Instead, report_fixup() must return (in return
value and *rsize) a memory buffer with at least the same lifetime as
the input buffer (defined by rdesc and original *rsize).

This is usually achieved using one of the following techniques:

* Returning a pointer and size to a sub-portion of the input buffer
* Returning a pointer to a static buffer
* Allocating a buffer with a devm_*() function,
  which will automatically get freed when the device is removed.

Signed-off-by: Günther Noack <gnoack@google.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index dce862cafbbd..2990b9f94cb5 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -836,6 +836,12 @@ struct hid_usage_id {
  * raw_event and event should return negative on error, any other value will
  * pass the event on to .event() typically return 0 for success.
  *
+ * report_fixup must return a report descriptor pointer whose lifetime is at
+ * least that of the input rdesc.  This is usually done by mutating the input
+ * rdesc and returning it or a sub-portion of it.  In case a new buffer is
+ * allocated and returned, the implementation of report_fixup is responsible for
+ * freeing it later.
+ *
  * input_mapping shall return a negative value to completely ignore this usage
  * (e.g. doubled or invalid usage), zero to continue with parsing of this
  * usage by generic code (no special handling needed) or positive to skip
-- 
cgit v1.2.3


From 858d2a4f67ff69e645a43487ef7ea7f28f06deae Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Feb 2026 16:12:05 +0000
Subject: tcp: fix potential race in tcp_v6_syn_recv_sock()

Code in tcp_v6_syn_recv_sock() after the call to tcp_v4_syn_recv_sock()
is done too late.

After tcp_v4_syn_recv_sock(), the child socket is already visible
from TCP ehash table and other cpus might use it.

Since newinet->pinet6 is still pointing to the listener ipv6_pinfo
bad things can happen as syzbot found.

Move the problematic code into tcp_v6_mapped_child_init()
and call this new helper from tcp_v4_syn_recv_sock() before
the ehash insertion.

This allows the removal of one tcp_sync_mss(), since
tcp_v4_syn_recv_sock() will call it with the correct
context.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+937b5bbb6a815b3e5d0b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/69949275.050a0220.2eeac1.0145.GAE@google.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260217161205.2079883-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_connection_sock.h | 4 +++-
 include/net/tcp.h                  | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ecb362025c4e..5cb3056d6ddc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -42,7 +42,9 @@ struct inet_connection_sock_af_ops {
 				      struct request_sock *req,
 				      struct dst_entry *dst,
 				      struct request_sock *req_unhash,
-				      bool *own_req);
+				      bool *own_req,
+				      void (*opt_child_init)(struct sock *newsk,
+							     const struct sock *sk));
 	u16	    net_header_len;
 	int	    (*setsockopt)(struct sock *sk, int level, int optname,
 				  sockptr_t optval, unsigned int optlen);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 40e72b9cb85f..eb8bf63fdafc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -544,7 +544,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req,
 				  struct dst_entry *dst,
 				  struct request_sock *req_unhash,
-				  bool *own_req);
+				  bool *own_req,
+				  void (*opt_child_init)(struct sock *newsk,
+							 const struct sock *sk));
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
 int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
 int tcp_connect(struct sock *sk);
-- 
cgit v1.2.3


From 6e3c0a4e1ad1e0455b7880fad02b3ee179f56c09 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 22 Apr 2025 12:16:28 +0200
Subject: sched/fair: Fix lag clamp

Vincent reported that he was seeing undue lag clamping in a mixed
slice workload. Implement the max_slice tracking as per the todo
comment.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Reported-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20250422101628.GA33555@noisy.programming.kicks-ass.net
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 074ad4ef3d81..a7b4a980eb2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -579,6 +579,7 @@ struct sched_entity {
 	u64				deadline;
 	u64				min_vruntime;
 	u64				min_slice;
+	u64				max_slice;
 
 	struct list_head		group_node;
 	unsigned char			on_rq;
-- 
cgit v1.2.3


From 4c652a47722f69c6f2685f05b17490ea97f643a8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Feb 2026 08:41:13 +0100
Subject: rseq: Mark rseq_arm_slice_extension_timer() __always_inline

objtool warns about this function being called inside of a uaccess
section:

kernel/entry/common.o: warning: objtool: irqentry_exit+0x1dc: call to rseq_arm_slice_extension_timer() with UACCESS enabled

Interestingly, this happens with CONFIG_RSEQ_SLICE_EXTENSION disabled,
so this is an empty function, as the normal implementation is
already marked __always_inline.

I could reproduce this multiple times with gcc-11 but not with gcc-15,
so the compiler probably got better at identifying the trivial function.

Mark all the empty helpers for !RSEQ_SLICE_EXTENSION as __always_inline
for consistency, avoiding this warning.

Fixes: 0ac3b5c3dc45 ("rseq: Implement time slice extension enforcement timer")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260206074122.709580-1-arnd@kernel.org
---
 include/linux/rseq_entry.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index cbc4a791618b..c6831c93cd6e 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -216,10 +216,10 @@ efault:
 }
 
 #else /* CONFIG_RSEQ_SLICE_EXTENSION */
-static inline bool rseq_slice_extension_enabled(void) { return false; }
-static inline bool rseq_arm_slice_extension_timer(void) { return false; }
-static inline void rseq_slice_clear_grant(struct task_struct *t) { }
-static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+static __always_inline bool rseq_slice_extension_enabled(void) { return false; }
+static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; }
+static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { }
+static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
 
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
-- 
cgit v1.2.3


From 3b68df978133ac3d46d570af065a73debbb68248 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Fri, 20 Feb 2026 15:06:41 -0500
Subject: rseq: slice ext: Ensure rseq feature size differs from original rseq
 size

Before rseq became extensible, its original size was 32 bytes even
though the active rseq area was only 20 bytes. This had the following
impact in terms of userspace ecosystem evolution:

* GNU libc versions 2.35 through 2.39 expose a __rseq_size symbol set
  to 32, even though the size of the active rseq area is really 20.
* The GNU libc 2.40 changes this __rseq_size to 20, thus making it
  express the active rseq area.
* Starting from glibc 2.41, __rseq_size corresponds to the
  AT_RSEQ_FEATURE_SIZE from getauxval(3).

This means that users of __rseq_size can always expect it to
correspond to the active rseq area, except for the value 32, for
which the active rseq area is 20 bytes.

Exposing a 32 bytes feature size would make life needlessly painful
for userspace. Therefore, add a reserved field at the end of the
rseq area to bump the feature size to 33 bytes. This reserved field
is expected to be replaced with whatever field will come next,
expecting that this field will be larger than 1 byte.

The effect of this change is to increase the size from 32 to 64 bytes
before we actually have fields using that memory.

Clarify the allocation size and alignment requirements in the struct
rseq uapi comment.

Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the
value of the active rseq area size rounded up to next power of 2, which
guarantees that the rseq structure will always be aligned on the nearest
power of two large enough to contain it, even as it grows. Change the
alignment check in the rseq registration accordingly.

This will minimize the amount of ABI corner-cases we need to document
and require userspace to play games with. The rule stays simple when
__rseq_size != 32:

  #define rseq_field_available(field)	(__rseq_size >= offsetofend(struct rseq_abi, field))

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com
---
 include/linux/rseq.h      | 12 ++++++++++++
 include/uapi/linux/rseq.h | 26 ++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 7a01a0760405..b9d62fc2140d 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
 		t->rseq = current->rseq;
 }
 
+/*
+ * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq
+ * registration. This is the active rseq area size rounded up to next
+ * power of 2, which guarantees that the rseq structure will always be
+ * aligned on the nearest power of two large enough to contain it, even
+ * as it grows.
+ */
+static inline unsigned int rseq_alloc_align(void)
+{
+	return 1U << get_count_order(offsetof(struct rseq, end));
+}
+
 #else /* CONFIG_RSEQ */
 static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
 static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 863c4a00a66b..f69344fe6c08 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -87,10 +87,17 @@ struct rseq_slice_ctrl {
 };
 
 /*
- * struct rseq is aligned on 4 * 8 bytes to ensure it is always
- * contained within a single cache-line.
+ * The original size and alignment of the allocation for struct rseq is
+ * 32 bytes.
  *
- * A single struct rseq per thread is allowed.
+ * The allocation size needs to be greater than or equal to
+ * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
+ * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
+ *
+ * As an alternative, userspace is allowed to use both the original size
+ * and alignment of 32 bytes for backward compatibility.
+ *
+ * A single active struct rseq registration per thread is allowed.
  */
 struct rseq {
 	/*
@@ -180,10 +187,21 @@ struct rseq {
 	 */
 	struct rseq_slice_ctrl slice_ctrl;
 
+	/*
+	 * Before rseq became extensible, its original size was 32 bytes even
+	 * though the active rseq area was only 20 bytes.
+	 * Exposing a 32 bytes feature size would make life needlessly painful
+	 * for userspace. Therefore, add a reserved byte after byte 32
+	 * to bump the rseq feature size from 32 to 33.
+	 * The next field to be added to the rseq area will be larger
+	 * than one byte, and will replace this reserved byte.
+	 */
+	__u8 __reserved;
+
 	/*
 	 * Flexible array member at end of structure, after last feature field.
 	 */
 	char end[];
-} __attribute__((aligned(4 * sizeof(__u64))));
+} __attribute__((aligned(32)));
 
 #endif /* _UAPI_LINUX_RSEQ_H */
-- 
cgit v1.2.3


From 901084c51a0a8fb42a3f37d2e9c62083c495f824 Mon Sep 17 00:00:00 2001
From: Penghe Geng <pgeng@nvidia.com>
Date: Thu, 19 Feb 2026 15:29:54 -0500
Subject: mmc: core: Avoid bitfield RMW for claim/retune flags

Move claimed and retune control flags out of the bitfield word to
avoid unrelated RMW side effects in asynchronous contexts.

The host->claimed bit shared a word with retune flags. Writes to claimed
in __mmc_claim_host() or retune_now in mmc_mq_queue_rq() can overwrite
other bits when concurrent updates happen in other contexts, triggering
spurious WARN_ON(!host->claimed). Convert claimed, can_retune,
retune_now and retune_paused to bool to remove shared-word coupling.

Fixes: 6c0cedd1ef952 ("mmc: core: Introduce host claiming by context")
Fixes: 1e8e55b67030c ("mmc: block: Add CQE support")
Cc: stable@vger.kernel.org
Suggested-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Penghe Geng <pgeng@nvidia.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index e0e2c265e5d1..ba84f02c2a10 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -486,14 +486,12 @@ struct mmc_host {
 
 	struct mmc_ios		ios;		/* current io bus settings */
 
+	bool			claimed;	/* host exclusively claimed */
+
 	/* group bitfields together to minimize padding */
 	unsigned int		use_spi_crc:1;
-	unsigned int		claimed:1;	/* host exclusively claimed */
 	unsigned int		doing_init_tune:1; /* initial tuning in progress */
-	unsigned int		can_retune:1;	/* re-tuning can be used */
 	unsigned int		doing_retune:1;	/* re-tuning in progress */
-	unsigned int		retune_now:1;	/* do re-tuning at next req */
-	unsigned int		retune_paused:1; /* re-tuning is temporarily disabled */
 	unsigned int		retune_crc_disable:1; /* don't trigger retune upon crc */
 	unsigned int		can_dma_map_merge:1; /* merging can be used */
 	unsigned int		vqmmc_enabled:1; /* vqmmc regulator is enabled */
@@ -508,6 +506,9 @@ struct mmc_host {
 	int			rescan_disable;	/* disable card detection */
 	int			rescan_entered;	/* used with nonremovable devices */
 
+	bool			can_retune;	/* re-tuning can be used */
+	bool			retune_now;	/* do re-tuning at next req */
+	bool			retune_paused;	/* re-tuning is temporarily disabled */
 	int			need_retune;	/* re-tuning is needed */
 	int			hold_retune;	/* hold off re-tuning */
 	unsigned int		retune_period;	/* re-tuning period in secs */
-- 
cgit v1.2.3


From 3afd8df024339c7da1a5a0302f3987866dd16e40 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 22 Dec 2025 21:36:25 +0100
Subject: PM: runtime: Change pm_runtime_put() return type to void

The primary role of pm_runtime_put() is to decrement the runtime PM
usage counter of the given device.  It always does that regardless of
the value returned by it later.

In addition, if the runtime PM usage counter after decrementation turns
out to be zero, a work item is queued up to check whether or not the
device can be suspended.  This is not guaranteed to succeed though and
even if it is successful, the device may still not be suspended going
forward.

There are multiple valid reasons why pm_runtime_put() may not decide to
queue up the work item mentioned above, including, but not limited to,
the case when user space has written "on" to the device's runtime PM
"control" file in sysfs.  In all of those cases, pm_runtime_put()
returns a negative error code (even though the device's runtime PM
usage counter has been successfully decremented by it) which is very
confusing.  In fact, its return value should only be used for debug
purposes and care should be taken when doing it even in that case.

Accordingly, to avoid the confusion mentioned above, change the return
type of pm_runtime_put() to void.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Link: https://patch.msgid.link/14387202.RDIVbhacDa@rafael.j.wysocki
---
 include/linux/pm_runtime.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 41037c513f06..64921b10ac74 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -545,22 +545,10 @@ static inline int pm_runtime_resume_and_get(struct device *dev)
  *
  * Decrement the runtime PM usage counter of @dev and if it turns out to be
  * equal to 0, queue up a work item for @dev like in pm_request_idle().
- *
- * Return:
- * * 1: Success. Usage counter dropped to zero, but device was already suspended.
- * * 0: Success.
- * * -EINVAL: Runtime PM error.
- * * -EACCES: Runtime PM disabled.
- * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status
- *            change ongoing.
- * * -EBUSY: Runtime PM child_count non-zero.
- * * -EPERM: Device PM QoS resume latency 0.
- * * -EINPROGRESS: Suspend already in progress.
- * * -ENOSYS: CONFIG_PM not enabled.
  */
-static inline int pm_runtime_put(struct device *dev)
+static inline void pm_runtime_put(struct device *dev)
 {
-	return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
+	__pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
 }
 
 /**
-- 
cgit v1.2.3


From 2a7b7652b1bb3fadc3bd47d622bfb127a93ab6b0 Mon Sep 17 00:00:00 2001
From: Leif Skunberg <diamondback@cohunt.app>
Date: Tue, 10 Feb 2026 14:21:29 +0100
Subject: platform/x86: int3472: Handle GPIO type 0x10 (DOVDD)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Lenovo ThinkPad X1 Fold 16 Gen 1 has an OV5675 sensor (ACPI HID
OVTI5675) behind an INT3472 discrete PMIC controller. The INT3472
_DSM returns GPIO type 0x10 for one of the pins, which controls the
DOVDD (digital I/O power) regulator enable.

Type 0x10 is not currently handled by the driver, causing the GPIO to
be ignored with a warning. Add INT3472_GPIO_TYPE_DOVDD (0x10) and
handle it as a regulator with con_id "dovdd" to match the supply name
used by sensor drivers (e.g. ov5675).

Also increase GPIO_SUPPLY_NAME_LENGTH from 5 to 6 to accommodate
the "dovdd" name (5 chars + null terminator).

Signed-off-by: Leif Skunberg <diamondback@cohunt.app>
Reviewed-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Link: https://patch.msgid.link/20260210132129.17943-1-diamondback@cohunt.app
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/int3472.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index b1b837583d54..dbe745dc88d5 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -26,6 +26,7 @@
 #define INT3472_GPIO_TYPE_POWER_ENABLE				0x0b
 #define INT3472_GPIO_TYPE_CLK_ENABLE				0x0c
 #define INT3472_GPIO_TYPE_PRIVACY_LED				0x0d
+#define INT3472_GPIO_TYPE_DOVDD					0x10
 #define INT3472_GPIO_TYPE_HANDSHAKE				0x12
 #define INT3472_GPIO_TYPE_HOTPLUG_DETECT			0x13
 
@@ -33,8 +34,8 @@
 #define INT3472_MAX_SENSOR_GPIOS				3
 #define INT3472_MAX_REGULATORS					3
 
-/* E.g. "avdd\0" */
-#define GPIO_SUPPLY_NAME_LENGTH				5
+/* E.g. "dovdd\0" */
+#define GPIO_SUPPLY_NAME_LENGTH				6
 /* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */
 #define GPIO_REGULATOR_NAME_LENGTH				(12 + GPIO_SUPPLY_NAME_LENGTH)
 /* lower- and upper-case mapping */
-- 
cgit v1.2.3


From 551d44200152cb26f75d2ef990aeb6185b7e37fd Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 23 Feb 2026 09:33:08 -0800
Subject: default_gfp(): avoid using the "newfangled" __VA_OPT__ trick

The default_gfp() helper that I added is not wrong, but it turns out
that it causes unnecessary headaches for 'sparse' which doesn't support
the use of __VA_OPT__ (introduced in C++20 and C23, and supported by gcc
and clang for a long time).

We do already use __VA_OPT__ in some other cases in the kernel (drm/xe
and btrfs), but it has been fairly limited.  Now it triggers for pretty
much everything, and sparse ends up not working at all.

We can use the traditional gcc ',##__VA_ARGS__' syntax instead: it may
not be the "C standard" way and is slightly less natural in this
context, but it is the traditional model for this and avoids the sparse
problem.

Reported-and-tested-by: Ricardo Ribalda <ribalda@chromium.org>
Reported-and-tested-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reported-by: Ben Dooks <ben.dooks@codethink.co.uk>
Fixes: e19e1b480ac7 ("add default_gfp() helper macro and use it in the new *alloc_obj() helpers")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b30a0529d48..90536b2bc42e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -14,8 +14,8 @@ struct vm_area_struct;
 struct mempolicy;
 
 /* Helper macro to avoid gfp flags if they are the default one */
-#define __default_gfp(a,...) a
-#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL)
+#define __default_gfp(a,b,...) b
+#define default_gfp(...) __default_gfp(,##__VA_ARGS__,GFP_KERNEL)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
-- 
cgit v1.2.3


From 7accb1c4321acb617faf934af59d928b0b047e2b Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 3 Feb 2026 15:16:16 -0500
Subject: Bluetooth: L2CAP: Fix invalid response to L2CAP_ECRED_RECONF_REQ

This fixes responding with an invalid result caused by checking the
wrong size of CID, which should have been (cmd_len - sizeof(*req)). On
top of that, the wrong result was used: L2CAP_CR_LE_INVALID_PARAMS is
invalid/reserved for reconf when running tests like L2CAP/ECFC/BI-03-C:

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 64
        MPS: 64
        Source CID: 64
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reserved (0x000c)
         Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003)

Fix L2CAP/ECFC/BI-04-C which expects L2CAP_RECONF_INVALID_MPS (0x0002)
when more than one channel gets its MPS reduced:

> ACL Data RX: Handle 64 flags 0x02 dlen 16
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 8
        MTU: 264
        MPS: 99
        Source CID: 64
!       Source CID: 65
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reconfiguration successful (0x0000)
         Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)

Fix L2CAP/ECFC/BI-05-C when SCID is invalid (85 unconnected):

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 65
        MPS: 64
!        Source CID: 85
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!        Result: Reconfiguration successful (0x0000)
         Result: Reconfiguration failed - one or more Destination CIDs invalid (0x0003)

Fix L2CAP/ECFC/BI-06-C when MPS < L2CAP_ECRED_MIN_MPS (64):

> ACL Data RX: Handle 64 flags 0x02 dlen 14
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 2 len 6
        MTU: 672
!       MPS: 63
        Source CID: 64
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!       Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)
        Result: Reconfiguration failed - other unacceptable parameters (0x0004)

Fix L2CAP/ECFC/BI-07-C when MPS reduced for more than one channel:

> ACL Data RX: Handle 64 flags 0x02 dlen 16
      LE L2CAP: Enhanced Credit Reconfigure Request (0x19) ident 3 len 8
        MTU: 84
!       MPS: 71
        Source CID: 64
!        Source CID: 65
< ACL Data TX: Handle 64 flags 0x00 dlen 10
      LE L2CAP: Enhanced Credit Reconfigure Respond (0x1a) ident 2 len 2
!       Result: Reconfiguration successful (0x0000)
        Result: Reconfiguration failed - reduction in size of MPS not allowed for more than one channel at a time (0x0002)

Link: https://github.com/bluez/bluez/issues/1865
Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/l2cap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index ec3af01e4db9..6f9cf7a05986 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -493,6 +493,8 @@ struct l2cap_ecred_reconf_req {
 #define L2CAP_RECONF_SUCCESS		0x0000
 #define L2CAP_RECONF_INVALID_MTU	0x0001
 #define L2CAP_RECONF_INVALID_MPS	0x0002
+#define L2CAP_RECONF_INVALID_CID	0x0003
+#define L2CAP_RECONF_INVALID_PARAMS	0x0004
 
 struct l2cap_ecred_reconf_rsp {
 	__le16 result;
-- 
cgit v1.2.3


From c28d2bff70444a85b3b86aaf241ece9408c7858c Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Thu, 5 Feb 2026 15:11:34 -0500
Subject: Bluetooth: L2CAP: Fix result of L2CAP_ECRED_CONN_RSP when MTU is too
 short

Test L2CAP/ECFC/BV-26-C expects the response to L2CAP_ECRED_CONN_REQ with
an MTU value < L2CAP_ECRED_MIN_MTU (64) to be L2CAP_CR_LE_INVALID_PARAMS
rather than L2CAP_CR_LE_UNACCEPT_PARAMS.

Also fix not including the correct number of CIDs in the response since
the spec requires all CIDs being rejected to be included in the
response.

Link: https://github.com/bluez/bluez/issues/1868
Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/l2cap.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 6f9cf7a05986..010f1a8fd15f 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -284,9 +284,9 @@ struct l2cap_conn_rsp {
 #define L2CAP_CR_LE_BAD_KEY_SIZE	0x0007
 #define L2CAP_CR_LE_ENCRYPTION		0x0008
 #define L2CAP_CR_LE_INVALID_SCID	0x0009
-#define L2CAP_CR_LE_SCID_IN_USE		0X000A
-#define L2CAP_CR_LE_UNACCEPT_PARAMS	0X000B
-#define L2CAP_CR_LE_INVALID_PARAMS	0X000C
+#define L2CAP_CR_LE_SCID_IN_USE		0x000A
+#define L2CAP_CR_LE_UNACCEPT_PARAMS	0x000B
+#define L2CAP_CR_LE_INVALID_PARAMS	0x000C
 
 /* connect/create channel status */
 #define L2CAP_CS_NO_INFO	0x0000
-- 
cgit v1.2.3


From 4ced4cf5c9d172d91f181df3accdf949d3761aab Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Tue, 17 Feb 2026 18:01:05 +0000
Subject: binfmt_elf_fdpic: fix AUXV size calculation for ELF_HWCAP3 and
 ELF_HWCAP4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 4e6e8c2b757f ("binfmt_elf: Wire up AT_HWCAP3 at AT_HWCAP4") added
support for AT_HWCAP3 and AT_HWCAP4, but it missed updating the AUX
vector size calculation in create_elf_fdpic_tables() and
AT_VECTOR_SIZE_BASE in include/linux/auxvec.h.

Similar to the fix for AT_HWCAP2 in commit c6a09e342f8e ("binfmt_elf_fdpic:
fix AUXV size calculation when ELF_HWCAP2 is defined"), this omission
leads to a mismatch between the reserved space and the actual number of
AUX entries, eventually triggering a kernel BUG_ON(csp != sp).

Fix this by incrementing nitems when ELF_HWCAP3 or ELF_HWCAP4 are
defined and updating AT_VECTOR_SIZE_BASE.

Cc: Mark Brown <broonie@kernel.org>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Fixes: 4e6e8c2b757f ("binfmt_elf: Wire up AT_HWCAP3 at AT_HWCAP4")
Signed-off-by: Andrei Vagin <avagin@google.com>
Link: https://patch.msgid.link/20260217180108.1420024-2-avagin@google.com
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/auxvec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index 407f7005e6d6..8bcb9b726262 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -4,6 +4,6 @@
 
 #include <uapi/linux/auxvec.h>
 
-#define AT_VECTOR_SIZE_BASE 22 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 24 /* NEW_AUX_ENT entries in auxiliary table */
   /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
 #endif /* _LINUX_AUXVEC_H */
-- 
cgit v1.2.3


From 983512f3a87fd8dc4c94dfa6b596b6e57df5aad7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Feb 2026 19:38:58 +0100
Subject: net: Drop the lock in skb_may_tx_timestamp()

skb_may_tx_timestamp() may acquire sock::sk_callback_lock. The lock must
not be taken in IRQ context, only softirq is okay. A few drivers receive
the timestamp via a dedicated interrupt and complete the TX timestamp
from that handler. This will lead to a deadlock if the lock is already
write-locked on the same CPU.

Taking the lock can be avoided. The socket (pointed to by the skb) will
remain valid until the skb is released. The ->sk_socket and ->file
members will be set to NULL once the user closes the socket, which may
happen before the timestamp arrives.
If we happen to observe the pointer while the socket is closing but
before the pointer is set to NULL then we may use it because both
pointers (and the file's cred member) are RCU freed.

Drop the lock. Use READ_ONCE() to obtain the individual pointers. Add a
matching WRITE_ONCE() where the pointers are cleared.

Link: https://lore.kernel.org/all/20260205145104.iWinkXHv@linutronix.de
Fixes: b245be1f4db1a ("net-timestamp: no-payload only sysctl")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260220183858.N4ERjFW6@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 66b56288c1d3..6c9a83016e95 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2098,7 +2098,7 @@ static inline int sk_rx_queue_get(const struct sock *sk)
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
-	sk->sk_socket = sock;
+	WRITE_ONCE(sk->sk_socket, sock);
 	if (sock) {
 		WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
 		WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
-- 
cgit v1.2.3


From eb9549346f7578eda3755683ac2cfb4d94c0675f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Feb 2026 13:17:44 +0100
Subject: mm: change vma_alloc_folio_noprof() macro to inline function

In a few rare configurations with extra warnings enabled, the new
drm_pagemap_migrate_populate_ram_pfn() calls vma_alloc_folio_noprof() but
that does not use all the arguments, leading to a harmless warning:

drivers/gpu/drm/drm_pagemap.c: In function 'drm_pagemap_migrate_populate_ram_pfn':
drivers/gpu/drm/drm_pagemap.c:701:63: error: parameter 'addr' set but not used [-Werror=unused-but-set-parameter=]
  701 |                                                 unsigned long addr)
      |                                                 ~~~~~~~~~~~~~~^~~~

Replace the macro with an inline function so the compiler can see how the
argument would be used, but is still able to optimize out the assignments.

Link: https://lkml.kernel.org/r/20260216121751.2378374-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/gfp.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b30a0529d48..f82d74a77cad 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -339,8 +339,11 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde
 {
 	return folio_alloc_noprof(gfp, order);
 }
-#define vma_alloc_folio_noprof(gfp, order, vma, addr)		\
-	folio_alloc_noprof(gfp, order)
+static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order,
+		struct vm_area_struct *vma, unsigned long addr)
+{
+	return folio_alloc_noprof(gfp, order);
+}
 #endif
 
 #define alloc_pages(...)			alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
-- 
cgit v1.2.3


From f85b1c6af5bc3872f994df0a5688c1162de07a62 Mon Sep 17 00:00:00 2001
From: "Pratyush Yadav (Google)" <pratyush@kernel.org>
Date: Mon, 16 Feb 2026 14:22:19 +0100
Subject: liveupdate: luo_file: remember retrieve() status

LUO keeps track of successful retrieve attempts on a LUO file.  It does so
to avoid multiple retrievals of the same file.  Multiple retrievals cause
problems because once the file is retrieved, the serialized data
structures are likely freed and the file is likely in a very different
state from what the code expects.

The retrieve boolean in struct luo_file keeps track of this, and is passed
to the finish callback so it knows what work was already done and what it
has left to do.

All this works well when retrieve succeeds.  When it fails,
luo_retrieve_file() returns the error immediately, without ever storing
anywhere that a retrieve was attempted or what its error code was.  This
results in an errored LIVEUPDATE_SESSION_RETRIEVE_FD ioctl to userspace,
but nothing prevents it from trying this again.

The retry is problematic for much of the same reasons listed above.  The
file is likely in a very different state than what the retrieve logic
normally expects, and it might even have freed some serialization data
structures.  Attempting to access them or free them again is going to
break things.

For example, if memfd managed to restore 8 of its 10 folios, but fails on
the 9th, a subsequent retrieve attempt will try to call
kho_restore_folio() on the first folio again, and that will fail with a
warning since it is an invalid operation.

Apart from the retry, finish() also breaks.  Since on failure the
retrieved bool in luo_file is never touched, the finish() call on session
close will tell the file handler that retrieve was never attempted, and it
will try to access or free the data structures that might not exist, much
in the same way as the retry attempt.

There is no sane way of attempting the retrieve again.  Remember the error
retrieve returned and directly return it on a retry.  Also pass this
status code to finish() so it can make the right decision on the work it
needs to do.

This is done by changing the bool to an integer.  A value of 0 means
retrieve was never attempted, a positive value means it succeeded, and a
negative value means it failed and the error code is the value.

Link: https://lkml.kernel.org/r/20260216132221.987987-1-pratyush@kernel.org
Fixes: 7c722a7f44e0 ("liveupdate: luo_file: implement file systems callbacks")
Signed-off-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/liveupdate.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index fe82a6c3005f..dd11fdc76a5f 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -23,8 +23,11 @@ struct file;
 /**
  * struct liveupdate_file_op_args - Arguments for file operation callbacks.
  * @handler:          The file handler being called.
- * @retrieved:        The retrieve status for the 'can_finish / finish'
- *                    operation.
+ * @retrieve_status:  The retrieve status for the 'can_finish / finish'
+ *                    operation. A value of 0 means the retrieve has not been
+ *                    attempted, a positive value means the retrieve was
+ *                    successful, and a negative value means the retrieve failed,
+ *                    and the value is the error code of the call.
  * @file:             The file object. For retrieve: [OUT] The callback sets
  *                    this to the new file. For other ops: [IN] The caller sets
  *                    this to the file being operated on.
@@ -40,7 +43,7 @@ struct file;
  */
 struct liveupdate_file_op_args {
 	struct liveupdate_file_handler *handler;
-	bool retrieved;
+	int retrieve_status;
 	struct file *file;
 	u64 serialized_data;
 	void *private_data;
-- 
cgit v1.2.3


From 079c24d5690262e83ee476e2a548e416f3237511 Mon Sep 17 00:00:00 2001
From: Kalesh Singh <kaleshsingh@google.com>
Date: Thu, 19 Feb 2026 15:36:56 -0800
Subject: mm/tracing: rss_stat: ensure curr is false from kthread context

The rss_stat trace event allows userspace tools, like Perfetto [1], to
inspect per-process RSS metric changes over time.

The curr field was introduced to rss_stat in commit e4dcad204d3a
("rss_stat: add support to detect RSS updates of external mm").  Its
intent is to indicate whether the RSS update is for the mm_struct of the
current execution context; and is set to false when operating on a remote
mm_struct (e.g., via kswapd or a direct reclaimer).

However, an issue arises when a kernel thread temporarily adopts a user
process's mm_struct.  Kernel threads do not have their own mm_struct and
normally have current->mm set to NULL.  To operate on user memory, they
can "borrow" a memory context using kthread_use_mm(), which sets
current->mm to the user process's mm.

This can be observed, for example, in the USB Function Filesystem (FFS)
driver.  The ffs_user_copy_worker() handles AIO completions and uses
kthread_use_mm() to copy data to a user-space buffer.  If a page fault
occurs during this copy, the fault handler executes in the kthread's
context.

At this point, current is the kthread, but current->mm points to the user
process's mm.  Since the rss_stat event (from the page fault) is for that
same mm, the condition current->mm == mm becomes true, causing curr to be
incorrectly set to true when the trace event is emitted.

This is misleading because it suggests the mm belongs to the kthread,
confusing userspace tools that track per-process RSS changes and
corrupting their mm_id-to-process association.

Fix this by ensuring curr is always false when the trace event is emitted
from a kthread context by checking for the PF_KTHREAD flag.

Link: https://lkml.kernel.org/r/20260219233708.1971199-1-kaleshsingh@google.com
Link: https://perfetto.dev/ [1]
Fixes: e4dcad204d3a ("rss_stat: add support to detect RSS updates of external mm")
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Cc: "David Hildenbrand (Arm)" <david@kernel.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: <stable@vger.kernel.org>	[5.10+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/kmem.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 7f93e754da5c..cd7920c81f85 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -440,7 +440,13 @@ TRACE_EVENT(rss_stat,
 
 	TP_fast_assign(
 		__entry->mm_id = mm_ptr_to_hash(mm);
-		__entry->curr = !!(current->mm == mm);
+		/*
+		 * curr is true if the mm matches the current task's mm_struct.
+		 * Since kthreads (PF_KTHREAD) have no mm_struct of their own
+		 * but can borrow one via kthread_use_mm(), we must filter them
+		 * out to avoid incorrectly attributing the RSS update to them.
+		 */
+		__entry->curr = current->mm == mm && !(current->flags & PF_KTHREAD);
 		__entry->member = member;
 		__entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member])
 							    << PAGE_SHIFT);
-- 
cgit v1.2.3


From 201ceb94aa1def0024a7c18ce643e5f65026be06 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@kernel.org>
Date: Mon, 23 Feb 2026 19:37:51 -0800
Subject: kunit: irq: Ensure timer doesn't fire too frequently

Fix a bug where kunit_run_irq_test() could hang if the system is too
slow.  This was noticed with the crypto library tests in certain VMs.

Specifically, if kunit_irq_test_timer_func() and the associated hrtimer
code took over 5us to run, then the CPU would spend all its time
executing that code in hardirq context.  As a result, the task executing
kunit_run_irq_test() never had a chance to run, exit the loop, and
cancel the timer.

To fix it, make kunit_irq_test_timer_func() increase the timer interval
when the other contexts aren't having a chance to run.

Fixes: 950a81224e8b ("lib/crypto: tests: Add hash-test-template.h and gen-hash-testvecs.py")
Cc: stable@vger.kernel.org
Reviewed-by: David Gow <david@davidgow.net>
Link: https://lore.kernel.org/r/20260224033751.97615-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 include/kunit/run-in-irq-context.h | 44 ++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/kunit/run-in-irq-context.h b/include/kunit/run-in-irq-context.h
index c89b1b1b12dd..bfe60d6cf28d 100644
--- a/include/kunit/run-in-irq-context.h
+++ b/include/kunit/run-in-irq-context.h
@@ -12,16 +12,16 @@
 #include <linux/hrtimer.h>
 #include <linux/workqueue.h>
 
-#define KUNIT_IRQ_TEST_HRTIMER_INTERVAL us_to_ktime(5)
-
 struct kunit_irq_test_state {
 	bool (*func)(void *test_specific_state);
 	void *test_specific_state;
 	bool task_func_reported_failure;
 	bool hardirq_func_reported_failure;
 	bool softirq_func_reported_failure;
+	atomic_t task_func_calls;
 	atomic_t hardirq_func_calls;
 	atomic_t softirq_func_calls;
+	ktime_t interval;
 	struct hrtimer timer;
 	struct work_struct bh_work;
 };
@@ -30,14 +30,25 @@ static enum hrtimer_restart kunit_irq_test_timer_func(struct hrtimer *timer)
 {
 	struct kunit_irq_test_state *state =
 		container_of(timer, typeof(*state), timer);
+	int task_calls, hardirq_calls, softirq_calls;
 
 	WARN_ON_ONCE(!in_hardirq());
-	atomic_inc(&state->hardirq_func_calls);
+	task_calls = atomic_read(&state->task_func_calls);
+	hardirq_calls = atomic_inc_return(&state->hardirq_func_calls);
+	softirq_calls = atomic_read(&state->softirq_func_calls);
+
+	/*
+	 * If the timer is firing too often for the softirq or task to ever have
+	 * a chance to run, increase the timer interval.  This is needed on very
+	 * slow systems.
+	 */
+	if (hardirq_calls >= 20 && (softirq_calls == 0 || task_calls == 0))
+		state->interval = ktime_add_ns(state->interval, 250);
 
 	if (!state->func(state->test_specific_state))
 		state->hardirq_func_reported_failure = true;
 
-	hrtimer_forward_now(&state->timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL);
+	hrtimer_forward_now(&state->timer, state->interval);
 	queue_work(system_bh_wq, &state->bh_work);
 	return HRTIMER_RESTART;
 }
@@ -86,10 +97,14 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *),
 	struct kunit_irq_test_state state = {
 		.func = func,
 		.test_specific_state = test_specific_state,
+		/*
+		 * Start with a 5us timer interval.  If the system can't keep
+		 * up, kunit_irq_test_timer_func() will increase it.
+		 */
+		.interval = us_to_ktime(5),
 	};
 	unsigned long end_jiffies;
-	int hardirq_calls, softirq_calls;
-	bool allctx = false;
+	int task_calls, hardirq_calls, softirq_calls;
 
 	/*
 	 * Set up a hrtimer (the way we access hardirq context) and a work
@@ -104,21 +119,18 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *),
 	 * and hardirq), or 1 second, whichever comes first.
 	 */
 	end_jiffies = jiffies + HZ;
-	hrtimer_start(&state.timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL,
-		      HRTIMER_MODE_REL_HARD);
-	for (int task_calls = 0, calls = 0;
-	     ((calls < max_iterations) || !allctx) &&
-	     !time_after(jiffies, end_jiffies);
-	     task_calls++) {
+	hrtimer_start(&state.timer, state.interval, HRTIMER_MODE_REL_HARD);
+	do {
 		if (!func(test_specific_state))
 			state.task_func_reported_failure = true;
 
+		task_calls = atomic_inc_return(&state.task_func_calls);
 		hardirq_calls = atomic_read(&state.hardirq_func_calls);
 		softirq_calls = atomic_read(&state.softirq_func_calls);
-		calls = task_calls + hardirq_calls + softirq_calls;
-		allctx = (task_calls > 0) && (hardirq_calls > 0) &&
-			 (softirq_calls > 0);
-	}
+	} while ((task_calls + hardirq_calls + softirq_calls < max_iterations ||
+		  (task_calls == 0 || hardirq_calls == 0 ||
+		   softirq_calls == 0)) &&
+		 !time_after(jiffies, end_jiffies));
 
 	/* Cancel the timer and work. */
 	hrtimer_cancel(&state.timer);
-- 
cgit v1.2.3


From 4b44cbb264d0ed3f2f2bc2659db6ce45882f4670 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 24 Feb 2026 15:24:52 -0800
Subject: overflow: Make sure size helpers are always inlined

With kmalloc_obj() performing implicit size calculations, the embedded
size_mul() calls, while marked inline, were not always being inlined.
I noticed a couple places where allocations were making a call out for
things that would otherwise be compile-time calculated. Force the
compilers to always inline these calculations.

Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://patch.msgid.link/20260224232451.work.614-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/overflow.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index eddd987a8513..a8cb6319b4fb 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -42,7 +42,7 @@
  * both the type-agnostic benefits of the macros while also being able to
  * enforce that the return value is, in fact, checked.
  */
-static inline bool __must_check __must_check_overflow(bool overflow)
+static __always_inline bool __must_check __must_check_overflow(bool overflow)
 {
 	return unlikely(overflow);
 }
@@ -327,7 +327,7 @@ static inline bool __must_check __must_check_overflow(bool overflow)
  * with any overflow causing the return value to be SIZE_MAX. The
  * lvalue must be size_t to avoid implicit type conversion.
  */
-static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
+static __always_inline size_t __must_check size_mul(size_t factor1, size_t factor2)
 {
 	size_t bytes;
 
@@ -346,7 +346,7 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
  * with any overflow causing the return value to be SIZE_MAX. The
  * lvalue must be size_t to avoid implicit type conversion.
  */
-static inline size_t __must_check size_add(size_t addend1, size_t addend2)
+static __always_inline size_t __must_check size_add(size_t addend1, size_t addend2)
 {
 	size_t bytes;
 
@@ -367,7 +367,7 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2)
  * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX).
  * The lvalue must be size_t to avoid implicit type conversion.
  */
-static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
+static __always_inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 {
 	size_t bytes;
 
-- 
cgit v1.2.3


From 93a4a9b732fbb479f95d327aa867d094aed3f712 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze@samba.org>
Date: Tue, 24 Feb 2026 17:59:52 +0100
Subject: RDMA/core: Check id_priv->restricted_node_type in cma_listen_on_dev()

When listening on wildcard addresses we have a global list for the application
layer rdma_cm_id and for any existing device or any device added in future we
try to listen on any wildcard listener.

When the listener has a restricted_node_type we should prevent listening on
devices with a different node type.

While there fix the documentation comment of rdma_restrict_node_type()
to include rdma_resolve_addr() instead of having rdma_bind_addr() twice.

Fixes: a760e80e90f5 ("RDMA/core: introduce rdma_restrict_node_type()")
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Steve French <smfrench@gmail.com>
Cc: Namjae Jeon <linkinjeon@kernel.org>
Cc: Tom Talpey <tom@talpey.com>
Cc: Long Li <longli@microsoft.com>
Cc: linux-rdma@vger.kernel.org
Cc: linux-cifs@vger.kernel.org
Cc: samba-technical@lists.samba.org
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Link: https://patch.msgid.link/20260224165951.3582093-2-metze@samba.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/rdma_cm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 6de6fd8bd15e..d639ff889e64 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -181,7 +181,7 @@ void rdma_destroy_id(struct rdma_cm_id *id);
  *
  * It needs to be called before the RDMA identifier is bound
  * to an device, which mean it should be called before
- * rdma_bind_addr(), rdma_bind_addr() and rdma_listen().
+ * rdma_bind_addr(), rdma_resolve_addr() and rdma_listen().
  */
 int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type);
 
-- 
cgit v1.2.3


From 622d68772ddf07573cf88e833afe8ba6c70ac748 Mon Sep 17 00:00:00 2001
From: "Anirudh Rayabharam (Microsoft)" <anirudh@anirudhrb.com>
Date: Wed, 25 Feb 2026 12:44:03 +0000
Subject: mshv: add arm64 support for doorbell & intercept SINTs

On x86, the HYPERVISOR_CALLBACK_VECTOR is used to receive synthetic
interrupts (SINTs) from the hypervisor for doorbells and intercepts.
There is no such vector reserved for arm64.

On arm64, the hypervisor exposes a synthetic register that can be read
to find the INTID that should be used for SINTs. This INTID is in the
PPI range.

To better unify the code paths, introduce mshv_sint_vector_init() that
either reads the synthetic register and obtains the INTID (arm64) or
just uses HYPERVISOR_CALLBACK_VECTOR as the interrupt vector (x86).

Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Signed-off-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/hyperv/hvgdk_mini.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 056ef7b6b360..8bb3dd71c5b4 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -1121,6 +1121,8 @@ enum hv_register_name {
 	HV_X64_REGISTER_MSR_MTRR_FIX4KF8000	= 0x0008007A,
 
 	HV_X64_REGISTER_REG_PAGE	= 0x0009001C,
+#elif defined(CONFIG_ARM64)
+	HV_ARM64_REGISTER_SINT_RESERVED_INTERRUPT_ID	= 0x00070001,
 #endif
 };
 
-- 
cgit v1.2.3


From 28aaa9c39945b7925a1cc1d513c8f21ed38f5e4f Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 26 Feb 2026 10:43:55 +0100
Subject: kthread: consolidate kthread exit paths to prevent use-after-free

Guillaume reported crashes via corrupted RCU callback function pointers
during KUnit testing. The crash was traced back to the pidfs rhashtable
conversion which replaced the 24-byte rb_node with an 8-byte rhash_head
in struct pid, shrinking it from 160 to 144 bytes.

struct kthread (without CONFIG_BLK_CGROUP) is also 144 bytes. With
CONFIG_SLAB_MERGE_DEFAULT and SLAB_HWCACHE_ALIGN both round up to
192 bytes and share the same slab cache. struct pid.rcu.func and
struct kthread.affinity_node both sit at offset 0x78.

When a kthread exits via make_task_dead() it bypasses kthread_exit() and
misses the affinity_node cleanup. free_kthread_struct() frees the memory
while the node is still linked into the global kthread_affinity_list. A
subsequent list_del() by another kthread writes through dangling list
pointers into the freed and reused memory, corrupting the pid's
rcu.func pointer.

Instead of patching free_kthread_struct() to handle the missed cleanup,
consolidate all kthread exit paths. Turn kthread_exit() into a macro
that calls do_exit() and add kthread_do_exit() which is called from
do_exit() for any task with PF_KTHREAD set. This guarantees that
kthread-specific cleanup always happens regardless of the exit path -
make_task_dead(), direct do_exit(), or kthread_exit().

Replace __to_kthread() with a new tsk_is_kthread() accessor in the
public header. Export do_exit() since module code using the
kthread_exit() macro now needs it directly.

Reported-by: Guillaume Tucker <gtucker@gtucker.io>
Tested-by: Guillaume Tucker <gtucker@gtucker.io>
Tested-by: Mark Brown <broonie@kernel.org>
Tested-by: David Gow <davidgow@google.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/all/20260224-mittlerweile-besessen-2738831ae7f6@brauner
Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 4d13f4304fa4 ("kthread: Implement preferred affinity")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/kthread.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c92c1149ee6e..a01a474719a7 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -7,6 +7,24 @@
 
 struct mm_struct;
 
+/* opaque kthread data */
+struct kthread;
+
+/*
+ * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
+ * always remain a kthread.  For kthreads p->worker_private always
+ * points to a struct kthread.  For tasks that are not kthreads
+ * p->worker_private is used to point to other things.
+ *
+ * Return NULL for any task that is not a kthread.
+ */
+static inline struct kthread *tsk_is_kthread(struct task_struct *p)
+{
+	if (p->flags & PF_KTHREAD)
+		return p->worker_private;
+	return NULL;
+}
+
 __printf(4, 5)
 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   void *data,
@@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_exit(long result) __noreturn;
+#define kthread_exit(result) do_exit(result)
 void kthread_complete_and_exit(struct completion *, long) __noreturn;
 int kthreads_update_housekeeping(void);
+void kthread_do_exit(struct kthread *, long);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
-- 
cgit v1.2.3


From 102eab95f025b4d3f3a6c0a858400aca2af2fe52 Mon Sep 17 00:00:00 2001
From: Bobby Eshleman <bobbyeshleman@meta.com>
Date: Mon, 23 Feb 2026 14:38:33 -0800
Subject: vsock: lock down child_ns_mode as write-once

Two administrator processes may race when setting child_ns_mode as one
process sets child_ns_mode to "local" and then creates a namespace, but
another process changes child_ns_mode to "global" between the write and
the namespace creation. The first process ends up with a namespace in
"global" mode instead of "local". While this can be detected after the
fact by reading ns_mode and retrying, it is fragile and error-prone.

Make child_ns_mode write-once so that a namespace manager can set it
once and be sure it won't change. Writing a different value after the
first write returns -EBUSY. This applies to all namespaces, including
init_net, where an init process can write "local" to lock all future
namespaces into local mode.

Fixes: eafb64f40ca4 ("vsock: add netns to vsock core")
Suggested-by: Daan De Meyer <daan.j.demeyer@gmail.com>
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
Co-developed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20260223-vsock-ns-write-once-v3-2-c0cde6959923@meta.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/af_vsock.h    | 13 +++++++++++--
 include/net/netns/vsock.h |  3 +++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index d3ff48a2fbe0..533d8e75f7bb 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -276,10 +276,19 @@ static inline bool vsock_net_mode_global(struct vsock_sock *vsk)
 	return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL;
 }
 
-static inline void vsock_net_set_child_mode(struct net *net,
+static inline bool vsock_net_set_child_mode(struct net *net,
 					    enum vsock_net_mode mode)
 {
-	WRITE_ONCE(net->vsock.child_ns_mode, mode);
+	int new_locked = mode + 1;
+	int old_locked = 0; /* unlocked */
+
+	if (try_cmpxchg(&net->vsock.child_ns_mode_locked,
+			&old_locked, new_locked)) {
+		WRITE_ONCE(net->vsock.child_ns_mode, mode);
+		return true;
+	}
+
+	return old_locked == new_locked;
 }
 
 static inline enum vsock_net_mode vsock_net_child_mode(struct net *net)
diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h
index b34d69a22fa8..dc8cbe45f406 100644
--- a/include/net/netns/vsock.h
+++ b/include/net/netns/vsock.h
@@ -17,5 +17,8 @@ struct netns_vsock {
 
 	enum vsock_net_mode mode;
 	enum vsock_net_mode child_ns_mode;
+
+	/* 0 = unlocked, 1 = locked to global, 2 = locked to local */
+	int child_ns_mode_locked;
 };
 #endif /* __NET_NET_NAMESPACE_VSOCK_H */
-- 
cgit v1.2.3


From a0b4c7a49137ed21279f354eb59f49ddae8dffc2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Feb 2026 13:32:33 +0000
Subject: netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict
 sequence

Fix netfslib such that, when it's making an unbuffered or DIO write, it
sends each subrequest strictly sequentially, waiting till the previous one
is 'committed' before sending the next so that we don't have pieces landing
out of order and potentially leaving a hole if an error occurs (ENOSPC for
example).

This is done by copying in just those bits of issuing, collecting and
retrying subrequests that are necessary to do one subrequest at a time.
Retrying, in particular, is simpler because if the current subrequest needs
retrying, the source iterator can just be copied again and the subrequest
prepped and issued again without needing to be concerned about whether it
needs merging with the previous or next in the sequence.

Note that the issuing loop waits for a subrequest to complete right after
issuing it, but this wait could be moved elsewhere allowing preparatory
steps to be performed whilst the subrequest is in progress.  In particular,
once content encryption is available in netfslib, that could be done whilst
waiting, as could cleanup of buffers that have been completed.

Fixes: 153a9961b551 ("netfs: Implement unbuffered/DIO write support")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/58526.1772112753@warthog.procyon.org.uk
Tested-by: Steve French <sfrench@samba.org>
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/netfs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 64a382fbc31a..2d366be46a1c 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -57,6 +57,7 @@
 	EM(netfs_rreq_trace_done,		"DONE   ")	\
 	EM(netfs_rreq_trace_end_copy_to_cache,	"END-C2C")	\
 	EM(netfs_rreq_trace_free,		"FREE   ")	\
+	EM(netfs_rreq_trace_intr,		"INTR   ")	\
 	EM(netfs_rreq_trace_ki_complete,	"KI-CMPL")	\
 	EM(netfs_rreq_trace_recollect,		"RECLLCT")	\
 	EM(netfs_rreq_trace_redirty,		"REDIRTY")	\
@@ -169,7 +170,8 @@
 	EM(netfs_sreq_trace_put_oom,		"PUT OOM    ")	\
 	EM(netfs_sreq_trace_put_wip,		"PUT WIP    ")	\
 	EM(netfs_sreq_trace_put_work,		"PUT WORK   ")	\
-	E_(netfs_sreq_trace_put_terminated,	"PUT TERM   ")
+	EM(netfs_sreq_trace_put_terminated,	"PUT TERM   ")	\
+	E_(netfs_sreq_trace_see_failed,		"SEE FAILED ")
 
 #define netfs_folio_traces					\
 	EM(netfs_folio_is_uptodate,		"mod-uptodate")	\
-- 
cgit v1.2.3


From f3ec502b6755a3bfb12c1c47025ef989ff9efc72 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 25 Feb 2026 08:34:07 -0800
Subject: mm/slab: mark alloc tags empty for sheaves allocated with
 __GFP_NO_OBJ_EXT

alloc_empty_sheaf() allocates sheaves from SLAB_KMALLOC caches using
__GFP_NO_OBJ_EXT to avoid recursion, however it does not mark their
allocation tags empty before freeing, which results in a warning when
CONFIG_MEM_ALLOC_PROFILING_DEBUG is set. Fix this by marking allocation
tags for such sheaves as empty.

The problem was technically introduced in commit 4c0a17e28340 but only
becomes possible to hit with commit 913ffd3a1bf5.

Fixes: 4c0a17e28340 ("slab: prevent recursive kmalloc() in alloc_empty_sheaf()")
Fixes: 913ffd3a1bf5 ("slab: handle kmalloc sheaves bootstrap")
Reported-by: David Wang <00107082@163.com>
Closes: https://lore.kernel.org/all/20260223155128.3849-1-00107082@163.com/
Analyzed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Tested-by: Harry Yoo <harry.yoo@oracle.com>
Tested-by: David Wang <00107082@163.com>
Link: https://patch.msgid.link/20260225163407.2218712-1-surenb@google.com
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
---
 include/linux/gfp_types.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 814bb2892f99..6c75df30a281 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -139,6 +139,8 @@ enum {
  * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
  *
  * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension.
+ * mark_obj_codetag_empty() should be called upon freeing for objects allocated
+ * with this flag to indicate that their NULL tags are expected and normal.
  */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
-- 
cgit v1.2.3


From 2b351ea42820a7ecc2e8305724536512984f4419 Mon Sep 17 00:00:00 2001
From: Sanjay Chitroda <sanjayembeddedse@gmail.com>
Date: Thu, 26 Feb 2026 11:17:12 +0530
Subject: mm/slub: drop duplicate kernel-doc for ksize()

The implementation of ksize() was updated with kernel-doc by commit
fab0694646d7 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c").
However, the public header still contains a kernel-doc comment
attached to the ksize() prototype.

Having documentation both in the header and next to the implementation
causes Sphinx to treat the function as being documented twice,
resulting in the warning:

  WARNING: Duplicate C declaration, also defined at core-api/mm-api:521
  Declaration is '.. c:function:: size_t ksize(const void *objp)'

Kernel-doc guidelines recommend keeping the documentation with the
function implementation. Therefore remove the redundant kernel-doc
block from include/linux/slab.h so that the implementation in slub.c
remains the canonical source for documentation.

No functional change.

Fixes: fab0694646d7 ("mm/slab: move [__]ksize and slab_ksize() to mm/slub.c")
Signed-off-by: Sanjay Chitroda <sanjayembeddedse@gmail.com>
Link: https://patch.msgid.link/20260226054712.3610744-1-sanjayembedded@gmail.com
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
---
 include/linux/slab.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index a5a5e4108ae5..15a60b501b95 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -517,18 +517,6 @@ void kfree_sensitive(const void *objp);
 DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
 DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))
 
-/**
- * ksize - Report actual allocation size of associated object
- *
- * @objp: Pointer returned from a prior kmalloc()-family allocation.
- *
- * This should not be used for writing beyond the originally requested
- * allocation size. Either use krealloc() or round up the allocation size
- * with kmalloc_size_roundup() prior to allocation. If this is used to
- * access beyond the originally requested allocation size, UBSAN_BOUNDS
- * and/or FORTIFY_SOURCE may trip, since they only know about the
- * originally allocated size via the __alloc_size attribute.
- */
 size_t ksize(const void *objp);
 
 #ifdef CONFIG_PRINTK
-- 
cgit v1.2.3


From 8678591b47469fe16357234efef9b260317b8be4 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 25 Feb 2026 15:02:51 -0700
Subject: kbuild: Split .modinfo out from ELF_DETAILS

Commit 3e86e4d74c04 ("kbuild: keep .modinfo section in
vmlinux.unstripped") added .modinfo to ELF_DETAILS while removing it
from COMMON_DISCARDS, as it was needed in vmlinux.unstripped and
ELF_DETAILS was present in all architecture specific vmlinux linker
scripts. While this shuffle is fine for vmlinux, ELF_DETAILS and
COMMON_DISCARDS may be used by other linker scripts, such as the s390
and x86 compressed boot images, which may not expect to have a .modinfo
section. In certain circumstances, this could result in a bootloader
failing to load the compressed kernel [1].

Commit ddc6cbef3ef1 ("s390/boot/vmlinux.lds.S: Ensure bzImage ends with
SecureBoot trailer") recently addressed this for the s390 bzImage but
the same bug remains for arm, parisc, and x86. The presence of .modinfo
in the x86 bzImage was the root cause of the issue worked around with
commit d50f21091358 ("kbuild: align modinfo section for Secureboot
Authenticode EDK2 compat"). misc.c in arch/x86/boot/compressed includes
lib/decompress_unzstd.c, which in turn includes lib/xxhash.c and its
MODULE_LICENSE / MODULE_DESCRIPTION macros due to the STATIC definition.

Split .modinfo out from ELF_DETAILS into its own macro and handle it in
all vmlinux linker scripts. Discard .modinfo in the places where it was
previously being discarded from being in COMMON_DISCARDS, as it has
never been necessary in those uses.

Cc: stable@vger.kernel.org
Fixes: 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped")
Reported-by: Ed W <lists@wildgooses.com>
Closes: https://lore.kernel.org/587f25e0-a80e-46a5-9f01-87cb40cfa377@wildgooses.com/ [1]
Tested-by: Ed W <lists@wildgooses.com> # x86_64
Link: https://patch.msgid.link/20260225-separate-modinfo-from-elf-details-v1-1-387ced6baf4b@kernel.org
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index eeb070f330bd..1e1580febe4b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -848,12 +848,14 @@
 
 /* Required sections not related to debugging. */
 #define ELF_DETAILS							\
-		.modinfo : { *(.modinfo) . = ALIGN(8); }		\
 		.comment 0 : { *(.comment) }				\
 		.symtab 0 : { *(.symtab) }				\
 		.strtab 0 : { *(.strtab) }				\
 		.shstrtab 0 : { *(.shstrtab) }
 
+#define MODINFO								\
+		.modinfo : { *(.modinfo) . = ALIGN(8); }
+
 #ifdef CONFIG_GENERIC_BUG
 #define BUG_TABLE							\
 	. = ALIGN(8);							\
-- 
cgit v1.2.3


From 3350c2b3f2b8a3b985a020a4ef4f2f050a4b6a1d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 25 Feb 2026 21:12:29 -0800
Subject: platform_data/mlxreg: mlxreg.h: fix all kernel-doc warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the correct kernel-doc format & notation to eliminate
kernel-doc warnings:

Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE1' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE2' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:24 Enum value
 'MLX_WDT_TYPE3' not described in enum 'mlxreg_wdt_type'
Warning: include/linux/platform_data/mlxreg.h:37 bad line:
 PHYs ready / unready state;
Warning: include/linux/platform_data/mlxreg.h:153 struct member 'np'
 not described in 'mlxreg_core_data'
Warning: include/linux/platform_data/mlxreg.h:153 struct member 'hpdev'
 not described in 'mlxreg_core_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260226051232.549537-1-rdunlap@infradead.org
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/mlxreg.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index f6cca7a035c7..50b6be57da66 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -13,10 +13,10 @@
 /**
  * enum mlxreg_wdt_type - type of HW watchdog
  *
- * TYPE1 HW watchdog implementation exist in old systems.
- * All new systems have TYPE2 HW watchdog.
- * TYPE3 HW watchdog can exist on all systems with new CPLD.
- * TYPE3 is selected by WD capability bit.
+ * @MLX_WDT_TYPE1: HW watchdog implementation in old systems.
+ * @MLX_WDT_TYPE2: All new systems have TYPE2 HW watchdog.
+ * @MLX_WDT_TYPE3: HW watchdog that can exist on all systems with new CPLD.
+ *   TYPE3 is selected by WD capability bit.
  */
 enum mlxreg_wdt_type {
 	MLX_WDT_TYPE1,
@@ -35,7 +35,7 @@ enum mlxreg_wdt_type {
  * @MLXREG_HOTPLUG_LC_SYNCED: entry for line card synchronization events, coming
  *			      after hardware-firmware synchronization handshake;
  * @MLXREG_HOTPLUG_LC_READY: entry for line card ready events, indicating line card
-			     PHYs ready / unready state;
+ *			     PHYs ready / unready state;
  * @MLXREG_HOTPLUG_LC_ACTIVE: entry for line card active events, indicating firmware
  *			      availability / unavailability for the ports on line card;
  * @MLXREG_HOTPLUG_LC_THERMAL: entry for line card thermal shutdown events, positive
@@ -123,8 +123,8 @@ struct mlxreg_hotplug_device {
  * @reg_pwr: attribute power register;
  * @reg_ena: attribute enable register;
  * @mode: access mode;
- * @np - pointer to node platform associated with attribute;
- * @hpdev - hotplug device data;
+ * @np: pointer to node platform associated with attribute;
+ * @hpdev: hotplug device data;
  * @notifier: pointer to event notifier block;
  * @health_cntr: dynamic device health indication counter;
  * @attached: true if device has been attached after good health indication;
-- 
cgit v1.2.3


From 39195990e4c093c9eecf88f29811c6de29265214 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 27 Feb 2026 06:10:08 -0600
Subject: PCI: Correct PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fb82437fdd8c ("PCI: Change capability register offsets to hex") incorrectly
converted the PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 value from decimal 52 to hex
0x32:

  -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52      /* v2 endpoints with link end here */
  +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32    /* end of v2 EPs w/ link */

This broke PCI capabilities in a VMM because subsequent ones weren't
DWORD-aligned.

Change PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to the correct value of 0x34.

fb82437fdd8c was from Baruch Siach <baruch@tkos.co.il>, but this was not
Baruch's fault; it's a mistake I made when applying the patch.

Fixes: fb82437fdd8c ("PCI: Change capability register offsets to hex")
Reported-by: David Woodhouse <dwmw2@infradead.org>
Closes: https://lore.kernel.org/all/3ae392a0158e9d9ab09a1d42150429dd8ca42791.camel@infradead.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
---
 include/uapi/linux/pci_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ec1c54b5a310..14f634ab9350 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -712,7 +712,7 @@
 #define  PCI_EXP_LNKCTL2_HASD		0x0020 /* HW Autonomous Speed Disable */
 #define PCI_EXP_LNKSTA2		0x32	/* Link Status 2 */
 #define  PCI_EXP_LNKSTA2_FLIT		0x0400 /* Flit Mode Status */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	0x32	/* end of v2 EPs w/ link */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	0x34	/* end of v2 EPs w/ link */
 #define PCI_EXP_SLTCAP2		0x34	/* Slot Capabilities 2 */
 #define  PCI_EXP_SLTCAP2_IBPD	0x00000001 /* In-band PD Disable Supported */
 #define PCI_EXP_SLTCTL2		0x38	/* Slot Control 2 */
-- 
cgit v1.2.3


From e6b899f08066e744f89df16ceb782e06868bd148 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 26 Feb 2026 14:50:09 +0100
Subject: nsfs: tighten permission checks for ns iteration ioctls

Even privileged services should not necessarily be able to see other
privileged services' namespaces so they can't leak information to each
other. Use the may_see_all_namespaces() helper that centralizes this policy
until the nstree adapts.

Link: https://patch.msgid.link/20260226-work-visibility-fixes-v1-1-d2c2853313bd@kernel.org
Fixes: a1d220d9dafa ("nsfs: iterate through mount namespaces")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@kernel.org # v6.12+
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/ns_common.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 825f5865bfc5..c8e227a3f9e2 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns)
 
 #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
 
+bool may_see_all_namespaces(void);
+
 static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
 {
 	return atomic_read(&ns->__ns_ref_active);
-- 
cgit v1.2.3


From 1df97a7453eec80c1912c2d0360290a3970a7671 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 27 Feb 2026 14:48:01 -0800
Subject: bpf: Register dtor for freeing special fields

There is a race window where BPF hash map elements can leak special
fields if the program with access to the map value recreates these
special fields between the check_and_free_fields done on the map value
and its eventual return to the memory allocator.

Several ways were explored prior to this patch, most notably [0] tried
to use a poison value to reject attempts to recreate special fields for
map values that have been logically deleted but still accessible to BPF
programs (either while sitting in the free list or when reused). While
this approach works well for task work, timers, wq, etc., it is harder
to apply the idea to kptrs, which have a similar race and failure mode.

Instead, we change bpf_mem_alloc to allow registering destructor for
allocated elements, such that when they are returned to the allocator,
any special fields created while they were accessible to programs in the
mean time will be freed. If these values get reused, we do not free the
fields again before handing the element back. The special fields thus
may remain initialized while the map value sits in a free list.

When bpf_mem_alloc is retired in the future, a similar concept can be
introduced to kmalloc_nolock-backed kmem_cache, paired with the existing
idea of a constructor.

Note that the destructor registration happens in map_check_btf, after
the BTF record is populated and (at that point) available for inspection
and duplication. Duplication is necessary since the freeing of embedded
bpf_mem_alloc can be decoupled from actual map lifetime due to logic
introduced to reduce the cost of rcu_barrier()s in mem alloc free path in
9f2c6e96c65e ("bpf: Optimize rcu_barrier usage between hash map and bpf_mem_alloc.").

As such, once all callbacks are done, we must also free the duplicated
record. To remove dependency on the bpf_map itself, also stash the key
size of the map to obtain value from htab_elem long after the map is
gone.

  [0]: https://lore.kernel.org/bpf/20260216131341.1285427-1-mykyta.yatsenko5@gmail.com

Fixes: 14a324f6a67e ("bpf: Wire up freeing of referenced kptr")
Fixes: 1bfbc267ec91 ("bpf: Enable bpf_timer and bpf_wq in any context")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: syzbot@syzkaller.appspotmail.com
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_mem_alloc.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index e45162ef59bb..4ce0d27f8ea2 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -14,6 +14,8 @@ struct bpf_mem_alloc {
 	struct obj_cgroup *objcg;
 	bool percpu;
 	struct work_struct work;
+	void (*dtor_ctx_free)(void *ctx);
+	void *dtor_ctx;
 };
 
 /* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects.
@@ -32,6 +34,10 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
 /* The percpu allocation with a specific unit size. */
 int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
 void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma,
+			    void (*dtor)(void *obj, void *ctx),
+			    void (*dtor_ctx_free)(void *ctx),
+			    void *ctx);
 
 /* Check the allocation size for kmalloc equivalent allocator */
 int bpf_mem_alloc_check_size(bool percpu, size_t size);
-- 
cgit v1.2.3


From ae51772b1e94ba1d76db19085957dbccac189c1c Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 27 Feb 2026 14:48:02 -0800
Subject: bpf: Lose const-ness of map in map_check_btf()

BPF hash map may now use the map_check_btf() callback to decide whether
to set a dtor on its bpf_mem_alloc or not. Unlike C++ where members can
opt out of const-ness using mutable, we must lose the const qualifier on
the callback such that we can avoid the ugly cast. Make the change and
adjust all existing users, and lose the comment in hashtab.c.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20260227224806.646888-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h               | 4 ++--
 include/linux/bpf_local_storage.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b78b53198a2e..05b34a6355b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -124,7 +124,7 @@ struct bpf_map_ops {
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
-	int (*map_check_btf)(const struct bpf_map *map,
+	int (*map_check_btf)(struct bpf_map *map,
 			     const struct btf *btf,
 			     const struct btf_type *key_type,
 			     const struct btf_type *value_type);
@@ -656,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
 		map->ops->map_seq_show_elem;
 }
 
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
 		     const struct btf *btf,
 		     const struct btf_type *key_type,
 		     const struct btf_type *value_type);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 85efa9772530..8157e8da61d4 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -176,7 +176,7 @@ u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
 void bpf_local_storage_map_free(struct bpf_map *map,
 				struct bpf_local_storage_cache *cache);
 
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
 				    const struct btf *btf,
 				    const struct btf_type *key_type,
 				    const struct btf_type *value_type);
-- 
cgit v1.2.3


From 62413a9c3cb183afb9bb6e94dd68caf4e4145f4c Mon Sep 17 00:00:00 2001
From: Paul Moses <p@1g4.org>
Date: Mon, 23 Feb 2026 15:05:44 +0000
Subject: net/sched: act_gate: snapshot parameters with RCU on replace

The gate action can be replaced while the hrtimer callback or dump path is
walking the schedule list.

Convert the parameters to an RCU-protected snapshot and swap updates under
tcf_lock, freeing the previous snapshot via call_rcu(). When REPLACE omits
the entry list, preserve the existing schedule so the effective state is
unchanged.

Fixes: a51c328df310 ("net: qos: introduce a gate control flow action")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses <p@1g4.org>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260223150512.2251594-2-p@1g4.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tc_act/tc_gate.h | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index b147a3bb1a46..e0fded18e18c 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params {
 	s32			tcfg_clockid;
 	size_t			num_entries;
 	struct list_head	entries;
+	struct rcu_head		rcu;
 };
 
 #define GATE_ACT_GATE_OPEN	BIT(0)
@@ -39,7 +40,7 @@ struct tcf_gate_params {
 
 struct tcf_gate {
 	struct tc_action	common;
-	struct tcf_gate_params	param;
+	struct tcf_gate_params __rcu *param;
 	u8			current_gate_status;
 	ktime_t			current_close_time;
 	u32			current_entry_octets;
@@ -51,47 +52,65 @@ struct tcf_gate {
 
 #define to_gate(a) ((struct tcf_gate *)a)
 
+static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a)
+{
+	struct tcf_gate *gact = to_gate(a);
+
+	return rcu_dereference_protected(gact->param,
+					 lockdep_is_held(&gact->tcf_lock));
+}
+
 static inline s32 tcf_gate_prio(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	s32 tcfg_prio;
 
-	tcfg_prio = to_gate(a)->param.tcfg_priority;
+	p = tcf_gate_params_locked(a);
+	tcfg_prio = p->tcfg_priority;
 
 	return tcfg_prio;
 }
 
 static inline u64 tcf_gate_basetime(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u64 tcfg_basetime;
 
-	tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+	p = tcf_gate_params_locked(a);
+	tcfg_basetime = p->tcfg_basetime;
 
 	return tcfg_basetime;
 }
 
 static inline u64 tcf_gate_cycletime(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u64 tcfg_cycletime;
 
-	tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+	p = tcf_gate_params_locked(a);
+	tcfg_cycletime = p->tcfg_cycletime;
 
 	return tcfg_cycletime;
 }
 
 static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u64 tcfg_cycletimeext;
 
-	tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+	p = tcf_gate_params_locked(a);
+	tcfg_cycletimeext = p->tcfg_cycletime_ext;
 
 	return tcfg_cycletimeext;
 }
 
 static inline u32 tcf_gate_num_entries(const struct tc_action *a)
 {
+	struct tcf_gate_params *p;
 	u32 num_entries;
 
-	num_entries = to_gate(a)->param.num_entries;
+	p = tcf_gate_params_locked(a);
+	num_entries = p->num_entries;
 
 	return num_entries;
 }
@@ -105,7 +124,7 @@ static inline struct action_gate_entry
 	u32 num_entries;
 	int i = 0;
 
-	p = &to_gate(a)->param;
+	p = tcf_gate_params_locked(a);
 	num_entries = p->num_entries;
 
 	list_for_each_entry(entry, &p->entries, list)
-- 
cgit v1.2.3


From 76e954155b45294c502e3d3a9e15757c858ca55e Mon Sep 17 00:00:00 2001
From: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Date: Fri, 27 Feb 2026 22:32:21 +0100
Subject: bpf: Introduce tnum_step to step through tnum's members

This commit introduces tnum_step(), a function that, when given t, and a
number z returns the smallest member of t larger than z. The number z
must be greater or equal to the smallest member of t and less than the
largest member of t.

The first step is to compute j, a number that keeps all of t's known
bits, and matches all unknown bits to z's bits. Since j is a member of
t, it is already a candidate for the result. However, we want our result
to be (minimally) greater than z.

There are only two possible cases:

(1) Case j <= z. In this case, we want to increase the value of j and
make it > z.
(2) Case j > z. In this case, we want to decrease the value of j while
keeping it > z.

(Case 1) j <= z

t = xx11x0x0
z = 10111101 (189)
j = 10111000 (184)
         ^
         k

(Case 1.1) Let's first consider the case where j < z. We will address j
== z later.

Since z > j, there had to be a bit position that was 1 in z and a 0 in
j, beyond which all positions of higher significance are equal in j and
z. Further, this position could not have been unknown in t, because the
unknown positions of t match z. This position had to be a 1 in z and
known 0 in t.

Let k be position of the most significant 1-to-0 flip. In our example, k
= 3 (starting the count at 1 at the least significant bit).  Setting (to
1) the unknown bits of t in positions of significance smaller than
k will not produce a result > z. Hence, we must set/unset the unknown
bits at positions of significance higher than k. Specifically, we look
for the next larger combination of 1s and 0s to place in those
positions, relative to the combination that exists in z. We can achieve
this by concatenating bits at unknown positions of t into an integer,
adding 1, and writing the bits of that result back into the
corresponding bit positions previously extracted from z.

>From our example, considering only positions of significance greater
than k:

t =  xx..x
z =  10..1
    +    1
     -----
     11..0

This is the exact combination of 1s and 0s we need at the unknown bits of t
in positions of significance greater than k. Further, our result must
only increase the value minimally above z. Hence, unknown bits in
positions of significance smaller than k should remain 0. We finally
have,

result = 11110000 (240)

(Case 1.2) Now consider the case when j = z, for example

t = 1x1x0xxx
z = 10110100 (180)
j = 10110100 (180)

Matching the unknown bits of t to the bits of z yielded exactly z.
To produce a number greater than z, we must set/unset the unknown bits
in t, and *all* the unknown bits of t are candidates for being set/unset. We
can do this similar to Case 1.1, by adding 1 to the bits extracted from
the masked bit positions of z. Essentially, this case is equivalent to
Case 1.1, with k = 0.

t =  1x1x0xxx
z =  .0.1.100
    +       1
    ---------
     .0.1.101

This is the exact combination of bits needed in the unknown positions of
t. After recalling the known positions of t, we get

result = 10110101 (181)

(Case 2) j > z

t = x00010x1
z = 10000010 (130)
j = 10001011 (139)
	^
	k

Since j > z, there had to be a bit position which was 0 in z, and a 1 in
j, beyond which all positions of higher significance are equal in j and
z. This position had to be a 0 in z and known 1 in t. Let k be the
position of the most significant 0-to-1 flip. In our example, k = 4.

Because of the 0-to-1 flip at position k, a member of t can become
greater than z if the bits in positions greater than k are themselves >=
to z. To make that member *minimally* greater than z, the bits in
positions greater than k must be exactly = z. Hence, we simply match all
of t's unknown bits in positions more significant than k to z's bits. In
positions less significant than k, we set all t's unknown bits to 0
to retain minimality.

In our example, in positions of greater significance than k (=4),
t=x000. These positions are matched with z (1000) to produce 1000. In
positions of lower significance than k, t=10x1. All unknown bits are set
to 0 to produce 1001. The final result is:

result = 10001001 (137)

This concludes the computation for a result > z that is a member of t.

The procedure for tnum_step() in this commit implements the idea
described above. As a proof of correctness, we verified the algorithm
against a logical specification of tnum_step. The specification asserts
the following about the inputs t, z and output res that:

1. res is a member of t, and
2. res is strictly greater than z, and
3. there does not exist another value res2 such that
	3a. res2 is also a member of t, and
	3b. res2 is greater than z
	3c. res2 is smaller than res

We checked the implementation against this logical specification using
an SMT solver. The verification formula in SMTLIB format is available
at [1]. The verification returned an "unsat": indicating that no input
assignment exists for which the implementation and the specification
produce different outputs.

In addition, we also automatically generated the logical encoding of the
C implementation using Agni [2] and verified it against the same
specification. This verification also returned an "unsat", confirming
that the implementation is equivalent to the specification. The formula
for this check is also available at [3].

Link: https://pastebin.com/raw/2eRWbiit [1]
Link: https://github.com/bpfverif/agni [2]
Link: https://pastebin.com/raw/EztVbBJ2 [3]
Co-developed-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Signed-off-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Co-developed-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>
Link: https://lore.kernel.org/r/93fdf71910411c0f19e282ba6d03b4c65f9c5d73.1772225741.git.paul.chaignon@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/tnum.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index fa4654ffb621..ca2cfec8de08 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -131,4 +131,7 @@ static inline bool tnum_subreg_is_const(struct tnum a)
 	return !(tnum_subreg(a)).mask;
 }
 
+/* Returns the smallest member of t larger than z */
+u64 tnum_step(struct tnum t, u64 z);
+
 #endif /* _LINUX_TNUM_H */
-- 
cgit v1.2.3


From 29252397bcc1e0a1f85e5c3bee59c325f5c26341 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 25 Feb 2026 20:35:45 +0000
Subject: inet: annotate data-races around isk->inet_num

UDP/TCP lookups are using RCU, thus isk->inet_num accesses
should use READ_ONCE() and WRITE_ONCE() where needed.

Fixes: 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260225203545.1512417-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet6_hashtables.h | 2 +-
 include/net/inet_hashtables.h  | 2 +-
 include/net/ip.h               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 282e29237d93..c16de5b7963f 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk,
 {
 	if (!net_eq(sock_net(sk), net) ||
 	    sk->sk_family != AF_INET6 ||
-	    sk->sk_portpair != ports ||
+	    READ_ONCE(sk->sk_portpair) != ports ||
 	    !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) ||
 	    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 		return false;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ac05a52d9e13..5a979dcab538 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -345,7 +345,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk,
 			      int dif, int sdif)
 {
 	if (!net_eq(sock_net(sk), net) ||
-	    sk->sk_portpair != ports ||
+	    READ_ONCE(sk->sk_portpair) != ports ||
 	    sk->sk_addrpair != cookie)
 	        return false;
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 69d5cef46004..7f9abd457e01 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
 
 	ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
 	ipcm->addr = inet->inet_saddr;
-	ipcm->protocol = inet->inet_num;
+	ipcm->protocol = READ_ONCE(inet->inet_num);
 }
 
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
-- 
cgit v1.2.3


From 11cb63b0d1a0685e0831ae3c77223e002ef18189 Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Wed, 25 Feb 2026 10:43:48 -0300
Subject: net/sched: Only allow act_ct to bind to clsact/ingress qdiscs and
 shared blocks

As Paolo said earlier [1]:

"Since the blamed commit below, classify can return TC_ACT_CONSUMED while
the current skb being held by the defragmentation engine. As reported by
GangMin Kim, if such packet is that may cause a UaF when the defrag engine
later on tries to tuch again such packet."

act_ct was never meant to be used in the egress path, however some users
are attaching it to egress today [2]. Attempting to reach a middle
ground, we noticed that, while most qdiscs are not handling
TC_ACT_CONSUMED, clsact/ingress qdiscs are. With that in mind, we
address the issue by only allowing act_ct to bind to clsact/ingress
qdiscs and shared blocks. That way it's still possible to attach act_ct to
egress (albeit only with clsact).

[1] https://lore.kernel.org/netdev/674b8cbfc385c6f37fb29a1de08d8fe5c2b0fbee.1771321118.git.pabeni@redhat.com/
[2] https://lore.kernel.org/netdev/cc6bfb4a-4a2b-42d8-b9ce-7ef6644fb22b@ovn.org/

Reported-by: GangMin Kim <km.kim1503@gmail.com>
Fixes: 3f14b377d01d ("net/sched: act_ct: fix skb leak and crash on ooo frags")
CC: stable@vger.kernel.org
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260225134349.1287037-1-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index e1e8f0f7dacb..d11b79107930 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -70,6 +70,7 @@ struct tc_action {
 #define TCA_ACT_FLAGS_REPLACE	(1U << (TCA_ACT_FLAGS_USER_BITS + 2))
 #define TCA_ACT_FLAGS_NO_RTNL	(1U << (TCA_ACT_FLAGS_USER_BITS + 3))
 #define TCA_ACT_FLAGS_AT_INGRESS	(1U << (TCA_ACT_FLAGS_USER_BITS + 4))
+#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT	(1U << (TCA_ACT_FLAGS_USER_BITS + 5))
 
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.
-- 
cgit v1.2.3


From 15fba71533bcdfaa8eeba69a5a5a2927afdf664a Mon Sep 17 00:00:00 2001
From: Valentin Spreckels <valentin@spreckels.dev>
Date: Thu, 26 Feb 2026 20:54:09 +0100
Subject: net: usb: r8152: add TRENDnet TUC-ET2G

The TRENDnet TUC-ET2G is a RTL8156 based usb ethernet adapter. Add its
vendor and product IDs.

Signed-off-by: Valentin Spreckels <valentin@spreckels.dev>
Link: https://patch.msgid.link/20260226195409.7891-2-valentin@spreckels.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/usb/r8152.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h
index 2ca60828f28b..1502b2a355f9 100644
--- a/include/linux/usb/r8152.h
+++ b/include/linux/usb/r8152.h
@@ -32,6 +32,7 @@
 #define VENDOR_ID_DLINK			0x2001
 #define VENDOR_ID_DELL			0x413c
 #define VENDOR_ID_ASUS			0x0b05
+#define VENDOR_ID_TRENDNET		0x20f4
 
 #if IS_REACHABLE(CONFIG_USB_RTL8152)
 extern u8 rtl8152_get_version(struct usb_interface *intf);
-- 
cgit v1.2.3


From 05ac3846ffe53fc63e454eb195ce8a6bab7a6a88 Mon Sep 17 00:00:00 2001
From: Shenghao Ding <shenghao-ding@ti.com>
Date: Fri, 27 Feb 2026 22:46:40 +0800
Subject: ALSA: hda/tas2781: A workaround solution to lower-vol issue among
 lower calibrated-impedance micro-speaker on TAS2781

On TAS2781, if the Speaker calibrated impedance is lower than the default
value hard-coded inside the TAS2781, it will cause the volume to be lower
than normal. In order to fix this issue, the parameter of SineGainI needs
updating.

Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Tested-by: Matthew Schwartz <matthew.schwartz@linux.dev>
Link: https://patch.msgid.link/20260227144641.1243-1-shenghao-ding@ti.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/tas2781.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index 7c03bdc951bb..e847cf51878c 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -151,6 +151,7 @@ struct tasdevice {
 	struct bulk_reg_val *cali_data_backup;
 	struct bulk_reg_val alp_cali_bckp;
 	struct tasdevice_fw *cali_data_fmw;
+	void *cali_specific;
 	unsigned int dev_addr;
 	unsigned int err_code;
 	unsigned char cur_book;
-- 
cgit v1.2.3


From 0ed2e8bf61d6d5df1d78f4e24b682dff4c394e17 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 28 Feb 2026 04:56:20 -0700
Subject: io_uring: correct comment for IORING_SETUP_TASKRUN_FLAG

Sync with a recent liburing fix, which corrects the comment explaining
when the IORING_SETUP_TASKRUN_FLAG setup flag is valid to use. May be
used with COOP_TASKRUN or DEFER_TASKRUN, not useful without either of
these task_work mechanisms being used.

Link: https://github.com/axboe/liburing/pull/1543
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6750c383a2ab..1ff16141c8a5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -188,7 +188,8 @@ enum io_uring_sqe_flags_bit {
 /*
  * If COOP_TASKRUN is set, get notified if task work is available for
  * running and a kernel transition would be needed to run it. This sets
- * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN
+ * or DEFER_TASKRUN.
  */
 #define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
 #define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
-- 
cgit v1.2.3


From 407fd8b8d8cce03856aa67329715de48b254b529 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 11 Feb 2026 19:03:03 +0100
Subject: KVM: remove CONFIG_KVM_GENERIC_MMU_NOTIFIER

All architectures now use MMU notifier for KVM page table management.
Remove the Kconfig symbol and the code that is used when it is
disabled.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dde605cb894e..34759a262b28 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -253,7 +253,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 union kvm_mmu_notifier_arg {
 	unsigned long attributes;
 };
@@ -275,7 +274,6 @@ struct kvm_gfn_range {
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-#endif
 
 enum {
 	OUTSIDE_GUEST_MODE,
@@ -849,13 +847,12 @@ struct kvm {
 	struct hlist_head irq_ack_notifier_list;
 #endif
 
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 	struct mmu_notifier mmu_notifier;
 	unsigned long mmu_invalidate_seq;
 	long mmu_invalidate_in_progress;
 	gfn_t mmu_invalidate_range_start;
 	gfn_t mmu_invalidate_range_end;
-#endif
+
 	struct list_head devices;
 	u64 manual_dirty_log_protect;
 	struct dentry *debugfs_dentry;
@@ -2118,7 +2115,6 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
 extern const struct kvm_stats_header kvm_vcpu_stats_header;
 extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
 
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
 {
 	if (unlikely(kvm->mmu_invalidate_in_progress))
@@ -2196,7 +2192,6 @@ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm,
 
 	return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq;
 }
-#endif
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 60abb0ac11dccd6b98fd9182bc5f85b621688861 Mon Sep 17 00:00:00 2001
From: "Nikhil P. Rao" <nikhil.rao@amd.com>
Date: Wed, 25 Feb 2026 00:00:26 +0000
Subject: xsk: Fix fragment node deletion to prevent buffer leak

After commit b692bf9a7543 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node"),
the list_node field is reused for both the xskb pool list and the buffer
free list, this causes a buffer leak as described below.

xp_free() checks if a buffer is already on the free list using
list_empty(&xskb->list_node). When list_del() is used to remove a node
from the xskb pool list, it doesn't reinitialize the node pointers.
This means list_empty() will return false even after the node has been
removed, causing xp_free() to incorrectly skip adding the buffer to the
free list.

Fix this by using list_del_init() instead of list_del() in all fragment
handling paths, this ensures the list node is reinitialized after removal,
allowing the list_empty() to work correctly.

Fixes: b692bf9a7543 ("xsk: Get rid of xdp_buff_xsk::xskb_list_node")
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Nikhil P. Rao <nikhil.rao@amd.com>
Link: https://patch.msgid.link/20260225000456.107806-2-nikhil.rao@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/xdp_sock_drv.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 242e34f771cc..aefc368449d5 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -122,7 +122,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
 		goto out;
 
 	list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
-		list_del(&pos->list_node);
+		list_del_init(&pos->list_node);
 		xp_free(pos);
 	}
 
@@ -157,7 +157,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
 	frag = list_first_entry_or_null(&xskb->pool->xskb_list,
 					struct xdp_buff_xsk, list_node);
 	if (frag) {
-		list_del(&frag->list_node);
+		list_del_init(&frag->list_node);
 		ret = &frag->xdp;
 	}
 
@@ -168,7 +168,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
 
-	list_del(&xskb->list_node);
+	list_del_init(&xskb->list_node);
 }
 
 static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
-- 
cgit v1.2.3


From 9351cf3fd92dc1349bb75f2f7f7324607dcf596f Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Thu, 26 Feb 2026 11:01:37 +0000
Subject: ASoC: cs35l56: Only patch ASP registers if the DAI is part of a
 DAIlink

Move the ASP register patches to a separate struct and apply this from the
ASP DAI probe() function so that the registers are only patched if the DAI
is part of a DAI link.

Some systems use the ASP as a special-purpose interconnect and on these
systems the ASP registers are configured by a third party (the firmware,
the BIOS, or another device using the amp's secondary host control
interface).

If the machine driver does not hook up the ASP DAI then the ASP registers
must be omitted from the patch to prevent overwriting the third party
configuration.

If the machine driver includes the ASP DAI in a DAI link, this implies that
the machine driver and higher components (such as alsa-ucm) are taking
ownership of the ASP. In this case the ASP registers are patched to known
defaults and the machine driver should configure the ASP.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20260226110137.1664562-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l56.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index ae1e1489b671..28f9f5940ab6 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -406,6 +406,7 @@ extern const char * const cs35l56_cal_set_status_text[3];
 extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC];
 extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC];
 
+int cs35l56_set_asp_patch(struct cs35l56_base *cs35l56_base);
 int cs35l56_set_patch(struct cs35l56_base *cs35l56_base);
 int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command);
 int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base);
-- 
cgit v1.2.3


From 8a9ebe8c3ca4c5bdad8f010656f4c2155da589fd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 17:48:22 -0800
Subject: gpio: timberdale: repair kernel-doc comments

Use a ':' after struct member names to avoid kernel-doc warnings:

Warning: include/linux/timb_gpio.h:22 struct member 'gpio_base' not
 described in 'timbgpio_platform_data'
Warning: include/linux/timb_gpio.h:22 struct member 'nr_pins' not
 described in 'timbgpio_platform_data'
Warning: include/linux/timb_gpio.h:22 struct member 'irq_base' not
 described in 'timbgpio_platform_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260301014822.3133268-1-rdunlap@infradead.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 include/linux/timb_gpio.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/timb_gpio.h b/include/linux/timb_gpio.h
index 3faf5a6bb13e..74f5e73bf6db 100644
--- a/include/linux/timb_gpio.h
+++ b/include/linux/timb_gpio.h
@@ -9,10 +9,10 @@
 
 /**
  * struct timbgpio_platform_data - Platform data of the Timberdale GPIO driver
- * @gpio_base		The number of the first GPIO pin, set to -1 for
+ * @gpio_base:		The number of the first GPIO pin, set to -1 for
  *			dynamic number allocation.
- * @nr_pins		Number of pins that is supported by the hardware (1-32)
- * @irq_base		If IRQ is supported by the hardware, this is the base
+ * @nr_pins:		Number of pins that is supported by the hardware (1-32)
+ * @irq_base:		If IRQ is supported by the hardware, this is the base
  *			number of IRQ:s. One IRQ per pin will be used. Set to
  *			-1 if IRQ:s is not supported.
  */
-- 
cgit v1.2.3


From 189645ba9cd9c1eed45151aacaae4347c1eb86a7 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 17:48:11 -0800
Subject: gpio: nomadik: repair some kernel-doc comments

Avoid these kernel-doc warnings by:
- adding short descriptions for enums
- using correct (matching) struct names in kernel-doc short descriptions
- using the correct struct member name for @nfunctions

Warning: include/linux/gpio/gpio-nomadik.h:116 missing initial short
 description on line: * enum prcm_gpiocr_reg_index
Warning: include/linux/gpio/gpio-nomadik.h:125 missing initial short
 description on line: * enum prcm_gpiocr_altcx_index
Warning: include/linux/gpio/gpio-nomadik.h:146 expecting prototype for
 struct prcm_gpio_altcx. Prototype was for struct prcm_gpiocr_altcx instead
Warning: include/linux/gpio/gpio-nomadik.h:156 expecting prototype for
 struct prcm_gpio_altcx_pin_desc. Prototype was for
 struct prcm_gpiocr_altcx_pin_desc instead
Warning: include/linux/gpio/gpio-nomadik.h:212 struct member 'nfunctions'
 not described in 'nmk_pinctrl_soc_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260301014811.3133250-1-rdunlap@infradead.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
---
 include/linux/gpio/gpio-nomadik.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h
index 592a774a53cd..8061b9826361 100644
--- a/include/linux/gpio/gpio-nomadik.h
+++ b/include/linux/gpio/gpio-nomadik.h
@@ -114,8 +114,7 @@ struct nmk_gpio_chip {
 }
 
 /**
- * enum prcm_gpiocr_reg_index
- * Used to reference an PRCM GPIOCR register address.
+ * enum prcm_gpiocr_reg_index - Used to reference a PRCM GPIOCR register address.
  */
 enum prcm_gpiocr_reg_index {
 	PRCM_IDX_GPIOCR1,
@@ -123,8 +122,7 @@ enum prcm_gpiocr_reg_index {
 	PRCM_IDX_GPIOCR3
 };
 /**
- * enum prcm_gpiocr_altcx_index
- * Used to reference an Other alternate-C function.
+ * enum prcm_gpiocr_altcx_index - Used to reference an Other alternate-C function.
  */
 enum prcm_gpiocr_altcx_index {
 	PRCM_IDX_GPIOCR_ALTC1,
@@ -135,7 +133,7 @@ enum prcm_gpiocr_altcx_index {
 };
 
 /**
- * struct prcm_gpio_altcx - Other alternate-C function
+ * struct prcm_gpiocr_altcx - Other alternate-C function
  * @used: other alternate-C function availability
  * @reg_index: PRCM GPIOCR register index used to control the function
  * @control_bit: PRCM GPIOCR bit used to control the function
@@ -147,7 +145,7 @@ struct prcm_gpiocr_altcx {
 } __packed;
 
 /**
- * struct prcm_gpio_altcx_pin_desc - Other alternate-C pin
+ * struct prcm_gpiocr_altcx_pin_desc - Other alternate-C pin
  * @pin: The pin number
  * @altcx: array of other alternate-C[1-4] functions
  */
@@ -193,7 +191,7 @@ struct nmk_pingroup {
  *		numbering.
  * @npins:	The number of entries in @pins.
  * @functions:	The functions supported on this SoC.
- * @nfunction:	The number of entries in @functions.
+ * @nfunctions:	The number of entries in @functions.
  * @groups:	An array describing all pin groups the pin SoC supports.
  * @ngroups:	The number of entries in @groups.
  * @altcx_pins:	The pins that support Other alternate-C function on this SoC
-- 
cgit v1.2.3


From b570f37a2ce480be26c665345c5514686a8a0274 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 10 Feb 2026 12:56:53 +0100
Subject: mm: Fix a hmm_range_fault() livelock / starvation problem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If hmm_range_fault() fails a folio_trylock() in do_swap_page(),
trying to acquire the lock of a device-private folio for migration
to RAM, the function will spin until it succeeds in grabbing the lock.

However, if the process holding the lock is depending on a work
item to be completed, which is scheduled on the same CPU as the
spinning hmm_range_fault(), that work item might be starved and
we end up in a livelock / starvation situation which is never
resolved.

This can happen, for example if the process holding the
device-private folio lock is stuck in
   migrate_device_unmap()->lru_add_drain_all()
since lru_add_drain_all() requires a short work-item
to be run on all online cpus to complete.

A prerequisite for this to happen is:
a) Both zone device and system memory folios are considered in
   migrate_device_unmap(), so that there is a reason to call
   lru_add_drain_all() for a system memory folio while a
   folio lock is held on a zone device folio.
b) The zone device folio has an initial mapcount > 1 which causes
   at least one migration PTE entry insertion to be deferred to
   try_to_migrate(), which can happen after the call to
   lru_add_drain_all().
c) No preemption, or voluntary-only preemption.

This all seems pretty unlikely to happen, but indeed is hit by
the "xe_exec_system_allocator" igt test.

Resolve this by waiting for the folio to be unlocked if the
folio_trylock() fails in do_swap_page().

Rename migration_entry_wait_on_locked() to
softleaf_entry_wait_on_locked() and update its documentation to
indicate the new use-case.

Future code improvements might consider moving
the lru_add_drain_all() call in migrate_device_unmap() to be
called *after* all pages have migration entries inserted.
That would eliminate also b) above.

v2:
- Instead of a cond_resched() in hmm_range_fault(),
  eliminate the problem by waiting for the folio to be unlocked
  in do_swap_page() (Alistair Popple, Andrew Morton)
v3:
- Add a stub migration_entry_wait_on_locked() for the
  !CONFIG_MIGRATION case. (Kernel Test Robot)
v4:
- Rename migration_entry_wait_on_locked() to
  softleaf_entry_wait_on_locked() and update docs (Alistair Popple)
v5:
- Add a WARN_ON_ONCE() for the !CONFIG_MIGRATION
  version of softleaf_entry_wait_on_locked().
- Modify wording around function names in the commit message
  (Andrew Morton)

Suggested-by: Alistair Popple <apopple@nvidia.com>
Fixes: 1afaeb8293c9 ("mm/migrate: Trylock device page in do_swap_page")
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: linux-mm@kvack.org
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.15+
Reviewed-by: John Hubbard <jhubbard@nvidia.com> #v3
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Link: https://patch.msgid.link/20260210115653.92413-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit a69d1ab971a624c6f112cea61536569d579c3215)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/linux/migrate.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 26ca00c325d9..d5af2b7f577b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list);
 
 int migrate_huge_page_move_mapping(struct address_space *mapping,
 		struct folio *dst, struct folio *src);
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 		__releases(ptl);
 void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
@@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag
 	return -ENOSYS;
 }
 
+static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+	__releases(ptl)
+{
+	WARN_ON_ONCE(1);
+
+	spin_unlock(ptl);
+}
+
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
-- 
cgit v1.2.3


From af4e9ef3d78420feb8fe58cd9a1ab80c501b3c08 Mon Sep 17 00:00:00 2001
From: David Laight <david.laight.linux@gmail.com>
Date: Mon, 2 Mar 2026 13:27:51 +0000
Subject: uaccess: Fix scoped_user_read_access() for 'pointer to const'

If a 'const struct foo __user *ptr' is used for the address passed to
scoped_user_read_access() then you get a warning/error

  uaccess.h:691:1: error: initialization discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]

for the

  void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl)

assignment.

Fix by using 'auto' for both _tmpptr and the redeclaration of uptr.
Replace the CLASS() with explicit __cleanup() functions on uptr.

Fixes: e497310b4ffb ("uaccess: Provide scoped user access regions")
Signed-off-by: David Laight <david.laight.linux@gmail.com>
Reviewed-and-tested-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/uaccess.h | 54 ++++++++++++++++++-------------------------------
 1 file changed, 20 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1f3804245c06..809e4f7dfdbd 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -647,36 +647,22 @@ static inline void user_access_restore(unsigned long flags) { }
 /* Define RW variant so the below _mode macro expansion works */
 #define masked_user_rw_access_begin(u)	masked_user_access_begin(u)
 #define user_rw_access_begin(u, s)	user_access_begin(u, s)
-#define user_rw_access_end()		user_access_end()
 
 /* Scoped user access */
-#define USER_ACCESS_GUARD(_mode)				\
-static __always_inline void __user *				\
-class_user_##_mode##_begin(void __user *ptr)			\
-{								\
-	return ptr;						\
-}								\
-								\
-static __always_inline void					\
-class_user_##_mode##_end(void __user *ptr)			\
-{								\
-	user_##_mode##_access_end();				\
-}								\
-								\
-DEFINE_CLASS(user_ ##_mode## _access, void __user *,		\
-	     class_user_##_mode##_end(_T),			\
-	     class_user_##_mode##_begin(ptr), void __user *ptr)	\
-								\
-static __always_inline class_user_##_mode##_access_t		\
-class_user_##_mode##_access_ptr(void __user *scope)		\
-{								\
-	return scope;						\
-}
 
-USER_ACCESS_GUARD(read)
-USER_ACCESS_GUARD(write)
-USER_ACCESS_GUARD(rw)
-#undef USER_ACCESS_GUARD
+/* Cleanup wrapper functions */
+static __always_inline void __scoped_user_read_access_end(const void *p)
+{
+	user_read_access_end();
+};
+static __always_inline void __scoped_user_write_access_end(const void *p)
+{
+	user_write_access_end();
+};
+static __always_inline void __scoped_user_rw_access_end(const void *p)
+{
+	user_access_end();
+};
 
 /**
  * __scoped_user_access_begin - Start a scoped user access
@@ -750,13 +736,13 @@ USER_ACCESS_GUARD(rw)
  *
  * Don't use directly. Use scoped_masked_user_$MODE_access() instead.
  */
-#define __scoped_user_access(mode, uptr, size, elbl)					\
-for (bool done = false; !done; done = true)						\
-	for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \
-	     !done; done = true)							\
-		for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true)	\
-			/* Force modified pointer usage within the scope */		\
-			for (const typeof(uptr) uptr = _tmpptr; !done; done = true)
+#define __scoped_user_access(mode, uptr, size, elbl)				\
+for (bool done = false; !done; done = true)					\
+	for (auto _tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl);	\
+	     !done; done = true)						\
+		/* Force modified pointer usage within the scope */		\
+		for (const auto uptr  __cleanup(__scoped_user_##mode##_access_end) = \
+		     _tmpptr; !done; done = true)
 
 /**
  * scoped_user_read_access_size - Start a scoped user read access with given size
-- 
cgit v1.2.3


From 56bd57e7b161f75535df91b229b0b2c64c6e5581 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Sat, 28 Feb 2026 14:02:22 -0600
Subject: iio: add IIO_DECLARE_QUATERNION() macro

Add a new IIO_DECLARE_QUATERNION() macro that is used to declare the
field in an IIO buffer struct that contains a quaternion vector.

Quaternions are currently the only IIO data type that uses the .repeat
feature of struct iio_scan_type. This has an implicit rule that the
element in the buffer must be aligned to the entire size of the repeated
element. This macro will make that requirement explicit. Since this is
the only user, we just call the macro IIO_DECLARE_QUATERNION() instead
of something more generic.

Signed-off-by: David Lechner <dlechner@baylibre.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index a9ecff191bd9..2c91b7659ce9 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -931,6 +931,18 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev)
 #define IIO_DECLARE_DMA_BUFFER_WITH_TS(type, name, count) \
 	__IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(IIO_DMA_MINALIGN)
 
+/**
+ * IIO_DECLARE_QUATERNION() - Declare a quaternion element
+ * @type: element type of the individual vectors
+ * @name: identifier name
+ *
+ * Quaternions are a vector composed of 4 elements (W, X, Y, Z). Use this macro
+ * to declare a quaternion element in a struct to ensure proper alignment in
+ * an IIO buffer.
+ */
+#define IIO_DECLARE_QUATERNION(type, name) \
+	type name[4] __aligned(sizeof(type) * 4)
+
 struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv);
 
 /* The information at the returned address is guaranteed to be cacheline aligned */
-- 
cgit v1.2.3


From 479d589b40b836442bbdadc3fdb37f001bb67f26 Mon Sep 17 00:00:00 2001
From: Jiayuan Chen <jiayuan.chen@shopee.com>
Date: Thu, 26 Feb 2026 16:03:01 +0800
Subject: bpf/bonding: reject vlan+srcmac xmit_hash_policy change when XDP is
 loaded

bond_option_mode_set() already rejects mode changes that would make a
loaded XDP program incompatible via bond_xdp_check().  However,
bond_option_xmit_hash_policy_set() has no such guard.

For 802.3ad and balance-xor modes, bond_xdp_check() returns false when
xmit_hash_policy is vlan+srcmac, because the 802.1q payload is usually
absent due to hardware offload.  This means a user can:

1. Attach a native XDP program to a bond in 802.3ad/balance-xor mode
   with a compatible xmit_hash_policy (e.g. layer2+3).
2. Change xmit_hash_policy to vlan+srcmac while XDP remains loaded.

This leaves bond->xdp_prog set but bond_xdp_check() now returning false
for the same device.  When the bond is later destroyed, dev_xdp_uninstall()
calls bond_xdp_set(dev, NULL, NULL) to remove the program, which hits
the bond_xdp_check() guard and returns -EOPNOTSUPP, triggering:

WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL))

Fix this by rejecting xmit_hash_policy changes to vlan+srcmac when an
XDP program is loaded on a bond in 802.3ad or balance-xor mode.

commit 39a0876d595b ("net, bonding: Disallow vlan+srcmac with XDP")
introduced bond_xdp_check() which returns false for 802.3ad/balance-xor
modes when xmit_hash_policy is vlan+srcmac.  The check was wired into
bond_xdp_set() to reject XDP attachment with an incompatible policy, but
the symmetric path -- preventing xmit_hash_policy from being changed to an
incompatible value after XDP is already loaded -- was left unguarded in
bond_option_xmit_hash_policy_set().

Note:
commit 094ee6017ea0 ("bonding: check xdp prog when set bond mode")
later added a similar guard to bond_option_mode_set(), but
bond_option_xmit_hash_policy_set() remained unprotected.

Reported-by: syzbot+5a287bcdc08104bc3132@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6995aff6.050a0220.2eeac1.014e.GAE@google.com/T/
Fixes: 39a0876d595b ("net, bonding: Disallow vlan+srcmac with XDP")
Signed-off-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Link: https://patch.msgid.link/20260226080306.98766-2-jiayuan.chen@linux.dev
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/bonding.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bonding.h b/include/net/bonding.h
index 4ad5521e7731..395c6e281c5f 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -699,6 +699,7 @@ void bond_debug_register(struct bonding *bond);
 void bond_debug_unregister(struct bonding *bond);
 void bond_debug_reregister(struct bonding *bond);
 const char *bond_mode_name(int mode);
+bool __bond_xdp_check(int mode, int xmit_policy);
 bool bond_xdp_check(struct bonding *bond, int mode);
 void bond_setup(struct net_device *bond_dev);
 unsigned int bond_get_num_tx_queues(void);
-- 
cgit v1.2.3


From 710f5c76580306cdb9ec51fac8fcf6a8faff7821 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 27 Feb 2026 17:26:03 +0000
Subject: indirect_call_wrapper: do not reevaluate function pointer

We have an increasing number of READ_ONCE(xxx->function)
combined with INDIRECT_CALL_[1234]() helpers.

Unfortunately this forces INDIRECT_CALL_[1234]() to read
xxx->function many times, which is not what we wanted.

Fix these macros so that xxx->function value is not reloaded.

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/65 up/down: 122/-1084 (-962)
Function                                     old     new   delta
ip_push_pending_frames                        59     181    +122
ip6_finish_output                            687     681      -6
__udp_enqueue_schedule_skb                  1078    1072      -6
ioam6_output                                2319    2312      -7
xfrm4_rcv_encap_finish2                       64      56      -8
xfrm4_output                                 297     289      -8
vrf_ip_local_out                             278     270      -8
vrf_ip6_local_out                            278     270      -8
seg6_input_finish                             64      56      -8
rpl_output                                   700     692      -8
ipmr_forward_finish                          124     116      -8
ip_forward_finish                            143     135      -8
ip6mr_forward2_finish                        100      92      -8
ip6_forward_finish                            73      65      -8
input_action_end_bpf                        1091    1083      -8
dst_input                                     52      44      -8
__xfrm6_output                               801     793      -8
__xfrm4_output                                83      75      -8
bpf_input                                    500     491      -9
__tcp_check_space                            530     521      -9
input_action_end_dt6                         291     280     -11
vti6_tnl_xmit                               1634    1622     -12
bpf_xmit                                    1203    1191     -12
rpl_input                                    497     483     -14
rawv6_send_hdrinc                           1355    1341     -14
ndisc_send_skb                              1030    1016     -14
ipv6_srh_rcv                                1377    1363     -14
ip_send_unicast_reply                       1253    1239     -14
ip_rcv_finish                                226     212     -14
ip6_rcv_finish                               300     286     -14
input_action_end_x_core                      205     191     -14
input_action_end_x                           355     341     -14
input_action_end_t                           205     191     -14
input_action_end_dx6_finish                  127     113     -14
input_action_end_dx4_finish                  373     359     -14
input_action_end_dt4                         426     412     -14
input_action_end_core                        186     172     -14
input_action_end_b6_encap                    292     278     -14
input_action_end_b6                          198     184     -14
igmp6_send                                  1332    1318     -14
ip_sublist_rcv                               864     848     -16
ip6_sublist_rcv                             1091    1075     -16
ipv6_rpl_srh_rcv                            1937    1920     -17
xfrm_policy_queue_process                   1246    1228     -18
seg6_output_core                             903     885     -18
mld_sendpack                                 856     836     -20
NF_HOOK                                      756     736     -20
vti_tunnel_xmit                             1447    1426     -21
input_action_end_dx6                         664     642     -22
input_action_end                            1502    1480     -22
sock_sendmsg_nosec                           134     111     -23
ip6mr_forward2                               388     364     -24
sock_recvmsg_nosec                           134     109     -25
seg6_input_core                              836     810     -26
ip_send_skb                                  172     146     -26
ip_local_out                                 140     114     -26
ip6_local_out                                140     114     -26
__sock_sendmsg                               162     136     -26
__ip_queue_xmit                             1196    1170     -26
__ip_finish_output                           405     379     -26
ipmr_queue_fwd_xmit                          373     346     -27
sock_recvmsg                                 173     145     -28
ip6_xmit                                    1635    1607     -28
xfrm_output_resume                          1418    1389     -29
ip_build_and_send_pkt                        625     591     -34
dst_output                                   504     432     -72
Total: Before=25217686, After=25216724, chg -0.00%

Fixes: 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up indirect calls of builtin")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260227172603.1700433-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/indirect_call_wrapper.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index 35227d47cfc9..dc272b514a01 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -16,22 +16,26 @@
  */
 #define INDIRECT_CALL_1(f, f1, ...)					\
 	({								\
-		likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__);	\
+		typeof(f) __f1 = (f);					\
+		likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__);	\
 	})
 #define INDIRECT_CALL_2(f, f2, f1, ...)					\
 	({								\
-		likely(f == f2) ? f2(__VA_ARGS__) :			\
-				  INDIRECT_CALL_1(f, f1, __VA_ARGS__);	\
+		typeof(f) __f2 = (f);					\
+		likely(__f2 == f2) ? f2(__VA_ARGS__) :			\
+				  INDIRECT_CALL_1(__f2, f1, __VA_ARGS__);	\
 	})
 #define INDIRECT_CALL_3(f, f3, f2, f1, ...)					\
 	({									\
-		likely(f == f3) ? f3(__VA_ARGS__) :				\
-				  INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__);	\
+		typeof(f) __f3 = (f);						\
+		likely(__f3 == f3) ? f3(__VA_ARGS__) :				\
+				  INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__);	\
 	})
 #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...)					\
 	({									\
-		likely(f == f4) ? f4(__VA_ARGS__) :				\
-				  INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__);	\
+		typeof(f) __f4 = (f);						\
+		likely(__f4 == f4) ? f4(__VA_ARGS__) :				\
+				  INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__);	\
 	})
 
 #define INDIRECT_CALLABLE_DECLARE(f)	f
-- 
cgit v1.2.3


From 9de68394a61528d40f575c3e6719cc75c56f62c3 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Mon, 2 Mar 2026 01:25:44 +0100
Subject: Revert "driver core: enforce device_lock for driver_match_device()"

This reverts commit dc23806a7c47 ("driver core: enforce device_lock for
driver_match_device()") and commit 289b14592cef ("driver core: fix
inverted "locked" suffix of driver_match_device()").

While technically correct, there is a major downside to this approach:

When a device is already present in the system and a driver is
registered on the same bus, we iterate over all devices registered on
this bus to see if one of them matches. If we come across an already
bound one where the corresponding driver crashed while holding the
device lock (e.g. in probe()) we can't make any progress anymore.

However, drivers are typically the least tested code in the kernel and
hence it is a case that is likely to happen regularly. Besides hurting
developer ergonomics, it potentially decreases chances of shutting
things down cleanly and obtaining logs in production environments as
well [1].

This came up in the context of a firewire bug, which only in combination
with the reverted commit, caused the machine to hang [2]. Additionally,
it was observed in [3].

Thus, revert commit dc23806a7c47 ("driver core: enforce device_lock for
driver_match_device()") and add a brief note clarifying that an
implementer of struct bus_type must not expect match() to be called with
the device lock held.

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Link: https://lore.kernel.org/all/67f655bb-4d81-4609-b008-68d200255dd2@davidgow.net/ [2]
Link: https://lore.kernel.org/lkml/CALbr=LZ4v7N=tO1vgOsyj9AS+XuNbn6kG-QcF+PacdMjSo0iyw@mail.gmail.com/ [3]
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Closes: https://lore.kernel.org/driver-core/CAHk-=wgJ_L1C=HjcYJotg_zrZEmiLFJaoic+PWthjuQrutrfJw@mail.gmail.com/
Reviewed-by: Gui-Dong Han <hanguidong02@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260302002545.19389-1-dakr@kernel.org
[ Add additional Link: reference. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 include/linux/device/bus.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 99c3c83ea520..63de5f053c33 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -35,6 +35,8 @@ struct fwnode_handle;
  *		otherwise. It may also return error code if determining that
  *		the driver supports the device is not possible. In case of
  *		-EPROBE_DEFER it will queue the device for deferred probing.
+ *		Note: This callback may be invoked with or without the device
+ *		lock held.
  * @uevent:	Called when a device is added, removed, or a few other things
  *		that generate uevents to add the environment variables.
  * @probe:	Called when a new device or driver add to this bus, and callback
-- 
cgit v1.2.3


From 4ee7fa6cf78ff26d783d39e2949d14c4c1cd5e7f Mon Sep 17 00:00:00 2001
From: Yung Chih Su <yuuchihsu@gmail.com>
Date: Mon, 2 Mar 2026 14:02:47 +0800
Subject: net: ipv4: fix ARM64 alignment fault in multipath hash seed

`struct sysctl_fib_multipath_hash_seed` contains two u32 fields
(user_seed and mp_seed), making it an 8-byte structure with a 4-byte
alignment requirement.

In `fib_multipath_hash_from_keys()`, the code evaluates the entire
struct atomically via `READ_ONCE()`:

    mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;

While this silently works on GCC by falling back to unaligned regular
loads which the ARM64 kernel tolerates, it causes a fatal kernel panic
when compiled with Clang and LTO enabled.

Commit e35123d83ee3 ("arm64: lto: Strengthen READ_ONCE() to acquire
when CONFIG_LTO=y") strengthens `READ_ONCE()` to use Load-Acquire
instructions (`ldar` / `ldapr`) to prevent compiler reordering bugs
under Clang LTO. Since the macro evaluates the full 8-byte struct,
Clang emits a 64-bit `ldar` instruction. ARM64 architecture strictly
requires `ldar` to be naturally aligned, thus executing it on a 4-byte
aligned address triggers a strict Alignment Fault (FSC = 0x21).

Fix the read side by moving the `READ_ONCE()` directly to the `u32`
member, which emits a safe 32-bit `ldar Wn`.

Furthermore, Eric Dumazet pointed out that `WRITE_ONCE()` on the entire
struct in `proc_fib_multipath_hash_set_seed()` is also flawed. Analysis
shows that Clang splits this 8-byte write into two separate 32-bit
`str` instructions. While this avoids an alignment fault, it destroys
atomicity and exposes a torn-write vulnerability. Fix this by
explicitly splitting the write into two 32-bit `WRITE_ONCE()`
operations.

Finally, add the missing `READ_ONCE()` when reading `user_seed` in
`proc_fib_multipath_hash_seed()` to ensure proper pairing and
concurrency safety.

Fixes: 4ee2a8cace3f ("net: ipv4: Add a sysctl to set multipath hash seed")
Signed-off-by: Yung Chih Su <yuuchihsu@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260302060247.7066-1-yuuchihsu@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_fib.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index b4495c38e0a0..318593743b6e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net,
 	siphash_aligned_key_t hash_key;
 	u32 mp_seed;
 
-	mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
+	mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed);
 	fib_multipath_hash_construct_key(&hash_key, mp_seed);
 
 	return flow_hash_from_keys_seed(keys, &hash_key);
-- 
cgit v1.2.3


From e39bb9e02b68942f8e9359d2a3efe7d37ae6be0e Mon Sep 17 00:00:00 2001
From: Qing Wang <wangqing7171@gmail.com>
Date: Fri, 27 Feb 2026 10:58:42 +0800
Subject: tracing: Fix WARN_ON in tracing_buffers_mmap_close

When a process forks, the child process copies the parent's VMAs but the
user_mapped reference count is not incremented. As a result, when both the
parent and child processes exit, tracing_buffers_mmap_close() is called
twice. On the second call, user_mapped is already 0, causing the function to
return -ENODEV and triggering a WARN_ON.

Normally, this isn't an issue as the memory is mapped with VM_DONTCOPY set.
But this is only a hint, and the application can call
madvise(MADV_DOFORK) which resets the VM_DONTCOPY flag. When the
application does that, it can trigger this issue on fork.

Fix it by incrementing the user_mapped reference count without re-mapping
the pages in the VMA's open callback.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Link: https://patch.msgid.link/20260227025842.1085206-1-wangqing7171@gmail.com
Fixes: cf9f0f7c4c5bb ("tracing: Allow user-space mapping of the ring-buffer")
Reported-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=3b5dd2030fe08afdf65d
Tested-by: syzbot+3b5dd2030fe08afdf65d@syzkaller.appspotmail.com
Signed-off-by: Qing Wang <wangqing7171@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 876358cfe1b1..d862fa610270 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -248,6 +248,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node);
 
 int ring_buffer_map(struct trace_buffer *buffer, int cpu,
 		    struct vm_area_struct *vma);
+void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
 int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
 int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
 #endif /* _LINUX_RING_BUFFER_H */
-- 
cgit v1.2.3


From ace7dcc8181373a0338efa1686c5e36eb121dff2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 25 Feb 2026 09:42:20 +0200
Subject: drm/dp: Add definition for Panel Replay full-line granularity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The DP specification says that the value 0xff 0xff in the PANEL REPLAY
SELECTIVE UPDATE X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means
full-line granularity. Add a definition for this.

Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/20260225074221.1744330-1-jouni.hogander@intel.com
(cherry picked from commit b93311673263bb98a200ab1cb6304f969bdada5c)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 include/drm/display/drm_dp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index e4eebabab975..8b15d3eeb716 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -571,6 +571,8 @@
 # define DP_PANEL_REPLAY_LINK_OFF_SUPPORTED_IN_PR_AFTER_ADAPTIVE_SYNC_SDP	(1 << 7)
 
 #define DP_PANEL_REPLAY_CAP_X_GRANULARITY		0xb2
+# define DP_PANEL_REPLAY_FULL_LINE_GRANULARITY		0xffff
+
 #define DP_PANEL_REPLAY_CAP_Y_GRANULARITY		0xb4
 
 /* Link Configuration */
-- 
cgit v1.2.3


From 82169dace41cbaa951341b0f80f4570be3b2dec0 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 18 Feb 2026 10:52:04 +0100
Subject: xenbus: add xenbus_device parameter to xenbus_read_driver_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to prepare checking the xenbus device status in
xenbus_read_driver_state(), add the pointer to struct xenbus_device
as a parameter.

Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: "Martin K. Petersen" <martin.petersen@oracle.com> # SCSI
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# drivers/pci/xen-pcifront.c
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260218095205.453657-2-jgross@suse.com>
---
 include/xen/xenbus.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index c94caf852aea..15319da65b7f 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -228,7 +228,8 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port);
 int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port);
 
-enum xenbus_state xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const struct xenbus_device *dev,
+					   const char *path);
 
 __printf(3, 4)
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...);
-- 
cgit v1.2.3


From e2dcf9065536ab4a1b00828ff0d19f7d282dfecc Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 18 Feb 2026 10:52:05 +0100
Subject: xen/xenbus: better handle backend crash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the backend domain crashes, coordinated device cleanup is not
possible (as it involves waiting for the backend state change). In that
case, toolstack forcefully removes frontend xenstore entries.
xenbus_dev_changed() handles this case, and triggers device cleanup.
It's possible that toolstack manages to connect new device in that
place, before xenbus_dev_changed() notices the old one is missing. If
that happens, new one won't be probed and will forever remain in
XenbusStateInitialising.

Fix this by checking the frontend's state in Xenstore. In case it has
been reset to XenbusStateInitialising by Xen tools, consider this
being the result of an unplug+plug operation.

It's important that cleanup on such unplug doesn't modify Xenstore
entries (especially the "state" key) as they belong to the new device
to be probed - changing it would derail establishing connection to the
new backend (most likely, closing the device before it was even
connected). Handle this case by setting new xenbus_device->vanished
flag to true, and check it before changing state entry.

And even if xenbus_dev_changed() correctly detects the device was
forcefully removed, the cleanup handling is still racy. Since this whole
handling doesn't happen in a single Xenstore transaction, it's possible
that toolstack might put a new device there already. Avoid re-creating
the state key (which in the case of losing the race would actually
close newly attached device).

The problem does not apply to frontend domain crash, as this case
involves coordinated cleanup.

Problem originally reported at
https://lore.kernel.org/xen-devel/aOZvivyZ9YhVWDLN@mail-itl/T/#t,
including reproduction steps.

Based-on-patch-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-ID: <20260218095205.453657-3-jgross@suse.com>
---
 include/xen/xenbus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 15319da65b7f..8ca15743af7f 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -80,6 +80,7 @@ struct xenbus_device {
 	const char *devicetype;
 	const char *nodename;
 	const char *otherend;
+	bool vanished;
 	int otherend_id;
 	struct xenbus_watch otherend_watch;
 	struct device dev;
-- 
cgit v1.2.3


From 2d28ed588f8d7d0d41b0a4fad7f0d05e4bbf1797 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Tue, 24 Feb 2026 16:24:34 -0800
Subject: Revert "ptdesc: remove references to folios from __pagetable_ctor()
 and pagetable_dtor()"

This change swapped out mod_node_page_state for lruvec_stat_add_folio.
But, these two APIs are not interchangeable: the lruvec version also
increments memcg stats, in addition to "global" pgdat stats.

So after this change, the "pagetables" memcg stat in memory.stat always
yields "0", which is a userspace visible regression.

I tried to look for a refactor where we add a variant of
lruvec_stat_mod_folio which takes a pgdat and a memcg instead of a folio,
to try to adhere to the spirit of the original patch.  But at the end of
the day this just means we have to call folio_memcg(ptdesc_folio(ptdesc))
anyway, which doesn't really accomplish much.

This regression is visible in master as well as 6.18 stable, so CC stable
too.

Link: https://lkml.kernel.org/r/20260225002434.2953895-1-axelrasmussen@google.com
Fixes: f0c92726e89f ("ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor()")
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5be3d8a8f806..abb4963c1f06 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3514,26 +3514,21 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
 static inline void ptlock_free(struct ptdesc *ptdesc) {}
 #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */
 
-static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc)
-{
-	return compound_nr(ptdesc_page(ptdesc));
-}
-
 static inline void __pagetable_ctor(struct ptdesc *ptdesc)
 {
-	pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+	struct folio *folio = ptdesc_folio(ptdesc);
 
-	__SetPageTable(ptdesc_page(ptdesc));
-	mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc));
+	__folio_set_pgtable(folio);
+	lruvec_stat_add_folio(folio, NR_PAGETABLE);
 }
 
 static inline void pagetable_dtor(struct ptdesc *ptdesc)
 {
-	pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+	struct folio *folio = ptdesc_folio(ptdesc);
 
 	ptlock_free(ptdesc);
-	__ClearPageTable(ptdesc_page(ptdesc));
-	mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc));
+	__folio_clear_pgtable(folio);
+	lruvec_stat_sub_folio(folio, NR_PAGETABLE);
 }
 
 static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
-- 
cgit v1.2.3


From 7392f8e4ea632622b2cd2086675ba022db238b3a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:52:29 -0800
Subject: uaccess: correct kernel-doc parameter format

Use the correct kernel-doc function parameter format to avoid kernel-doc
warnings:

Warning: include/linux/uaccess.h:814 function parameter 'uptr' not
 described in 'scoped_user_rw_access_size'
Warning: include/linux/uaccess.h:826 function parameter 'uptr' not
 described in 'scoped_user_rw_access'

Link: https://lkml.kernel.org/r/20260302005229.3471955-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/uaccess.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1f3804245c06..001cfef21b61 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -806,7 +806,7 @@ for (bool done = false; !done; done = true)						\
 
 /**
  * scoped_user_rw_access_size - Start a scoped user read/write access with given size
- * @uptr	Pointer to the user space address to read from and write to
+ * @uptr:	Pointer to the user space address to read from and write to
  * @size:	Size of the access starting from @uptr
  * @elbl:	Error label to goto when the access region is rejected
  *
@@ -817,7 +817,7 @@ for (bool done = false; !done; done = true)						\
 
 /**
  * scoped_user_rw_access - Start a scoped user read/write access
- * @uptr	Pointer to the user space address to read from and write to
+ * @uptr:	Pointer to the user space address to read from and write to
  * @elbl:	Error label to goto when the access region is rejected
  *
  * The size of the access starting from @uptr is determined via sizeof(*@uptr)).
-- 
cgit v1.2.3


From 599b4e290c8766b19378d85d4310c6ec8f90ade4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:52:22 -0800
Subject: mm/mmu_notifier: clean up mmu_notifier.h kernel-doc

Eliminate kernel-doc warnings in mmu_notifier.h:
- add a missing struct short description
- use the correct format for function parameters
- add missing function return comment sections

Warning: include/linux/mmu_notifier.h:236 missing initial short
 description on line: * struct mmu_interval_notifier_ops
Warning: include/linux/mmu_notifier.h:325 function parameter 'interval_sub'
 not described in 'mmu_interval_set_seq'
Warning: include/linux/mmu_notifier.h:325 function parameter 'cur_seq'
 not described in 'mmu_interval_set_seq'
Warning: include/linux/mmu_notifier.h:346 function parameter 'interval_sub'
 not described in 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:346 function parameter 'seq' not
 described in 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:346 No description found for return
 value of 'mmu_interval_read_retry'
Warning: include/linux/mmu_notifier.h:370 function parameter 'interval_sub'
 not described in 'mmu_interval_check_retry'
Warning: include/linux/mmu_notifier.h:370 function parameter 'seq' not
 described in 'mmu_interval_check_retry'
Warning: include/linux/mmu_notifier.h:370 No description found for return
 value of 'mmu_interval_check_retry'

Link: https://lkml.kernel.org/r/20260302005222.3470783-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 07a2bbaf86e9..8450e18a87c2 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -234,7 +234,7 @@ struct mmu_notifier {
 };
 
 /**
- * struct mmu_interval_notifier_ops
+ * struct mmu_interval_notifier_ops - callback for range notification
  * @invalidate: Upon return the caller must stop using any SPTEs within this
  *              range. This function can sleep. Return false only if sleeping
  *              was required but mmu_notifier_range_blockable(range) is false.
@@ -309,8 +309,8 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub);
 
 /**
  * mmu_interval_set_seq - Save the invalidation sequence
- * @interval_sub - The subscription passed to invalidate
- * @cur_seq - The cur_seq passed to the invalidate() callback
+ * @interval_sub: The subscription passed to invalidate
+ * @cur_seq: The cur_seq passed to the invalidate() callback
  *
  * This must be called unconditionally from the invalidate callback of a
  * struct mmu_interval_notifier_ops under the same lock that is used to call
@@ -329,8 +329,8 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub,
 
 /**
  * mmu_interval_read_retry - End a read side critical section against a VA range
- * interval_sub: The subscription
- * seq: The return of the paired mmu_interval_read_begin()
+ * @interval_sub: The subscription
+ * @seq: The return of the paired mmu_interval_read_begin()
  *
  * This MUST be called under a user provided lock that is also held
  * unconditionally by op->invalidate() when it calls mmu_interval_set_seq().
@@ -338,7 +338,7 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub,
  * Each call should be paired with a single mmu_interval_read_begin() and
  * should be used to conclude the read side.
  *
- * Returns true if an invalidation collided with this critical section, and
+ * Returns: true if an invalidation collided with this critical section, and
  * the caller should retry.
  */
 static inline bool
@@ -350,20 +350,21 @@ mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub,
 
 /**
  * mmu_interval_check_retry - Test if a collision has occurred
- * interval_sub: The subscription
- * seq: The return of the matching mmu_interval_read_begin()
+ * @interval_sub: The subscription
+ * @seq: The return of the matching mmu_interval_read_begin()
  *
  * This can be used in the critical section between mmu_interval_read_begin()
- * and mmu_interval_read_retry().  A return of true indicates an invalidation
- * has collided with this critical region and a future
- * mmu_interval_read_retry() will return true.
- *
- * False is not reliable and only suggests a collision may not have
- * occurred. It can be called many times and does not have to hold the user
- * provided lock.
+ * and mmu_interval_read_retry().
  *
  * This call can be used as part of loops and other expensive operations to
  * expedite a retry.
+ * It can be called many times and does not have to hold the user
+ * provided lock.
+ *
+ * Returns: true indicates an invalidation has collided with this critical
+ * region and a future mmu_interval_read_retry() will return true.
+ * False is not reliable and only suggests a collision may not have
+ * occurred.
  */
 static inline bool
 mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub,
-- 
cgit v1.2.3


From 7f083faf59d14c04e01ec05a7507f036c965acf8 Mon Sep 17 00:00:00 2001
From: Koichiro Den <den@valinux.co.jp>
Date: Sat, 28 Feb 2026 23:53:07 +0900
Subject: net: sched: avoid qdisc_reset_all_tx_gt() vs dequeue race for
 lockless qdiscs

When shrinking the number of real tx queues,
netif_set_real_num_tx_queues() calls qdisc_reset_all_tx_gt() to flush
qdiscs for queues which will no longer be used.

qdisc_reset_all_tx_gt() currently serializes qdisc_reset() with
qdisc_lock(). However, for lockless qdiscs, the dequeue path is
serialized by qdisc_run_begin/end() using qdisc->seqlock instead, so
qdisc_reset() can run concurrently with __qdisc_run() and free skbs
while they are still being dequeued, leading to UAF.

This can easily be reproduced on e.g. virtio-net by imposing heavy
traffic while frequently changing the number of queue pairs:

  iperf3 -ub0 -c $peer -t 0 &
  while :; do
    ethtool -L eth0 combined 1
    ethtool -L eth0 combined 2
  done

With KASAN enabled, this leads to reports like:

  BUG: KASAN: slab-use-after-free in __qdisc_run+0x133f/0x1760
  ...
  Call Trace:
   <TASK>
   ...
   __qdisc_run+0x133f/0x1760
   __dev_queue_xmit+0x248f/0x3550
   ip_finish_output2+0xa42/0x2110
   ip_output+0x1a7/0x410
   ip_send_skb+0x2e6/0x480
   udp_send_skb+0xb0a/0x1590
   udp_sendmsg+0x13c9/0x1fc0
   ...
   </TASK>

  Allocated by task 1270 on cpu 5 at 44.558414s:
   ...
   alloc_skb_with_frags+0x84/0x7c0
   sock_alloc_send_pskb+0x69a/0x830
   __ip_append_data+0x1b86/0x48c0
   ip_make_skb+0x1e8/0x2b0
   udp_sendmsg+0x13a6/0x1fc0
   ...

  Freed by task 1306 on cpu 3 at 44.558445s:
   ...
   kmem_cache_free+0x117/0x5e0
   pfifo_fast_reset+0x14d/0x580
   qdisc_reset+0x9e/0x5f0
   netif_set_real_num_tx_queues+0x303/0x840
   virtnet_set_channels+0x1bf/0x260 [virtio_net]
   ethnl_set_channels+0x684/0xae0
   ethnl_default_set_doit+0x31a/0x890
   ...

Serialize qdisc_reset_all_tx_gt() against the lockless dequeue path by
taking qdisc->seqlock for TCQ_F_NOLOCK qdiscs, matching the
serialization model already used by dev_reset_queue().

Additionally clear QDISC_STATE_NON_EMPTY after reset so the qdisc state
reflects an empty queue, avoiding needless re-scheduling.

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Signed-off-by: Koichiro Den <den@valinux.co.jp>
Link: https://patch.msgid.link/20260228145307.3955532-1-den@valinux.co.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sch_generic.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c3a7268b567e..d5d55cb21686 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -778,13 +778,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
 	struct Qdisc *qdisc;
+	bool nolock;
 
 	for (; i < dev->num_tx_queues; i++) {
 		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
 		if (qdisc) {
+			nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+			if (nolock)
+				spin_lock_bh(&qdisc->seqlock);
 			spin_lock_bh(qdisc_lock(qdisc));
 			qdisc_reset(qdisc);
 			spin_unlock_bh(qdisc_lock(qdisc));
+			if (nolock) {
+				clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+				clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+				spin_unlock_bh(&qdisc->seqlock);
+			}
 		}
 	}
 }
-- 
cgit v1.2.3


From 165573e41f2f66ef98940cf65f838b2cb575d9d1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 2 Mar 2026 20:55:27 +0000
Subject: tcp: secure_seq: add back ports to TS offset

This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")

tcp_tw_recycle went away in 2017.

Zhouyan Deng reported off-path TCP source port leakage via
SYN cookie side-channel that can be fixed in multiple ways.

One of them is to bring back TCP ports in TS offset randomization.

As a bonus, we perform a single siphash() computation
to provide both an ISN and a TS offset.

Fixes: 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
Reported-by: Zhouyan Deng <dengzhouyan_nwpu@163.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Acked-by: Florian Westphal <fw@strlen.de>
Link: https://patch.msgid.link/20260302205527.1982836-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/secure_seq.h | 45 ++++++++++++++++++++++++++++++++++++++-------
 include/net/tcp.h        |  6 ++++--
 2 files changed, 42 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index cddebafb9f77..6f996229167b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -5,16 +5,47 @@
 #include <linux/types.h>
 
 struct net;
+extern struct net init_net;
+
+union tcp_seq_and_ts_off {
+	struct {
+		u32 seq;
+		u32 ts_off;
+	};
+	u64 hash64;
+};
 
 u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
-		   __be16 sport, __be16 dport);
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
-		     __be16 sport, __be16 dport);
-u32 secure_tcpv6_ts_off(const struct net *net,
-			const __be32 *saddr, const __be32 *daddr);
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+			  __be16 sport, __be16 dport);
+
+static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+				 __be16 sport, __be16 dport)
+{
+	union tcp_seq_and_ts_off ts;
+
+	ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr,
+				       sport, dport);
+
+	return ts.seq;
+}
+
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+			    const __be32 *daddr,
+			    __be16 sport, __be16 dport);
+
+static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+				   __be16 sport, __be16 dport)
+{
+	union tcp_seq_and_ts_off ts;
+
+	ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr,
+					 sport, dport);
 
+	return ts.seq;
+}
 #endif /* _NET_SECURE_SEQ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eb8bf63fdafc..978eea2d5df0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -43,6 +43,7 @@
 #include <net/dst.h>
 #include <net/mptcp.h>
 #include <net/xfrm.h>
+#include <net/secure_seq.h>
 
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
@@ -2464,8 +2465,9 @@ struct tcp_request_sock_ops {
 				       struct flowi *fl,
 				       struct request_sock *req,
 				       u32 tw_isn);
-	u32 (*init_seq)(const struct sk_buff *skb);
-	u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
+	union tcp_seq_and_ts_off (*init_seq_and_ts_off)(
+					const struct net *net,
+					const struct sk_buff *skb);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
-- 
cgit v1.2.3


From b824c3e16c1904bf80df489e293d1e3cbf98896d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 2 Mar 2026 17:26:31 +0100
Subject: net: Provide a PREEMPT_RT specific check for netdev_queue::_xmit_lock

After acquiring netdev_queue::_xmit_lock the number of the CPU owning
the lock is recorded in netdev_queue::xmit_lock_owner. This works as
long as the BH context is not preemptible.

On PREEMPT_RT the softirq context is preemptible and without the
softirq-lock it is possible to have multiple users in __dev_queue_xmit()
submitting a skb on the same CPU. This is fine in general but this means
also that the current CPU is recorded as netdev_queue::xmit_lock_owner.
This in turn leads to the recursion alert and the skb is dropped.

Instead of checking for the CPU number that owns the lock, PREEMPT_RT can
check if the lockowner matches the current task.

Add netif_tx_owned() which returns true if the current context owns the
lock by comparing the provided CPU number with the recorded number. This
resembles the current check by negating the condition (the current check
returns true if the lock is not owned).
On PREEMPT_RT use rt_mutex_owner() to return the lock owner and compare
the current task against it.
Use the new helper in __dev_queue_xmit() and netif_local_xmit_active()
which provides a similar check.
Update comments regarding pairing READ_ONCE().

Reported-by: Bert Karwatzki <spasswolf@web.de>
Closes: https://lore.kernel.org/all/20260216134333.412332-1-spasswolf@web.de
Fixes: 3253cb49cbad4 ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reported-by: Bert Karwatzki <spasswolf@web.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20260302162631.uGUyIqDT@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d4e6e00bb90a..67e25f6d15a4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4711,7 +4711,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
 	spin_lock(&txq->_xmit_lock);
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, cpu);
 }
 
@@ -4729,7 +4729,7 @@ static inline void __netif_tx_release(struct netdev_queue *txq)
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
 	spin_lock_bh(&txq->_xmit_lock);
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
 }
 
@@ -4738,7 +4738,7 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq)
 	bool ok = spin_trylock(&txq->_xmit_lock);
 
 	if (likely(ok)) {
-		/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+		/* Pairs with READ_ONCE() in netif_tx_owned() */
 		WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
 	}
 	return ok;
@@ -4746,14 +4746,14 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq)
 
 static inline void __netif_tx_unlock(struct netdev_queue *txq)
 {
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock(&txq->_xmit_lock);
 }
 
 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 {
-	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	/* Pairs with READ_ONCE() in netif_tx_owned() */
 	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock_bh(&txq->_xmit_lock);
 }
@@ -4846,6 +4846,23 @@ static inline void netif_tx_disable(struct net_device *dev)
 	local_bh_enable();
 }
 
+#ifndef CONFIG_PREEMPT_RT
+static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu)
+{
+	/* Other cpus might concurrently change txq->xmit_lock_owner
+	 * to -1 or to their cpu id, but not to our id.
+	 */
+	return READ_ONCE(txq->xmit_lock_owner) == cpu;
+}
+
+#else
+static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu)
+{
+	return rt_mutex_owner(&txq->_xmit_lock.lock) == current;
+}
+
+#endif
+
 static inline void netif_addr_lock(struct net_device *dev)
 {
 	unsigned char nest_level = 0;
-- 
cgit v1.2.3


From fb7fb4016300ac622c964069e286dc83166a5d52 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 2 Mar 2026 23:28:15 +0100
Subject: netfilter: nf_tables: clone set on flush only

Syzbot with fault injection triggered a failing memory allocation with
GFP_KERNEL which results in a WARN splat:

iter.err
WARNING: net/netfilter/nf_tables_api.c:845 at nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845, CPU#0: syz.0.17/5992
Modules linked in:
CPU: 0 UID: 0 PID: 5992 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2026
RIP: 0010:nft_map_deactivate+0x34e/0x3c0 net/netfilter/nf_tables_api.c:845
Code: 8b 05 86 5a 4e 09 48 3b 84 24 a0 00 00 00 75 62 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc cc e8 63 6d fa f7 90 <0f> 0b 90 43
+80 7c 35 00 00 0f 85 23 fe ff ff e9 26 fe ff ff 89 d9
RSP: 0018:ffffc900045af780 EFLAGS: 00010293
RAX: ffffffff89ca45bd RBX: 00000000fffffff4 RCX: ffff888028111e40
RDX: 0000000000000000 RSI: 00000000fffffff4 RDI: 0000000000000000
RBP: ffffc900045af870 R08: 0000000000400dc0 R09: 00000000ffffffff
R10: dffffc0000000000 R11: fffffbfff1d141db R12: ffffc900045af7e0
R13: 1ffff920008b5f24 R14: dffffc0000000000 R15: ffffc900045af920
FS:  000055557a6a5500(0000) GS:ffff888125496000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fb5ea271fc0 CR3: 000000003269e000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 __nft_release_table+0xceb/0x11f0 net/netfilter/nf_tables_api.c:12115
 nft_rcv_nl_event+0xc25/0xdb0 net/netfilter/nf_tables_api.c:12187
 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85
 blocking_notifier_call_chain+0x6a/0x90 kernel/notifier.c:380
 netlink_release+0x123b/0x1ad0 net/netlink/af_netlink.c:761
 __sock_release net/socket.c:662 [inline]
 sock_close+0xc3/0x240 net/socket.c:1455

Restrict set clone to the flush set command in the preparation phase.
Add NFT_ITER_UPDATE_CLONE and use it for this purpose, update the rbtree
and pipapo backends to only clone the set when this iteration type is
used.

As for the existing NFT_ITER_UPDATE type, update the pipapo backend to
use the existing set clone if available, otherwise use the existing set
representation. After this update, there is no need to clone a set that
is being deleted, this includes bound anonymous set.

An alternative approach to NFT_ITER_UPDATE_CLONE is to add a .clone
interface and call it from the flush set path.

Reported-by: syzbot+4924a0edc148e8b4b342@syzkaller.appspotmail.com
Fixes: 3f1d886cc7c3 ("netfilter: nft_set_pipapo: move cloning of match info to insert/removal path")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 426534a711b0..ea6f29ad7888 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -320,11 +320,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
  * @NFT_ITER_UNSPEC: unspecified, to catch errors
  * @NFT_ITER_READ: read-only iteration over set elements
  * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element
  */
 enum nft_iter_type {
 	NFT_ITER_UNSPEC,
 	NFT_ITER_READ,
 	NFT_ITER_UPDATE,
+	NFT_ITER_UPDATE_CLONE,
 };
 
 struct nft_set;
-- 
cgit v1.2.3


From 9df95785d3d8302f7c066050117b04cd3c2048c2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 3 Mar 2026 16:31:32 +0100
Subject: netfilter: nft_set_pipapo: split gc into unlink and reclaim phase

Yiming Qian reports Use-after-free in the pipapo set type:
  Under a large number of expired elements, commit-time GC can run for a very
  long time in a non-preemptible context, triggering soft lockup warnings and
  RCU stall reports (local denial of service).

We must split GC in an unlink and a reclaim phase.

We cannot queue elements for freeing until pointers have been swapped.
Expired elements are still exposed to both the packet path and userspace
dumpers via the live copy of the data structure.

call_rcu() does not protect us: dump operations or element lookups starting
after call_rcu has fired can still observe the free'd element, unless the
commit phase has made enough progress to swap the clone and live pointers
before any new reader has picked up the old version.

This is a similar approach as done recently for the rbtree backend in commit
35f83a75529a ("netfilter: nft_set_rbtree: don't gc elements on insert").

Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ea6f29ad7888..e2d2bfc1f989 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1863,6 +1863,11 @@ struct nft_trans_gc {
 	struct rcu_head		rcu;
 };
 
+static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
+{
+	return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
 static inline void nft_ctx_update(struct nft_ctx *ctx,
 				  const struct nft_trans *trans)
 {
-- 
cgit v1.2.3


From e2cedd400c3ec0302ffca2490e8751772906ac23 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Wed, 4 Mar 2026 09:06:02 -0500
Subject: net/sched: act_ife: Fix metalist update behavior

Whenever an ife action replace changes the metalist, instead of
replacing the old data on the metalist, the current ife code is appending
the new metadata. Aside from being inappropriate behavior, this may lead
to an unbounded addition of metadata to the metalist which might cause an
out of bounds error when running the encode op:

[  138.423369][    C1] ==================================================================
[  138.424317][    C1] BUG: KASAN: slab-out-of-bounds in ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.424906][    C1] Write of size 4 at addr ffff8880077f4ffe by task ife_out_out_bou/255
[  138.425778][    C1] CPU: 1 UID: 0 PID: 255 Comm: ife_out_out_bou Not tainted 7.0.0-rc1-00169-gfbdfa8da05b6 #624 PREEMPT(full)
[  138.425795][    C1] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  138.425800][    C1] Call Trace:
[  138.425804][    C1]  <IRQ>
[  138.425808][    C1]  dump_stack_lvl (lib/dump_stack.c:122)
[  138.425828][    C1]  print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
[  138.425839][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425844][    C1]  ? __virt_addr_valid (./arch/x86/include/asm/preempt.h:95 (discriminator 1) ./include/linux/rcupdate.h:975 (discriminator 1) ./include/linux/mmzone.h:2207 (discriminator 1) arch/x86/mm/physaddr.c:54 (discriminator 1))
[  138.425853][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425859][    C1]  kasan_report (mm/kasan/report.c:221 mm/kasan/report.c:597)
[  138.425868][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425878][    C1]  kasan_check_range (mm/kasan/generic.c:186 (discriminator 1) mm/kasan/generic.c:200 (discriminator 1))
[  138.425884][    C1]  __asan_memset (mm/kasan/shadow.c:84 (discriminator 2))
[  138.425889][    C1]  ife_tlv_meta_encode (net/ife/ife.c:168)
[  138.425893][    C1]  ? ife_tlv_meta_encode (net/ife/ife.c:171)
[  138.425898][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425903][    C1]  ife_encode_meta_u16 (net/sched/act_ife.c:57)
[  138.425910][    C1]  ? __pfx_do_raw_spin_lock (kernel/locking/spinlock_debug.c:114)
[  138.425916][    C1]  ? __asan_memcpy (mm/kasan/shadow.c:105 (discriminator 3))
[  138.425921][    C1]  ? __pfx_ife_encode_meta_u16 (net/sched/act_ife.c:45)
[  138.425927][    C1]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  138.425931][    C1]  tcf_ife_act (net/sched/act_ife.c:847 net/sched/act_ife.c:879)

To solve this issue, fix the replace behavior by adding the metalist to
the ife rcu data structure.

Fixes: aa9fd9a325d51 ("sched: act: ife: update parameters via rcu handling")
Reported-by: Ruitong Liu <cnitlrt@gmail.com>
Tested-by: Ruitong Liu <cnitlrt@gmail.com>
Co-developed-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260304140603.76500-1-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tc_act/tc_ife.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index c7f24a2da1ca..24d4d5a62b3c 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -13,15 +13,13 @@ struct tcf_ife_params {
 	u8 eth_src[ETH_ALEN];
 	u16 eth_type;
 	u16 flags;
-
+	struct list_head metalist;
 	struct rcu_head rcu;
 };
 
 struct tcf_ife_info {
 	struct tc_action common;
 	struct tcf_ife_params __rcu *params;
-	/* list of metaids allowed */
-	struct list_head metalist;
 };
 #define to_ife(a) ((struct tcf_ife_info *)a)
 
-- 
cgit v1.2.3


From 16394d80539937d348dd3b9ea32415c54e67a81b Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Thu, 5 Mar 2026 12:12:43 +0100
Subject: xsk: introduce helper to determine rxq->frag_size

rxq->frag_size is basically a step between consecutive strictly aligned
frames. In ZC mode, chunk size fits exactly, but if chunks are unaligned,
there is no safe way to determine accessible space to grow tailroom.

Report frag_size to be zero, if chunks are unaligned, chunk_size otherwise.

Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-3-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/xdp_sock_drv.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index aefc368449d5..6b9ebae2dc95 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -51,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
 	return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
 }
 
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+	return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool);
+}
+
 static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 					 struct xdp_rxq_info *rxq)
 {
@@ -337,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
 	return 0;
 }
 
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+	return 0;
+}
+
 static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
 					 struct xdp_rxq_info *rxq)
 {
-- 
cgit v1.2.3


From 75d9228982f23d68066ca0b7d87014c3eb8ddc85 Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Thu, 5 Mar 2026 12:12:48 +0100
Subject: libeth, idpf: use truesize as XDP RxQ info frag_size

The only user of frag_size field in XDP RxQ info is
bpf_xdp_frags_increase_tail(). It clearly expects whole buffer size instead
of DMA write size. Different assumptions in idpf driver configuration lead
to negative tailroom.

To make it worse, buffer sizes are not actually uniform in idpf when
splitq is enabled, as there are several buffer queues, so rxq->rx_buf_size
is meaningless in this case.

Use truesize of the first bufq in AF_XDP ZC, as there is only one. Disable
growing tail for regular splitq.

Fixes: ac8a861f632e ("idpf: prepare structures to support XDP")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Link: https://patch.msgid.link/20260305111253.2317394-8-larysa.zaremba@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/libeth/xsk.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
index 481a7b28e6f2..82b5d21aae87 100644
--- a/include/net/libeth/xsk.h
+++ b/include/net/libeth/xsk.h
@@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
  * @pending: current number of XSkFQEs to refill
  * @thresh: threshold below which the queue is refilled
  * @buf_len: HW-writeable length per each buffer
+ * @truesize: step between consecutive buffers, 0 if none exists
  * @nid: ID of the closest NUMA node with memory
  */
 struct libeth_xskfq {
@@ -614,6 +615,8 @@ struct libeth_xskfq {
 	u32			thresh;
 
 	u32			buf_len;
+	u32			truesize;
+
 	int			nid;
 };
 
-- 
cgit v1.2.3


From 55f854dd5bdd8e19b936a00ef1f8d776ac32c7b0 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Wed, 4 Mar 2026 14:43:38 +0100
Subject: qmi_wwan: allow max_mtu above hard_mtu to control rx_urb_size

Commit c7159e960f14 ("usbnet: limit max_mtu based on device's hard_mtu")
capped net->max_mtu to the device's hard_mtu in usbnet_probe(). While
this correctly prevents oversized packets on standard USB network
devices, it breaks the qmi_wwan driver.

qmi_wwan relies on userspace (e.g. ModemManager) setting a large MTU on
the wwan0 interface to configure rx_urb_size via usbnet_change_mtu().
QMI modems negotiate USB transfer sizes of 16,383 or 32,767 bytes, and
the USB receive buffers must be sized accordingly. With max_mtu capped
to hard_mtu (~1500 bytes), userspace can no longer raise the MTU, the
receive buffers remain small, and download speeds drop from >300 Mbps
to ~0.8 Mbps.

Introduce a FLAG_NOMAXMTU driver flag that allows individual usbnet
drivers to opt out of the max_mtu cap. Set this flag in qmi_wwan's
driver_info structures to restore the previous behavior for QMI devices,
while keeping the safety fix in place for all other usbnet drivers.

Fixes: c7159e960f14 ("usbnet: limit max_mtu based on device's hard_mtu")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/lkml/CAPh3n803k8JcBPV5qEzUB-oKzWkAs-D5CU7z=Vd_nLRCr5ZqQg@mail.gmail.com/
Reported-by: Koen Vandeputte <koen.vandeputte@citymesh.com>
Tested-by: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Link: https://patch.msgid.link/20260304134338.1785002-1-lvivier@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/usb/usbnet.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index b0e84896e6ac..bbf799ccf3b3 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -132,6 +132,7 @@ struct driver_info {
 #define FLAG_MULTI_PACKET	0x2000
 #define FLAG_RX_ASSEMBLE	0x4000	/* rx packets may span >1 frames */
 #define FLAG_NOARP		0x8000	/* device can't do ARP */
+#define FLAG_NOMAXMTU		0x10000	/* allow max_mtu above hard_mtu */
 
 	/* init device ... can sleep, or cause probe() failure */
 	int	(*bind)(struct usbnet *, struct usb_interface *);
-- 
cgit v1.2.3


From 1954c4f012206147c34acda8da04f827aa7d3ee3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 7 Mar 2026 20:07:15 +0000
Subject: eventpoll: Convert epoll_put_uevent() to scoped user access

Saves two function calls, and one stac/clac pair.

stac/clac is rather expensive on older cpus like Zen 2.

A synthetic network stress test gives a ~1.5% increase of pps
on AMD Zen 2.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kuniyuki Iwashima <kuniyu@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/eventpoll.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index ccb478eb174b..ea9ca0e4172a 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -82,11 +82,14 @@ static inline struct epoll_event __user *
 epoll_put_uevent(__poll_t revents, __u64 data,
 		 struct epoll_event __user *uevent)
 {
-	if (__put_user(revents, &uevent->events) ||
-	    __put_user(data, &uevent->data))
-		return NULL;
-
+	scoped_user_write_access_size(uevent, sizeof(*uevent), efault) {
+		unsafe_put_user(revents, &uevent->events, efault);
+		unsafe_put_user(data, &uevent->data, efault);
+	}
 	return uevent+1;
+
+efault:
+	return NULL;
 }
 #endif
 
-- 
cgit v1.2.3


From 6f1a9140ecda3baba3d945b9a6155af4268aafc4 Mon Sep 17 00:00:00 2001
From: Weiming Shi <bestswngs@gmail.com>
Date: Sat, 7 Mar 2026 00:01:34 +0800
Subject: net: add xmit recursion limit to tunnel xmit functions

Tunnel xmit functions (iptunnel_xmit, ip6tunnel_xmit) lack their own
recursion limit. When a bond device in broadcast mode has GRE tap
interfaces as slaves, and those GRE tunnels route back through the
bond, multicast/broadcast traffic triggers infinite recursion between
bond_xmit_broadcast() and ip_tunnel_xmit()/ip6_tnl_xmit(), causing
kernel stack overflow.

The existing XMIT_RECURSION_LIMIT (8) in the no-qdisc path is not
sufficient because tunnel recursion involves route lookups and full IP
output, consuming much more stack per level. Use a lower limit of 4
(IP_TUNNEL_RECURSION_LIMIT) to prevent overflow.

Add recursion detection using dev_xmit_recursion helpers directly in
iptunnel_xmit() and ip6tunnel_xmit() to cover all IPv4/IPv6 tunnel
paths including UDP encapsulated tunnels (VXLAN, Geneve, etc.).

Move dev_xmit_recursion helpers from net/core/dev.h to public header
include/linux/netdevice.h so they can be used by tunnel code.

 BUG: KASAN: stack-out-of-bounds in blake2s.constprop.0+0xe7/0x160
 Write of size 32 at addr ffff88810033fed0 by task kworker/0:1/11
 Workqueue: mld mld_ifc_work
 Call Trace:
  <TASK>
  __build_flow_key.constprop.0 (net/ipv4/route.c:515)
  ip_rt_update_pmtu (net/ipv4/route.c:1073)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:84)
  ip_tunnel_xmit (net/ipv4/ip_tunnel.c:847)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  ip_finish_output2 (net/ipv4/ip_output.c:237)
  ip_output (net/ipv4/ip_output.c:438)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:86)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  ip_finish_output2 (net/ipv4/ip_output.c:237)
  ip_output (net/ipv4/ip_output.c:438)
  iptunnel_xmit (net/ipv4/ip_tunnel_core.c:86)
  ip_tunnel_xmit (net/ipv4/ip_tunnel.c:847)
  gre_tap_xmit (net/ipv4/ip_gre.c:779)
  dev_hard_start_xmit (net/core/dev.c:3887)
  sch_direct_xmit (net/sched/sch_generic.c:347)
  __dev_queue_xmit (net/core/dev.c:4802)
  bond_dev_queue_xmit (drivers/net/bonding/bond_main.c:312)
  bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5279)
  bond_start_xmit (drivers/net/bonding/bond_main.c:5530)
  dev_hard_start_xmit (net/core/dev.c:3887)
  __dev_queue_xmit (net/core/dev.c:4841)
  mld_sendpack
  mld_ifc_work
  process_one_work
  worker_thread
  </TASK>

Fixes: 745e20f1b626 ("net: add a recursion limit in xmit path")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260306160133.3852900-2-bestswngs@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h | 32 ++++++++++++++++++++++++++++++++
 include/net/ip6_tunnel.h  | 12 ++++++++++++
 include/net/ip_tunnels.h  |  7 +++++++
 3 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 67e25f6d15a4..ae269a2e7f4d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3576,17 +3576,49 @@ struct page_pool_bh {
 };
 DECLARE_PER_CPU(struct page_pool_bh, system_page_pool);
 
+#define XMIT_RECURSION_LIMIT	8
+
 #ifndef CONFIG_PREEMPT_RT
 static inline int dev_recursion_level(void)
 {
 	return this_cpu_read(softnet_data.xmit.recursion);
 }
+
+static inline bool dev_xmit_recursion(void)
+{
+	return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
+			XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+	__this_cpu_inc(softnet_data.xmit.recursion);
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+	__this_cpu_dec(softnet_data.xmit.recursion);
+}
 #else
 static inline int dev_recursion_level(void)
 {
 	return current->net_xmit.recursion;
 }
 
+static inline bool dev_xmit_recursion(void)
+{
+	return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+	current->net_xmit.recursion++;
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+	current->net_xmit.recursion--;
+}
 #endif
 
 void __netif_schedule(struct Qdisc *q);
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 120db2865811..1253cbb4b0a4 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -156,6 +156,16 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 {
 	int pkt_len, err;
 
+	if (dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT) {
+		net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
+				     dev->name);
+		DEV_STATS_INC(dev, tx_errors);
+		kfree_skb(skb);
+		return;
+	}
+
+	dev_xmit_recursion_inc();
+
 	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 	IP6CB(skb)->flags = ip6cb_flags;
 	pkt_len = skb->len - skb_inner_network_offset(skb);
@@ -166,6 +176,8 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 			pkt_len = -1;
 		iptunnel_xmit_stats(dev, pkt_len);
 	}
+
+	dev_xmit_recursion_dec();
 }
 #endif
 #endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 4021e6a73e32..80662f812080 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -27,6 +27,13 @@
 #include <net/ip6_route.h>
 #endif
 
+/* Recursion limit for tunnel xmit to detect routing loops.
+ * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel
+ * recursion involves route lookups and full IP output, consuming much
+ * more stack per level, so a lower limit is needed.
+ */
+#define IP_TUNNEL_RECURSION_LIMIT	4
+
 /* Keep error state on tunnel for 30 sec */
 #define IPTUNNEL_ERR_TIMEO	(30*HZ)
 
-- 
cgit v1.2.3


From fa655a9ca73f7df32b8ca4d14ce11742f9578288 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Tue, 3 Mar 2026 22:31:01 +0100
Subject: nvme: Annotate struct nvme_dhchap_key with __counted_by

Add the __counted_by() compiler attribute to the flexible array member
'key' to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and
CONFIG_FORTIFY_SOURCE.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme-auth.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h
index 60e069a6757f..e75c29c51464 100644
--- a/include/linux/nvme-auth.h
+++ b/include/linux/nvme-auth.h
@@ -11,7 +11,7 @@
 struct nvme_dhchap_key {
 	size_t len;
 	u8 hash;
-	u8 key[];
+	u8 key[] __counted_by(len);
 };
 
 u32 nvme_auth_get_seqnum(void);
-- 
cgit v1.2.3


From 22fd7f7fed2ae3702f90d1985c326354e86b9c75 Mon Sep 17 00:00:00 2001
From: Muhammad Amirul Asyraf Mohamad Jamian
 <muhammad.amirul.asyraf.mohamad.jamian@altera.com>
Date: Thu, 5 Mar 2026 01:31:51 -0800
Subject: firmware: stratix10-svc: Add Multi SVC clients support

In the current implementation, SVC client drivers such as socfpga-hwmon,
intel_fcs, stratix10-soc, stratix10-rsu each send an SMC command that
triggers a single thread in the stratix10-svc driver. Upon receiving a
callback, the initiating client driver sends a stratix10-svc-done signal,
terminating the thread without waiting for other pending SMC commands to
complete. This leads to a timeout issue in the firmware SVC mailbox service
when multiple client drivers send SMC commands concurrently.

To resolve this issue, a dedicated thread is now created per channel. The
stratix10-svc driver will support up to the number of channels defined by
SVC_NUM_CHANNEL. Thread synchronization is handled using a mutex to prevent
simultaneous issuance of SMC commands by multiple threads.

SVC_NUM_DATA_IN_FIFO is reduced from 32 to 8, since each channel now has
its own dedicated FIFO and the SDM processes commands one at a time.
8 entries per channel is sufficient while keeping the total aggregate
capacity the same (4 channels x 8 = 32 entries).

Additionally, a thread task is now validated before invoking kthread_stop
when the user aborts, ensuring safe termination.

Timeout values have also been adjusted to accommodate the increased load
from concurrent client driver activity.

Fixes: 7ca5ce896524 ("firmware: add Intel Stratix10 service layer driver")
Cc: stable@vger.kernel.org
Signed-off-by: Ang Tien Sung <tien.sung.ang@altera.com>
Signed-off-by: Fong, Yan Kei <yankei.fong@altera.com>
Signed-off-by: Muhammad Amirul Asyraf Mohamad Jamian <muhammad.amirul.asyraf.mohamad.jamian@altera.com>
Link: https://lore.kernel.org/all/20260305093151.2678-1-muhammad.amirul.asyraf.mohamad.jamian@altera.com
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
---
 include/linux/firmware/intel/stratix10-svc-client.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h
index d290060f4c73..91013161e9db 100644
--- a/include/linux/firmware/intel/stratix10-svc-client.h
+++ b/include/linux/firmware/intel/stratix10-svc-client.h
@@ -68,12 +68,12 @@
  * timeout value used in Stratix10 FPGA manager driver.
  * timeout value used in RSU driver
  */
-#define SVC_RECONFIG_REQUEST_TIMEOUT_MS         300
-#define SVC_RECONFIG_BUFFER_TIMEOUT_MS          720
-#define SVC_RSU_REQUEST_TIMEOUT_MS              300
+#define SVC_RECONFIG_REQUEST_TIMEOUT_MS         5000
+#define SVC_RECONFIG_BUFFER_TIMEOUT_MS          5000
+#define SVC_RSU_REQUEST_TIMEOUT_MS              2000
 #define SVC_FCS_REQUEST_TIMEOUT_MS		2000
 #define SVC_COMPLETED_TIMEOUT_MS		30000
-#define SVC_HWMON_REQUEST_TIMEOUT_MS		300
+#define SVC_HWMON_REQUEST_TIMEOUT_MS		2000
 
 struct stratix10_svc_chan;
 
-- 
cgit v1.2.3


From 6ffd853b0b10e1e292cef0bfd0997986471254de Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:51:44 -0800
Subject: build_bug.h: correct function parameters names in kernel-doc

Use the correct function (or macro) parameter names to avoid kernel-doc warnings:

Warning: include/linux/build_bug.h:38 function parameter 'cond' not
 described in 'BUILD_BUG_ON_MSG'
Warning: include/linux/build_bug.h:38 function parameter 'msg' not
 described in 'BUILD_BUG_ON_MSG'
Warning: include/linux/build_bug.h:76 function parameter 'expr' not
 described in 'static_assert'

Link: https://lkml.kernel.org/r/20260302005144.3467019-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/build_bug.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index 2cfbb4c65c78..d3dc5dc5f916 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -32,7 +32,8 @@
 /**
  * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
  *		      error message.
- * @condition: the condition which the compiler should know is false.
+ * @cond: the condition which the compiler should know is false.
+ * @msg: build-time error message
  *
  * See BUILD_BUG_ON for description.
  */
@@ -60,6 +61,7 @@
 
 /**
  * static_assert - check integer constant expression at build time
+ * @expr: expression to be checked
  *
  * static_assert() is a wrapper for the C11 _Static_assert, with a
  * little macro magic to make the message optional (defaulting to the
-- 
cgit v1.2.3


From 28b225282d44e2ef40e7f46cfdbd5d1b20b8874f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 9 Mar 2026 17:39:07 -0700
Subject: page_pool: store detach_time as ktime_t to avoid false-negatives

While testing other changes in vng I noticed that
nl_netdev.page_pool_check flakes. This never happens in real CI.

Turns out vng may boot and get to that test in less than a second.
page_pool_detached() records the detach time in seconds, so if
vng is fast enough detach time is set to 0. Other code treats
0 as "not detached". detach_time is only used to report the state
to the user, so it's not a huge deal in practice but let's fix it.
Store the raw ktime_t (nanoseconds) instead. A nanosecond value
of 0 is practically impossible.

Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Fixes: 69cb4952b6f6 ("net: page_pool: report when page pool was destroyed")
Link: https://patch.msgid.link/20260310003907.3540019-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/page_pool/types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 0d453484a585..cdd95477af7a 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -247,7 +247,7 @@ struct page_pool {
 	/* User-facing fields, protected by page_pools_lock */
 	struct {
 		struct hlist_node list;
-		u64 detach_time;
+		ktime_t detach_time;
 		u32 id;
 	} user;
 };
-- 
cgit v1.2.3


From b2e48c429ec54715d16fefa719dd2fbded2e65be Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 10 Mar 2026 21:28:53 +0100
Subject: sched/mmcid: Prevent CID stalls due to concurrent forks

A newly forked task is accounted as MMCID user before the task is visible
in the process' thread list and the global task list. This creates the
following problem:

 CPU1			CPU2
 fork()
   sched_mm_cid_fork(tnew1)
     tnew1->mm.mm_cid_users++;
     tnew1->mm_cid.cid = getcid()
-> preemption
			fork()
			  sched_mm_cid_fork(tnew2)
			    tnew2->mm.mm_cid_users++;
                            // Reaches the per CPU threshold
			    mm_cid_fixup_tasks_to_cpus()
			    for_each_other(current, p)
			         ....

As tnew1 is not visible yet, this fails to fix up the already allocated CID
of tnew1. As a consequence a subsequent schedule in might fail to acquire a
(transitional) CID and the machine stalls.

Move the invocation of sched_mm_cid_fork() after the new task becomes
visible in the thread and the task list to prevent this.

This also makes it symmetrical vs. exit() where the task is removed as CID
user before the task is removed from the thread and task lists.

Fixes: fbd0e71dc370 ("sched/mmcid: Provide CID ownership mode fixup functions")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202525.969061974@kernel.org
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7b4a980eb2f..5a5d3dbc9cdf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2354,7 +2354,6 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
 #ifdef CONFIG_SCHED_MM_CID
 void sched_mm_cid_before_execve(struct task_struct *t);
 void sched_mm_cid_after_execve(struct task_struct *t);
-void sched_mm_cid_fork(struct task_struct *t);
 void sched_mm_cid_exit(struct task_struct *t);
 static __always_inline int task_mm_cid(struct task_struct *t)
 {
@@ -2363,7 +2362,6 @@ static __always_inline int task_mm_cid(struct task_struct *t)
 #else
 static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
 static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
-static inline void sched_mm_cid_fork(struct task_struct *t) { }
 static inline void sched_mm_cid_exit(struct task_struct *t) { }
 static __always_inline int task_mm_cid(struct task_struct *t)
 {
-- 
cgit v1.2.3


From 192d852129b1b7c4f0ddbab95d0de1efd5ee1405 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 10 Mar 2026 21:29:09 +0100
Subject: sched/mmcid: Avoid full tasklist walks

Chasing vfork()'ed tasks on a CID ownership mode switch requires a full
task list walk, which is obviously expensive on large systems.

Avoid that by keeping a list of tasks using a mm MMCID entity in mm::mm_cid
and walk this list instead. This removes the proven to be flaky counting
logic and avoids a full task list walk in the case of vfork()'ed tasks.

Fixes: fbd0e71dc370 ("sched/mmcid: Provide CID ownership mode fixup functions")
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20260310202526.183824481@kernel.org
---
 include/linux/rseq_types.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index da5fa6f40294..0b42045988db 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -133,10 +133,12 @@ struct rseq_data { };
  * @active:	MM CID is active for the task
  * @cid:	The CID associated to the task either permanently or
  *		borrowed from the CPU
+ * @node:	Queued in the per MM MMCID list
  */
 struct sched_mm_cid {
 	unsigned int		active;
 	unsigned int		cid;
+	struct hlist_node	node;
 };
 
 /**
@@ -157,6 +159,7 @@ struct mm_cid_pcpu {
  * @work:		Regular work to handle the affinity mode change case
  * @lock:		Spinlock to protect against affinity setting which can't take @mutex
  * @mutex:		Mutex to serialize forks and exits related to this mm
+ * @user_list:		List of the MM CID users of a MM
  * @nr_cpus_allowed:	The number of CPUs in the per MM allowed CPUs map. The map
  *			is growth only.
  * @users:		The number of tasks sharing this MM. Separate from mm::mm_users
@@ -177,13 +180,14 @@ struct mm_mm_cid {
 
 	raw_spinlock_t		lock;
 	struct mutex		mutex;
+	struct hlist_head	user_list;
 
 	/* Low frequency modified */
 	unsigned int		nr_cpus_allowed;
 	unsigned int		users;
 	unsigned int		pcpu_thrs;
 	unsigned int		update_deferred;
-}____cacheline_aligned_in_smp;
+} ____cacheline_aligned;
 #else /* CONFIG_SCHED_MM_CID */
 struct mm_mm_cid { };
 struct sched_mm_cid { };
-- 
cgit v1.2.3


From 227312b4a65c373d5d8b4683b7fc36203fedc516 Mon Sep 17 00:00:00 2001
From: Hans de Goede <johannes.goede@oss.qualcomm.com>
Date: Sat, 28 Feb 2026 15:52:58 +0100
Subject: HID: input: Add HID_BATTERY_QUIRK_DYNAMIC for Elan touchscreens

Elan touchscreens have a HID-battery device for the stylus which is always
there even if there is no stylus.

This is causing upower to report an empty battery for the stylus and some
desktop-environments will show a notification about this, which is quite
annoying.

Because of this the HID-battery is being ignored on all Elan I2c and USB
touchscreens, but this causes there to be no battery reporting for
the stylus at all.

This adds a new HID_BATTERY_QUIRK_DYNAMIC and uses it for the Elan
touchscreens.

This new quirk causes the present value of the battery to start at 0,
which will make userspace ignore it and only sets present to 1 after
receiving a battery input report which only happens when the stylus
gets in range.

Reported-by: ggrundik@gmail.com
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221118
Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 include/linux/hid.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2990b9f94cb5..31324609af4d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -682,6 +682,7 @@ struct hid_device {
 	__s32 battery_charge_status;
 	enum hid_battery_status battery_status;
 	bool battery_avoid_query;
+	bool battery_present;
 	ktime_t battery_ratelimit_time;
 #endif
 
-- 
cgit v1.2.3


From 416909962e7cdf29fd01ac523c953f37708df93d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 17 Feb 2026 22:07:47 -0500
Subject: USB: usbcore: Introduce usb_bulk_msg_killable()

The synchronous message API in usbcore (usb_control_msg(),
usb_bulk_msg(), and so on) uses uninterruptible waits.  However,
drivers may call these routines in the context of a user thread, which
means it ought to be possible to at least kill them.

For this reason, introduce a new usb_bulk_msg_killable() function
which behaves the same as usb_bulk_msg() except for using
wait_for_completion_killable_timeout() instead of
wait_for_completion_timeout().  The same can be done for
usb_control_msg() later on, if it turns out to be needed.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Oliver Neukum <oneukum@suse.com>
Link: https://lore.kernel.org/linux-usb/3acfe838-6334-4f6d-be7c-4bb01704b33d@rowland.harvard.edu/
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
Link: https://patch.msgid.link/248628b4-cc83-4e81-a620-3ce4e0376d41@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index fbfcc70b07fb..57ceeb02a7cb 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1868,8 +1868,9 @@ extern int usb_control_msg(struct usb_device *dev, unsigned int pipe,
 extern int usb_interrupt_msg(struct usb_device *usb_dev, unsigned int pipe,
 	void *data, int len, int *actual_length, int timeout);
 extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe,
-	void *data, int len, int *actual_length,
-	int timeout);
+	void *data, int len, int *actual_length, int timeout);
+extern int usb_bulk_msg_killable(struct usb_device *usb_dev, unsigned int pipe,
+	void *data, int len, int *actual_length, int timeout);
 
 /* wrappers around usb_control_msg() for the most common standard requests */
 int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request,
-- 
cgit v1.2.3


From 1015c27a5e1a63efae2b18a9901494474b4d1dc3 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 17 Feb 2026 22:10:32 -0500
Subject: USB: core: Limit the length of unkillable synchronous timeouts

The usb_control_msg(), usb_bulk_msg(), and usb_interrupt_msg() APIs in
usbcore allow unlimited timeout durations.  And since they use
uninterruptible waits, this leaves open the possibility of hanging a
task for an indefinitely long time, with no way to kill it short of
unplugging the target device.

To prevent this sort of problem, enforce a maximum limit on the length
of these unkillable timeouts.  The limit chosen here, somewhat
arbitrarily, is 60 seconds.  On many systems (although not all) this
is short enough to avoid triggering the kernel's hung-task detector.

In addition, clear up the ambiguity of negative timeout values by
treating them the same as 0, i.e., using the maximum allowed timeout.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/linux-usb/3acfe838-6334-4f6d-be7c-4bb01704b33d@rowland.harvard.edu/
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
Link: https://patch.msgid.link/15fc9773-a007-47b0-a703-df89a8cf83dd@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 57ceeb02a7cb..04277af4bb9d 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1862,6 +1862,9 @@ void usb_free_noncoherent(struct usb_device *dev, size_t size,
  *                         SYNCHRONOUS CALL SUPPORT                  *
  *-------------------------------------------------------------------*/
 
+/* Maximum value allowed for timeout in synchronous routines below */
+#define USB_MAX_SYNCHRONOUS_TIMEOUT		60000	/* ms */
+
 extern int usb_control_msg(struct usb_device *dev, unsigned int pipe,
 	__u8 request, __u8 requesttype, __u16 value, __u16 index,
 	void *data, __u16 size, int timeout);
-- 
cgit v1.2.3


From 9f6a983cfa22ac662c86e60816d3a357d4b551e9 Mon Sep 17 00:00:00 2001
From: Jie Deng <dengjie03@kylinos.cn>
Date: Fri, 27 Feb 2026 16:49:31 +0800
Subject: usb: core: new quirk to handle devices with zero configurations

Some USB devices incorrectly report bNumConfigurations as 0 in their
device descriptor, which causes the USB core to reject them during
enumeration.
logs:
usb 1-2: device descriptor read/64, error -71
usb 1-2: no configurations
usb 1-2: can't read configurations, error -22

However, these devices actually work correctly when
treated as having a single configuration.

Add a new quirk USB_QUIRK_FORCE_ONE_CONFIG to handle such devices.
When this quirk is set, assume the device has 1 configuration instead
of failing with -EINVAL.

This quirk is applied to the device with VID:PID 5131:2007 which
exhibits this behavior.

Signed-off-by: Jie Deng <dengjie03@kylinos.cn>
Link: https://patch.msgid.link/20260227084931.1527461-1-dengjie03@kylinos.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/quirks.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 2f7bd2fdc616..b3cc7beab4a3 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -78,4 +78,7 @@
 /* skip BOS descriptor request */
 #define USB_QUIRK_NO_BOS			BIT(17)
 
+/* Device claims zero configurations, forcing to 1 */
+#define USB_QUIRK_FORCE_ONE_CONFIG		BIT(18)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From edd20cb693d9cb5e3d6fcecd858093dab4e2b0aa Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu@kernel.org>
Date: Wed, 11 Mar 2026 16:51:00 +0000
Subject: Revert "mshv: expose the scrub partition hypercall"

This reverts commit 36d6cbb62133fc6eea28f380409e0fb190f3dfbe.

Calling this as a passthrough hypercall leaves the VM in an inconsistent
state. Revert before it is released.

Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/hyperv/hvgdk_mini.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 8bb3dd71c5b4..1823a290a7b7 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -477,7 +477,6 @@ union hv_vp_assist_msr_contents {	 /* HV_REGISTER_VP_ASSIST_PAGE */
 #define HVCALL_NOTIFY_PARTITION_EVENT                   0x0087
 #define HVCALL_ENTER_SLEEP_STATE			0x0084
 #define HVCALL_NOTIFY_PORT_RING_EMPTY			0x008b
-#define HVCALL_SCRUB_PARTITION				0x008d
 #define HVCALL_REGISTER_INTERCEPT_RESULT		0x0091
 #define HVCALL_ASSERT_VIRTUAL_INTERRUPT			0x0094
 #define HVCALL_CREATE_PORT				0x0095
-- 
cgit v1.2.3


From 96189080265e6bb5dde3a4afbaf947af493e3f82 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 9 Mar 2026 14:21:37 -0600
Subject: io_uring: ensure ctx->rings is stable for task work flags
 manipulation

If DEFER_TASKRUN | SETUP_TASKRUN is used and task work is added while
the ring is being resized, it's possible for the OR'ing of
IORING_SQ_TASKRUN to happen in the small window of swapping into the
new rings and the old rings being freed.

Prevent this by adding a 2nd ->rings pointer, ->rings_rcu, which is
protected by RCU. The task work flags manipulation is inside RCU
already, and if the resize ring freeing is done post an RCU synchronize,
then there's no need to add locking to the fast path of task work
additions.

Note: this is only done for DEFER_TASKRUN, as that's the only setup mode
that supports ring resizing. If this ever changes, then they too need to
use the io_ctx_mark_taskrun() helper.

Link: https://lore.kernel.org/io-uring/20260309062759.482210-1-naup96721@gmail.com/
Cc: stable@vger.kernel.org
Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3e4a82a6f817..dd1420bfcb73 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -388,6 +388,7 @@ struct io_ring_ctx {
 	 * regularly bounce b/w CPUs.
 	 */
 	struct {
+		struct io_rings	__rcu	*rings_rcu;
 		struct llist_head	work_llist;
 		struct llist_head	retry_llist;
 		unsigned long		check_cq;
-- 
cgit v1.2.3


From 94a4b1f959989de9c54d43c3a102fb1ee92e1414 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 7 Mar 2026 17:50:53 -0300
Subject: ipv6: move the disable_ipv6_mod knob to core code

From: Jakub Kicinski <kuba@kernel.org>

Make sure disable_ipv6_mod itself is not part of the IPv6 module,
in case core code wants to refer to it. We will remove support
for IPv6=m soon, this change helps make fixes we commit before
that less messy.

Link: https://patch.msgid.link/20260307-net-nd_tbl_fixes-v4-1-e2677e85628c@suse.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ipv6.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 443053a76dcf..a7421382a916 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -333,7 +333,12 @@ struct tcp6_timewait_sock {
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
-bool ipv6_mod_enabled(void);
+extern int disable_ipv6_mod;
+
+static inline bool ipv6_mod_enabled(void)
+{
+	return disable_ipv6_mod == 0;
+}
 
 static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk)
 {
-- 
cgit v1.2.3


From d87f8bc47fbf012a7f115e311d0603d97e47c34c Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 9 Mar 2026 11:32:43 +0100
Subject: xfrm: avoid RCU warnings around the per-netns netlink socket

net->xfrm.nlsk is used in 2 types of contexts:
 - fully under RCU, with rcu_read_lock + rcu_dereference and a NULL check
 - in the netlink handlers, with requests coming from a userspace socket

In the 2nd case, net->xfrm.nlsk is guaranteed to stay non-NULL and the
object is alive, since we can't enter the netns destruction path while
the user socket holds a reference on the netns.

After adding the __rcu annotation to netns_xfrm.nlsk (which silences
sparse warnings in the RCU users and __net_init code), we need to tell
sparse that the 2nd case is safe. Add a helper for that.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 23dd647fe024..b73983a17e08 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -59,7 +59,7 @@ struct netns_xfrm {
 	struct list_head	inexact_bins;
 
 
-	struct sock		*nlsk;
+	struct sock		__rcu *nlsk;
 	struct sock		*nlsk_stash;
 
 	u32			sysctl_aevent_etime;
-- 
cgit v1.2.3


From 14de1552a4e3fece78bb20314887e70888c9d448 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 11 Mar 2026 16:14:55 -0700
Subject: include/linux/local_lock_internal.h: Make this header file again
 compatible with sparse

There are two versions of the __this_cpu_local_lock() definitions in
include/linux/local_lock_internal.h: one version that relies on the
Clang overloading functionality and another version that does not.
Select the latter version when using sparse. This patch fixes the
following errors reported by sparse:

   include/linux/local_lock_internal.h:331:40: sparse: sparse: multiple definitions for function '__this_cpu_local_lock'
   include/linux/local_lock_internal.h:325:37: sparse:  the previous one is here

Closes: https://lore.kernel.org/oe-kbuild-all/202603062334.wgI5htP0-lkp@intel.com/
Fixes: d3febf16dee2 ("locking/local_lock: Support Clang's context analysis")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Marco Elver <elver@google.com>
Link: https://patch.msgid.link/20260311231455.1961413-1-bvanassche@acm.org
---
 include/linux/local_lock_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index eff711bf973f..234be7f12c15 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -315,7 +315,7 @@ do {								\
 
 #endif /* CONFIG_PREEMPT_RT */
 
-#if defined(WARN_CONTEXT_ANALYSIS)
+#if defined(WARN_CONTEXT_ANALYSIS) && !defined(__CHECKER__)
 /*
  * Because the compiler only knows about the base per-CPU variable, use this
  * helper function to make the compiler think we lock/unlock the @base variable,
-- 
cgit v1.2.3


From 8324a54f604da18f21070702a8ad82ab2062787b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Tue, 3 Feb 2026 19:10:45 +0200
Subject: serial: 8250: Add serial8250_handle_irq_locked()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

8250_port exports serial8250_handle_irq() to HW specific 8250 drivers.
It takes port's lock within but a HW specific 8250 driver may want to
take port's lock itself, do something, and then call the generic
handler in 8250_port but to do that, the caller has to release port's
lock for no good reason.

Introduce serial8250_handle_irq_locked() which a HW specific driver can
call while already holding port's lock.

As this is a new export, put it straight into a namespace (where all 8250
exports should eventually be moved).

Tested-by: Bandal, Shankar <shankar.bandal@intel.com>
Tested-by: Murthy, Shanth <shanth.murthy@intel.com>
Cc: stable <stable@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20260203171049.4353-4-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_8250.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 01efdce0fda0..a95b2d143d24 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -195,6 +195,7 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl);
 void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
 			       unsigned int quot);
 int fsl8250_handle_irq(struct uart_port *port);
+void serial8250_handle_irq_locked(struct uart_port *port, unsigned int iir);
 int serial8250_handle_irq(struct uart_port *port, unsigned int iir);
 u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr);
 void serial8250_read_char(struct uart_8250_port *up, u16 lsr);
-- 
cgit v1.2.3


From c38b8f5f791ecce13ab77e2257f8fd2444ba80f6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2026 04:39:08 +0000
Subject: net: prevent NULL deref in ip[6]tunnel_xmit()

Blamed commit missed that both functions can be called with dev == NULL.

Also add unlikely() hints for these conditions that only fuzzers can hit.

Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions")
Signed-off-by: Eric Dumazet <edumazet@google.com>
CC: Weiming Shi <bestswngs@gmail.com>
Link: https://patch.msgid.link/20260312043908.2790803-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/ip6_tunnel.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 1253cbb4b0a4..359b595f1df9 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -156,10 +156,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 {
 	int pkt_len, err;
 
-	if (dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT) {
-		net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
-				     dev->name);
-		DEV_STATS_INC(dev, tx_errors);
+	if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) {
+		if (dev) {
+			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
+					     dev->name);
+			DEV_STATS_INC(dev, tx_errors);
+		}
 		kfree_skb(skb);
 		return;
 	}
-- 
cgit v1.2.3


From 2619da73bb2f10d88f7e1087125c40144fdf0987 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 5 Mar 2026 20:49:55 +0100
Subject: KVM: x86: Use __DECLARE_FLEX_ARRAY() for UAPI structures with VLAs

Commit 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with
flexible-array members") broke the userspace API for C++.

These structures ending in VLAs are typically a *header*, which can be
followed by an arbitrary number of entries. Userspace typically creates
a larger structure with some non-zero number of entries, for example in
QEMU's kvm_arch_get_supported_msr_feature():

    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[1];
    } msr_data = {};

While that works in C, it fails in C++ with an error like:
 flexible array member 'kvm_msrs::entries' not at end of 'struct msr_data'

Fix this by using __DECLARE_FLEX_ARRAY() for the VLA, which uses [0]
for C++ compilation.

Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members")
Cc: stable@vger.kernel.org
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Link: https://patch.msgid.link/3abaf6aefd6e5efeff3b860ac38421d9dec908db.camel@infradead.org
[sean: tag for stable@]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/uapi/linux/kvm.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 80364d4dbebb..3f0d8d3c3daf 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -11,6 +11,7 @@
 #include <linux/const.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/stddef.h>
 #include <linux/ioctl.h>
 #include <asm/kvm.h>
 
@@ -542,7 +543,7 @@ struct kvm_coalesced_mmio {
 
 struct kvm_coalesced_mmio_ring {
 	__u32 first, last;
-	struct kvm_coalesced_mmio coalesced_mmio[];
+	__DECLARE_FLEX_ARRAY(struct kvm_coalesced_mmio, coalesced_mmio);
 };
 
 #define KVM_COALESCED_MMIO_MAX \
@@ -592,7 +593,7 @@ struct kvm_clear_dirty_log {
 /* for KVM_SET_SIGNAL_MASK */
 struct kvm_signal_mask {
 	__u32 len;
-	__u8  sigset[];
+	__DECLARE_FLEX_ARRAY(__u8, sigset);
 };
 
 /* for KVM_TPR_ACCESS_REPORTING */
@@ -1051,7 +1052,7 @@ struct kvm_irq_routing_entry {
 struct kvm_irq_routing {
 	__u32 nr;
 	__u32 flags;
-	struct kvm_irq_routing_entry entries[];
+	__DECLARE_FLEX_ARRAY(struct kvm_irq_routing_entry, entries);
 };
 
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
@@ -1142,7 +1143,7 @@ struct kvm_dirty_tlb {
 
 struct kvm_reg_list {
 	__u64 n; /* number of regs */
-	__u64 reg[];
+	__DECLARE_FLEX_ARRAY(__u64, reg);
 };
 
 struct kvm_one_reg {
@@ -1608,7 +1609,7 @@ struct kvm_stats_desc {
 #ifdef __KERNEL__
 	char name[KVM_STATS_NAME_SIZE];
 #else
-	char name[];
+	__DECLARE_FLEX_ARRAY(char, name);
 #endif
 };
 
-- 
cgit v1.2.3


From 8431c602f551549f082bbfa67f3003f2d8e3e132 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 11 Mar 2026 12:31:10 +0000
Subject: ip_tunnel: adapt iptunnel_xmit_stats() to NETDEV_PCPU_STAT_DSTATS

Blamed commits forgot that vxlan/geneve use udp_tunnel[6]_xmit_skb() which
call iptunnel_xmit_stats().

iptunnel_xmit_stats() was assuming tunnels were only using
NETDEV_PCPU_STAT_TSTATS.

@syncp offset in pcpu_sw_netstats and pcpu_dstats is different.

32bit kernels would either have corruptions or freezes if the syncp
sequence was overwritten.

This patch also moves pcpu_stat_type closer to dev->{t,d}stats to avoid
a potential cache line miss since iptunnel_xmit_stats() needs to read it.

Fixes: 6fa6de302246 ("geneve: Handle stats using NETDEV_PCPU_STAT_DSTATS.")
Fixes: be226352e8dc ("vxlan: Handle stats using NETDEV_PCPU_STAT_DSTATS.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Link: https://patch.msgid.link/20260311123110.1471930-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h |  3 +--
 include/net/ip_tunnels.h  | 30 +++++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ae269a2e7f4d..d7aac6f185bc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2155,6 +2155,7 @@ struct net_device {
 	unsigned long		state;
 	unsigned int		flags;
 	unsigned short		hard_header_len;
+	enum netdev_stat_type	pcpu_stat_type:8;
 	netdev_features_t	features;
 	struct inet6_dev __rcu	*ip6_ptr;
 	__cacheline_group_end(net_device_read_txrx);
@@ -2404,8 +2405,6 @@ struct net_device {
 	void				*ml_priv;
 	enum netdev_ml_priv_type	ml_priv_type;
 
-	enum netdev_stat_type		pcpu_stat_type:8;
-
 #if IS_ENABLED(CONFIG_GARP)
 	struct garp_port __rcu	*garp_port;
 #endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 80662f812080..1f577a4f8ce9 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -665,13 +665,29 @@ static inline int iptunnel_pull_offloads(struct sk_buff *skb)
 static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
 {
 	if (pkt_len > 0) {
-		struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
-
-		u64_stats_update_begin(&tstats->syncp);
-		u64_stats_add(&tstats->tx_bytes, pkt_len);
-		u64_stats_inc(&tstats->tx_packets);
-		u64_stats_update_end(&tstats->syncp);
-		put_cpu_ptr(tstats);
+		if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) {
+			struct pcpu_dstats *dstats = get_cpu_ptr(dev->dstats);
+
+			u64_stats_update_begin(&dstats->syncp);
+			u64_stats_add(&dstats->tx_bytes, pkt_len);
+			u64_stats_inc(&dstats->tx_packets);
+			u64_stats_update_end(&dstats->syncp);
+			put_cpu_ptr(dstats);
+			return;
+		}
+		if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) {
+			struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
+
+			u64_stats_update_begin(&tstats->syncp);
+			u64_stats_add(&tstats->tx_bytes, pkt_len);
+			u64_stats_inc(&tstats->tx_packets);
+			u64_stats_update_end(&tstats->syncp);
+			put_cpu_ptr(tstats);
+			return;
+		}
+		pr_err_once("iptunnel_xmit_stats pcpu_stat_type=%d\n",
+			    dev->pcpu_stat_type);
+		WARN_ON_ONCE(1);
 		return;
 	}
 
-- 
cgit v1.2.3


From 5eb608319bb56464674a71b4a66ea65c6c435d64 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <npitre@baylibre.com>
Date: Tue, 27 Jan 2026 17:56:01 -0500
Subject: vt: save/restore unicode screen buffer for alternate screen

The alternate screen support added by commit 23743ba64709 ("vt: add
support for smput/rmput escape codes") only saves and restores the
regular screen buffer (vc_origin), but completely ignores the corresponding
unicode screen buffer (vc_uni_lines) creating a messed-up display.

Add vc_saved_uni_lines to save the unicode screen buffer when entering
the alternate screen, and restore it when leaving. Also ensure proper
cleanup in reset_terminal() and vc_deallocate().

Fixes: 23743ba64709 ("vt: add support for smput/rmput escape codes")
Cc: stable <stable@kernel.org>
Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Link: https://patch.msgid.link/5o2p6qp3-91pq-0p17-or02-1oors4417ns7@onlyvoer.pbz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/console_struct.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 13b35637bd5a..d5ca855116df 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -160,6 +160,7 @@ struct vc_data {
 	struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */
 	u32 **vc_uni_lines;			/* unicode screen content */
 	u16		*vc_saved_screen;
+	u32		**vc_saved_uni_lines;
 	unsigned int	vc_saved_cols;
 	unsigned int	vc_saved_rows;
 	/* additional information is in vt_kern.h */
-- 
cgit v1.2.3


From 598adea720b97572c7028635cb1c59b3684e128c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 11 Mar 2026 16:24:02 +0100
Subject: netfilter: revert nft_set_rbtree: validate open interval overlap

This reverts commit 648946966a08 ("netfilter: nft_set_rbtree: validate
open interval overlap").

There have been reports of nft failing to load valid rulesets after this
patch was merged into -stable.

I can reproduce several such problems with recent nft versions, including
nft 1.1.6 which is widely shipped by distributions.

We currently have little choice here.
This commit can be resurrected at some point once the nftables fix that
triggers the false overlap positive has appeared in common distros
(see e83e32c8d1cd ("mnl: restore create element command with large batches") in
 nftables.git).

Fixes: 648946966a08 ("netfilter: nft_set_rbtree: validate open interval overlap")
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e2d2bfc1f989..6299af4ef423 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -277,8 +277,6 @@ struct nft_userdata {
 	unsigned char		data[];
 };
 
-#define NFT_SET_ELEM_INTERNAL_LAST	0x1
-
 /* placeholder structure for opaque set element backend representation. */
 struct nft_elem_priv { };
 
@@ -288,7 +286,6 @@ struct nft_elem_priv { };
  *	@key: element key
  *	@key_end: closing element key
  *	@data: element data
- * 	@flags: flags
  *	@priv: element private data and extensions
  */
 struct nft_set_elem {
@@ -304,7 +301,6 @@ struct nft_set_elem {
 		u32		buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
 		struct nft_data val;
 	} data;
-	u32			flags;
 	struct nft_elem_priv	*priv;
 };
 
-- 
cgit v1.2.3


From 0548a13b5a145b16e4da0628b5936baf35f51b43 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 12 Mar 2026 12:38:59 +0100
Subject: nf_tables: nft_dynset: fix possible stateful expression memleak in
 error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If cloning the second stateful expression in the element via GFP_ATOMIC
fails, then the first stateful expression remains in place without being
released.

   unreferenced object (percpu) 0x607b97e9cab8 (size 16):
     comm "softirq", pid 0, jiffies 4294931867
     hex dump (first 16 bytes on cpu 3):
       00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
     backtrace (crc 0):
       pcpu_alloc_noprof+0x453/0xd80
       nft_counter_clone+0x9c/0x190 [nf_tables]
       nft_expr_clone+0x8f/0x1b0 [nf_tables]
       nft_dynset_new+0x2cb/0x5f0 [nf_tables]
       nft_rhash_update+0x236/0x11c0 [nf_tables]
       nft_dynset_eval+0x11f/0x670 [nf_tables]
       nft_do_chain+0x253/0x1700 [nf_tables]
       nft_do_chain_ipv4+0x18d/0x270 [nf_tables]
       nf_hook_slow+0xaa/0x1e0
       ip_local_deliver+0x209/0x330

Fixes: 563125a73ac3 ("netfilter: nftables: generalize set extension to support for several expressions")
Reported-by: Gurpreet Shergill <giki.shergill@proton.me>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 6299af4ef423..ec8a8ec9c0aa 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -874,6 +874,8 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
 					u64 timeout, u64 expiration, gfp_t gfp);
 int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
 			    struct nft_expr *expr_array[]);
+void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
+			       struct nft_set_elem_expr *elem_expr);
 void nft_set_elem_destroy(const struct nft_set *set,
 			  const struct nft_elem_priv *elem_priv,
 			  bool destroy_expr);
-- 
cgit v1.2.3


From 641f6fda143b879da1515f821ee475073678cf2a Mon Sep 17 00:00:00 2001
From: Mukesh Ojha <mukesh.ojha@oss.qualcomm.com>
Date: Thu, 29 Jan 2026 20:53:20 +0530
Subject: soc: qcom: pd-mapper: Fix element length in servreg_loc_pfr_req_ei

It looks like the element length declared in servreg_loc_pfr_req_ei for the
reason field does not match servreg_loc_pfr_req's reason field, due to
which we could observe a decoding error on PD crash.

  qmi_decode_string_elem: String len 81 >= Max Len 65

Fix this by matching with servreg_loc_pfr_req's reason field.

Fixes: 1ebcde047c54 ("soc: qcom: add pd-mapper implementation")
Signed-off-by: Mukesh Ojha <mukesh.ojha@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Tested-by: Nikita Travkin <nikita@trvn.ru>
Link: https://lore.kernel.org/r/20260129152320.3658053-2-mukesh.ojha@oss.qualcomm.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/pdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/soc/qcom/pdr.h b/include/linux/soc/qcom/pdr.h
index 83a8ea612e69..2b7691e47c2a 100644
--- a/include/linux/soc/qcom/pdr.h
+++ b/include/linux/soc/qcom/pdr.h
@@ -5,6 +5,7 @@
 #include <linux/soc/qcom/qmi.h>
 
 #define SERVREG_NAME_LENGTH	64
+#define SERVREG_PFR_LENGTH	256
 
 struct pdr_service;
 struct pdr_handle;
-- 
cgit v1.2.3


From b7405dcf7385445e10821777143f18c3ce20fa04 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 15 Mar 2026 10:41:52 +0000
Subject: bonding: prevent potential infinite loop in bond_header_parse()

bond_header_parse() can loop if a stack of two bonding devices is setup,
because skb->dev always points to the hierarchy top.

Add new "const struct net_device *dev" parameter to
(struct header_ops)->parse() method to make sure the recursion
is bounded, and that the final leaf parse method is called.

Fixes: 950803f72547 ("bonding: fix type confusion in bond_setup_by_slave()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Tested-by: Jiayuan Chen <jiayuan.chen@shopee.com>
Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Link: https://patch.msgid.link/20260315104152.1436867-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/etherdevice.h | 3 ++-
 include/linux/if_ether.h    | 3 ++-
 include/linux/netdevice.h   | 6 ++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 9a1eacf35d37..df8f88f63a70 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -42,7 +42,8 @@ extern const struct header_ops eth_header_ops;
 
 int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 	       const void *daddr, const void *saddr, unsigned len);
-int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
+int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev,
+		     unsigned char *haddr);
 int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
 		     __be16 type);
 void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev,
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 61b7335aa037..ca9afa824aa4 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -40,7 +40,8 @@ static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb)
 	return (struct ethhdr *)skb_inner_mac_header(skb);
 }
 
-int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
+int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev,
+		     unsigned char *haddr);
 
 extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d7aac6f185bc..7ca01eb3f7d2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -311,7 +311,9 @@ struct header_ops {
 	int	(*create) (struct sk_buff *skb, struct net_device *dev,
 			   unsigned short type, const void *daddr,
 			   const void *saddr, unsigned int len);
-	int	(*parse)(const struct sk_buff *skb, unsigned char *haddr);
+	int	(*parse)(const struct sk_buff *skb,
+			 const struct net_device *dev,
+			 unsigned char *haddr);
 	int	(*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
 	void	(*cache_update)(struct hh_cache *hh,
 				const struct net_device *dev,
@@ -3445,7 +3447,7 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 
 	if (!dev->header_ops || !dev->header_ops->parse)
 		return 0;
-	return dev->header_ops->parse(skb, haddr);
+	return dev->header_ops->parse(skb, dev, haddr);
 }
 
 static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb)
-- 
cgit v1.2.3


From 66360460cab63c248ca5b1070a01c0c29133b960 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sun, 15 Mar 2026 11:54:22 -0400
Subject: net/sched: teql: Fix double-free in teql_master_xmit

Whenever a TEQL device has a lockless Qdisc as root, qdisc_reset should
be called using the seq_lock to avoid racing with the datapath. Failure
to do so may cause crashes like the following:

[  238.028993][  T318] BUG: KASAN: double-free in skb_release_data (net/core/skbuff.c:1139)
[  238.029328][  T318] Free of addr ffff88810c67ec00 by task poc_teql_uaf_ke/318
[  238.029749][  T318]
[  238.029900][  T318] CPU: 3 UID: 0 PID: 318 Comm: poc_teql_ke Not tainted 7.0.0-rc3-00149-ge5b31d988a41 #704 PREEMPT(full)
[  238.029906][  T318] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  238.029910][  T318] Call Trace:
[  238.029913][  T318]  <TASK>
[  238.029916][  T318]  dump_stack_lvl (lib/dump_stack.c:122)
[  238.029928][  T318]  print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
[  238.029940][  T318]  ? skb_release_data (net/core/skbuff.c:1139)
[  238.029944][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
...
[  238.029957][  T318]  ? skb_release_data (net/core/skbuff.c:1139)
[  238.029969][  T318]  kasan_report_invalid_free (mm/kasan/report.c:221 mm/kasan/report.c:563)
[  238.029979][  T318]  ? skb_release_data (net/core/skbuff.c:1139)
[  238.029989][  T318]  check_slab_allocation (mm/kasan/common.c:231)
[  238.029995][  T318]  kmem_cache_free (mm/slub.c:2637 (discriminator 1) mm/slub.c:6168 (discriminator 1) mm/slub.c:6298 (discriminator 1))
[  238.030004][  T318]  skb_release_data (net/core/skbuff.c:1139)
...
[  238.030025][  T318]  sk_skb_reason_drop (net/core/skbuff.c:1256)
[  238.030032][  T318]  pfifo_fast_reset (./include/linux/ptr_ring.h:171 ./include/linux/ptr_ring.h:309 ./include/linux/skb_array.h:98 net/sched/sch_generic.c:827)
[  238.030039][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
...
[  238.030054][  T318]  qdisc_reset (net/sched/sch_generic.c:1034)
[  238.030062][  T318]  teql_destroy (./include/linux/spinlock.h:395 net/sched/sch_teql.c:157)
[  238.030071][  T318]  __qdisc_destroy (./include/net/pkt_sched.h:328 net/sched/sch_generic.c:1077)
[  238.030077][  T318]  qdisc_graft (net/sched/sch_api.c:1062 net/sched/sch_api.c:1053 net/sched/sch_api.c:1159)
[  238.030089][  T318]  ? __pfx_qdisc_graft (net/sched/sch_api.c:1091)
[  238.030095][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  238.030102][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  238.030106][  T318]  ? srso_alias_return_thunk (arch/x86/lib/retpoline.S:221)
[  238.030114][  T318]  tc_get_qdisc (net/sched/sch_api.c:1529 net/sched/sch_api.c:1556)
...
[  238.072958][  T318] Allocated by task 303 on cpu 5 at 238.026275s:
[  238.073392][  T318]  kasan_save_stack (mm/kasan/common.c:58)
[  238.073884][  T318]  kasan_save_track (mm/kasan/common.c:64 (discriminator 5) mm/kasan/common.c:79 (discriminator 5))
[  238.074230][  T318]  __kasan_slab_alloc (mm/kasan/common.c:369)
[  238.074578][  T318]  kmem_cache_alloc_node_noprof (./include/linux/kasan.h:253 mm/slub.c:4542 mm/slub.c:4869 mm/slub.c:4921)
[  238.076091][  T318]  kmalloc_reserve (net/core/skbuff.c:616 (discriminator 107))
[  238.076450][  T318]  __alloc_skb (net/core/skbuff.c:713)
[  238.076834][  T318]  alloc_skb_with_frags (./include/linux/skbuff.h:1383 net/core/skbuff.c:6763)
[  238.077178][  T318]  sock_alloc_send_pskb (net/core/sock.c:2997)
[  238.077520][  T318]  packet_sendmsg (net/packet/af_packet.c:2926 net/packet/af_packet.c:3019 net/packet/af_packet.c:3108)
[  238.081469][  T318]
[  238.081870][  T318] Freed by task 299 on cpu 1 at 238.028496s:
[  238.082761][  T318]  kasan_save_stack (mm/kasan/common.c:58)
[  238.083481][  T318]  kasan_save_track (mm/kasan/common.c:64 (discriminator 5) mm/kasan/common.c:79 (discriminator 5))
[  238.085348][  T318]  kasan_save_free_info (mm/kasan/generic.c:587 (discriminator 1))
[  238.085900][  T318]  __kasan_slab_free (mm/kasan/common.c:287)
[  238.086439][  T318]  kmem_cache_free (mm/slub.c:6168 (discriminator 3) mm/slub.c:6298 (discriminator 3))
[  238.087007][  T318]  skb_release_data (net/core/skbuff.c:1139)
[  238.087491][  T318]  consume_skb (net/core/skbuff.c:1451)
[  238.087757][  T318]  teql_master_xmit (net/sched/sch_teql.c:358)
[  238.088116][  T318]  dev_hard_start_xmit (./include/linux/netdevice.h:5324 ./include/linux/netdevice.h:5333 net/core/dev.c:3871 net/core/dev.c:3887)
[  238.088468][  T318]  sch_direct_xmit (net/sched/sch_generic.c:347)
[  238.088820][  T318]  __qdisc_run (net/sched/sch_generic.c:420 (discriminator 1))
[  238.089166][  T318]  __dev_queue_xmit (./include/net/sch_generic.h:229 ./include/net/pkt_sched.h:121 ./include/net/pkt_sched.h:117 net/core/dev.c:4196 net/core/dev.c:4802)

Workflow to reproduce:
1. Initialize a TEQL topology (dummy0 and ifb0 as slaves, teql0 up).
2. Start multiple sender workers continuously transmitting packets
   through teql0 to drive teql_master_xmit().
3. In parallel, repeatedly delete and re-add the root qdisc on
   dummy0 and ifb0 via RTNETLINK, forcing frequent teardown and reset activity
   (teql_destroy() / qdisc_reset()).
4. After running both workloads concurrently for several iterations,
   KASAN reports slab-use-after-free or double-free in the skb free path.

Fix this by moving dev_reset_queue to sch_generic.h and calling it, instead
of qdisc_reset, in teql_destroy since it handles both the lock and lockless
cases correctly for root qdiscs.

Fixes: 96009c7d500e ("sched: replace __QDISC_STATE_RUNNING bit with a spin lock")
Reported-by: Xianrui Dong <keenanat2000@gmail.com>
Tested-by: Xianrui Dong <keenanat2000@gmail.com>
Co-developed-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260315155422.147256-1-jhs@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sch_generic.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d5d55cb21686..cafb266a0b80 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -716,6 +716,34 @@ void qdisc_destroy(struct Qdisc *qdisc);
 void qdisc_put(struct Qdisc *qdisc);
 void qdisc_put_unlocked(struct Qdisc *qdisc);
 void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
+
+static inline void dev_reset_queue(struct net_device *dev,
+				   struct netdev_queue *dev_queue,
+				   void *_unused)
+{
+	struct Qdisc *qdisc;
+	bool nolock;
+
+	qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
+	if (!qdisc)
+		return;
+
+	nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+	if (nolock)
+		spin_lock_bh(&qdisc->seqlock);
+	spin_lock_bh(qdisc_lock(qdisc));
+
+	qdisc_reset(qdisc);
+
+	spin_unlock_bh(qdisc_lock(qdisc));
+	if (nolock) {
+		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+		spin_unlock_bh(&qdisc->seqlock);
+	}
+}
+
 #ifdef CONFIG_NET_SCHED
 int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
 			      void *type_data);
-- 
cgit v1.2.3


From a0671125d4f55e1e98d9bde8a0b671941987e208 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 13 Mar 2026 07:55:31 +0100
Subject: clsact: Fix use-after-free in init/destroy rollback asymmetry

Fix a use-after-free in the clsact qdisc upon init/destroy rollback asymmetry.
The latter is achieved by first fully initializing a clsact instance, and
then in a second step having a replacement failure for the new clsact qdisc
instance. clsact_init() initializes ingress first and then takes care of the
egress part. This can fail midway, for example, via tcf_block_get_ext(). Upon
failure, the kernel will trigger the clsact_destroy() callback.

Commit 1cb6f0bae504 ("bpf: Fix too early release of tcx_entry") details the
way how the transition is happening. If tcf_block_get_ext on the q->ingress_block
ends up failing, we took the tcx_miniq_inc reference count on the ingress
side, but not yet on the egress side. clsact_destroy() tests whether the
{ingress,egress}_entry was non-NULL. However, even in midway failure on the
replacement, both are in fact non-NULL with a valid egress_entry from the
previous clsact instance.

What we really need to test for is whether the qdisc instance-specific ingress
or egress side previously got initialized. This adds a small helper for checking
the miniq initialization called mini_qdisc_pair_inited, and utilizes that upon
clsact_destroy() in order to fix the use-after-free scenario. Convert the
ingress_destroy() side as well so both are consistent with each other.

Fixes: 1cb6f0bae504 ("bpf: Fix too early release of tcx_entry")
Reported-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20260313065531.98639-1-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/sch_generic.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index cafb266a0b80..c3d657359a3d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1457,6 +1457,11 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
 void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
 				struct tcf_block *block);
 
+static inline bool mini_qdisc_pair_inited(struct mini_Qdisc_pair *miniqp)
+{
+	return !!miniqp->p_miniq;
+}
+
 void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);
 
 int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
-- 
cgit v1.2.3


From 45c6a2dc7ec8339052666b06065c521a10cc29bb Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Mar 2026 16:52:14 -0800
Subject: iommu/io-pgtable: fix all kernel-doc warnings in io-pgtable.h

Avoid kernel-doc warnings in io-pgtable.h:
- use the correct struct member names or kernel-doc format
- add a missing struct member description
- add a missing function return comment section

Warning: include/linux/io-pgtable.h:187 struct member 'coherent_walk' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_lpae_s1_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_lpae_s2_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_v7s_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'arm_mali_lpae_cfg'
 not described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'apple_dart_cfg' not
 described in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:187 struct member 'amd' not described
 in 'io_pgtable_cfg'
Warning: include/linux/io-pgtable.h:223 struct member
 'read_and_clear_dirty' not described in 'io_pgtable_ops'
Warning: include/linux/io-pgtable.h:237 No description found for return
 value of 'alloc_io_pgtable_ops'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/io-pgtable.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 7a1516011ccf..e19872e37e06 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -53,7 +53,7 @@ struct iommu_flush_ops {
  *                 tables.
  * @ias:           Input address (iova) size, in bits.
  * @oas:           Output address (paddr) size, in bits.
- * @coherent_walk  A flag to indicate whether or not page table walks made
+ * @coherent_walk: A flag to indicate whether or not page table walks made
  *                 by the IOMMU are coherent with the CPU caches.
  * @tlb:           TLB management callbacks for this set of tables.
  * @iommu_dev:     The device representing the DMA configuration for the
@@ -136,6 +136,7 @@ struct io_pgtable_cfg {
 	void (*free)(void *cookie, void *pages, size_t size);
 
 	/* Low-level data specific to the table format */
+	/* private: */
 	union {
 		struct {
 			u64	ttbr;
@@ -203,6 +204,9 @@ struct arm_lpae_io_pgtable_walk_data {
  * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  * @pgtable_walk: (optional) Perform a page table walk for a given iova.
+ * @read_and_clear_dirty: Record dirty info per IOVA. If an IOVA is dirty,
+ *			  clear its dirty state from the PTE unless the
+ *			  IOMMU_DIRTY_NO_CLEAR flag is passed in.
  *
  * These functions map directly onto the iommu_ops member functions with
  * the same names.
@@ -231,7 +235,9 @@ struct io_pgtable_ops {
  *          the configuration actually provided by the allocator (e.g. the
  *          pgsize_bitmap may be restricted).
  * @cookie: An opaque token provided by the IOMMU driver and passed back to
- *          the callback routines in cfg->tlb.
+ *          the callback routines.
+ *
+ * Returns: Pointer to the &struct io_pgtable_ops for this set of page tables.
  */
 struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
 					    struct io_pgtable_cfg *cfg,
-- 
cgit v1.2.3


From cb3d1049f4ea77d5ad93f17d8ac1f2ed4da70501 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Tue, 3 Mar 2026 12:53:18 +0100
Subject: driver core: generalize driver_override in struct device

Currently, there are 12 busses (including platform and PCI) that
duplicate the driver_override logic for their individual devices.

All of them seem to be prone to the bug described in [1].

While this could be solved for every bus individually using a separate
lock, solving this in the driver-core generically results in less (and
cleaner) changes overall.

Thus, move driver_override to struct device, provide corresponding
accessors for busses and handle locking with a separate lock internally.

In particular, add device_set_driver_override(),
device_has_driver_override(), device_match_driver_override() and
generalize the sysfs store() and show() callbacks via a driver_override
feature flag in struct bus_type.

Until all busses have migrated, keep driver_set_override() in place.

Note that we can't use the device lock for the reasons described in [2].

Link: https://bugzilla.kernel.org/show_bug.cgi?id=220789 [1]
Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [2]
Tested-by: Gui-Dong Han <hanguidong02@gmail.com>
Co-developed-by: Gui-Dong Han <hanguidong02@gmail.com>
Signed-off-by: Gui-Dong Han <hanguidong02@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260303115720.48783-2-dakr@kernel.org
[ Use dev->bus instead of sp->bus for consistency; fix commit message to
  refer to the struct bus_type's driver_override feature flag. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 include/linux/device.h     | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device/bus.h |  4 ++++
 2 files changed, 58 insertions(+)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 0be95294b6e6..e65d564f01cd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -483,6 +483,8 @@ struct device_physical_location {
  * 		on.  This shrinks the "Board Support Packages" (BSPs) and
  * 		minimizes board-specific #ifdefs in drivers.
  * @driver_data: Private pointer for driver specific info.
+ * @driver_override: Driver name to force a match.  Do not touch directly; use
+ *		     device_set_driver_override() instead.
  * @links:	Links to suppliers and consumers of this device.
  * @power:	For device power management.
  *		See Documentation/driver-api/pm/devices.rst for details.
@@ -576,6 +578,10 @@ struct device {
 					   core doesn't touch it */
 	void		*driver_data;	/* Driver data, set and get with
 					   dev_set_drvdata/dev_get_drvdata */
+	struct {
+		const char	*name;
+		spinlock_t	lock;
+	} driver_override;
 	struct mutex		mutex;	/* mutex to synchronize calls to
 					 * its driver.
 					 */
@@ -701,6 +707,54 @@ struct device_link {
 
 #define kobj_to_dev(__kobj)	container_of_const(__kobj, struct device, kobj)
 
+int __device_set_driver_override(struct device *dev, const char *s, size_t len);
+
+/**
+ * device_set_driver_override() - Helper to set or clear driver override.
+ * @dev: Device to change
+ * @s: NUL-terminated string, new driver name to force a match, pass empty
+ *     string to clear it ("" or "\n", where the latter is only for sysfs
+ *     interface).
+ *
+ * Helper to set or clear driver override of a device.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static inline int device_set_driver_override(struct device *dev, const char *s)
+{
+	return __device_set_driver_override(dev, s, s ? strlen(s) : 0);
+}
+
+/**
+ * device_has_driver_override() - Check if a driver override has been set.
+ * @dev: device to check
+ *
+ * Returns true if a driver override has been set for this device.
+ */
+static inline bool device_has_driver_override(struct device *dev)
+{
+	guard(spinlock)(&dev->driver_override.lock);
+	return !!dev->driver_override.name;
+}
+
+/**
+ * device_match_driver_override() - Match a driver against the device's driver_override.
+ * @dev: device to check
+ * @drv: driver to match against
+ *
+ * Returns > 0 if a driver override is set and matches the given driver, 0 if a
+ * driver override is set but does not match, or < 0 if a driver override is not
+ * set at all.
+ */
+static inline int device_match_driver_override(struct device *dev,
+					       const struct device_driver *drv)
+{
+	guard(spinlock)(&dev->driver_override.lock);
+	if (dev->driver_override.name)
+		return !strcmp(dev->driver_override.name, drv->name);
+	return -1;
+}
+
 /**
  * device_iommu_mapped - Returns true when the device DMA is translated
  *			 by an IOMMU
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 63de5f053c33..c1b463cd6464 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -65,6 +65,9 @@ struct fwnode_handle;
  *			this bus.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
+ * @driver_override:	Set to true if this bus supports the driver_override
+ *			mechanism, which allows userspace to force a specific
+ *			driver to bind to a device via a sysfs attribute.
  * @need_parent_lock:	When probing or removing a device on this bus, the
  *			device core should lock the device's parent.
  *
@@ -106,6 +109,7 @@ struct bus_type {
 
 	const struct dev_pm_ops *pm;
 
+	bool driver_override;
 	bool need_parent_lock;
 };
 
-- 
cgit v1.2.3


From 2b38efc05bf7a8568ec74bfffea0f5cfa62bc01d Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Tue, 3 Mar 2026 12:53:21 +0100
Subject: driver core: platform: use generic driver_override infrastructure

When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 3d713e0e382e ("driver core: platform: add device binding path 'driver_override'")
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/20260303115720.48783-5-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 include/linux/platform_device.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 813da101b5bf..ed1d50d1c3c1 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -31,11 +31,6 @@ struct platform_device {
 	struct resource	*resource;
 
 	const struct platform_device_id	*id_entry;
-	/*
-	 * Driver name to force a match.  Do not set directly, because core
-	 * frees it.  Use driver_set_override() to set or clear it.
-	 */
-	const char *driver_override;
 
 	/* MFD cell pointer */
 	struct mfd_cell *mfd_cell;
-- 
cgit v1.2.3


From d5ad6ab61cbd89afdb60881f6274f74328af3ee9 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 14 Mar 2026 06:54:55 +0000
Subject: wifi: mac80211: always free skb on ieee80211_tx_prepare_skb() failure

ieee80211_tx_prepare_skb() has three error paths, but only two of them
free the skb. The first error path (ieee80211_tx_prepare() returning
TX_DROP) does not free it, while invoke_tx_handlers() failure and the
fragmentation check both do.

Add kfree_skb() to the first error path so all three are consistent,
and remove the now-redundant frees in callers (ath9k, mt76,
mac80211_hwsim) to avoid double-free.

Document the skb ownership guarantee in the function's kdoc.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/20260314065455.2462900-1-nbd@nbd.name
Fixes: 06be6b149f7e ("mac80211: add ieee80211_tx_prepare_skb() helper function")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7f9d96939a4e..adce2144a678 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7407,7 +7407,9 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif,
  * @band: the band to transmit on
  * @sta: optional pointer to get the station to send the frame to
  *
- * Return: %true if the skb was prepared, %false otherwise
+ * Return: %true if the skb was prepared, %false otherwise.
+ * On failure, the skb is freed by this function; callers must not
+ * free it again.
  *
  * Note: must be called under RCU lock
  */
-- 
cgit v1.2.3


From b3a6df291fecf5f8a308953b65ca72b7fc9e015d Mon Sep 17 00:00:00 2001
From: Xiang Mei <xmei5@asu.edu>
Date: Mon, 16 Mar 2026 18:02:41 -0700
Subject: udp_tunnel: fix NULL deref caused by udp_sock_create6 when
 CONFIG_IPV6=n

When CONFIG_IPV6 is disabled, the udp_sock_create6() function returns 0
(success) without actually creating a socket. Callers such as
fou_create() then proceed to dereference the uninitialized socket
pointer, resulting in a NULL pointer dereference.

The captured NULL deref crash:
  BUG: kernel NULL pointer dereference, address: 0000000000000018
  RIP: 0010:fou_nl_add_doit (net/ipv4/fou_core.c:590 net/ipv4/fou_core.c:764)
  [...]
  Call Trace:
    <TASK>
    genl_family_rcv_msg_doit.constprop.0 (net/netlink/genetlink.c:1114)
    genl_rcv_msg (net/netlink/genetlink.c:1194 net/netlink/genetlink.c:1209)
    [...]
    netlink_rcv_skb (net/netlink/af_netlink.c:2550)
    genl_rcv (net/netlink/genetlink.c:1219)
    netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
    netlink_sendmsg (net/netlink/af_netlink.c:1894)
    __sock_sendmsg (net/socket.c:727 (discriminator 1) net/socket.c:742 (discriminator 1))
    __sys_sendto (./include/linux/file.h:62 (discriminator 1) ./include/linux/file.h:83 (discriminator 1) net/socket.c:2183 (discriminator 1))
    __x64_sys_sendto (net/socket.c:2213 (discriminator 1) net/socket.c:2209 (discriminator 1) net/socket.c:2209 (discriminator 1))
    do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1))
    entry_SYSCALL_64_after_hwframe (net/arch/x86/entry/entry_64.S:130)

This patch makes udp_sock_create6 return -EPFNOSUPPORT instead, so
callers correctly take their error paths. There is only one caller of
the vulnerable function and only privileged users can trigger it.

Fixes: fd384412e199b ("udp_tunnel: Seperate ipv6 functions into its own file.")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Link: https://patch.msgid.link/20260317010241.1893893-1-xmei5@asu.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/udp_tunnel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index d9c6d04bb3b5..fc1fc43345b5 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -52,7 +52,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 				   struct socket **sockp)
 {
-	return 0;
+	return -EPFNOSUPPORT;
 }
 #endif
 
-- 
cgit v1.2.3


From 761fb8ec8778f0caf2bba5a41e3cff1ea86974f3 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 17 Mar 2026 11:54:01 -0400
Subject: Bluetooth: L2CAP: Fix regressions caused by reusing ident

This attempts to fix regressions caused by reusing ident, which apparently
is not handled well by certain stacks, causing them to not respond to
requests. Instead of simply returning the first unallocated id, this
stores the last used tx_ident and then attempts to use the next one until
all available ids are exhausted, and then cycles, starting over at 1.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221120
Link: https://bugzilla.kernel.org/show_bug.cgi?id=221177
Fixes: 6c3ea155e5ee ("Bluetooth: L2CAP: Fix not tracking outstanding TX ident")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Christian Eggers <ceggers@arri.de>
---
 include/net/bluetooth/l2cap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 010f1a8fd15f..5172afee5494 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -658,6 +658,7 @@ struct l2cap_conn {
 	struct sk_buff		*rx_skb;
 	__u32			rx_len;
 	struct ida		tx_ida;
+	__u8			tx_ident;
 
 	struct sk_buff_head	pending_rx;
 	struct work_struct	pending_rx_work;
-- 
cgit v1.2.3


From 418eab7a6f3c002d8e64d6e95ec27118017019af Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 19 Mar 2026 14:29:20 -0600
Subject: io_uring/kbuf: propagate BUF_MORE through early buffer commit path

When io_should_commit() returns true (eg for non-pollable files), buffer
commit happens at buffer selection time and sel->buf_list is set to
NULL. When __io_put_kbufs() generates CQE flags at completion time, it
calls __io_put_kbuf_ring() which finds a NULL buffer_list and hence
cannot determine whether the buffer was consumed or not. This means that
IORING_CQE_F_BUF_MORE is never set for non-pollable input with
incrementally consumed buffers.

Likewise for io_buffers_select(), which always commits upfront and
discards the return value of io_kbuf_commit().

Add REQ_F_BUF_MORE to store the result of io_kbuf_commit() during early
commit. Then __io_put_kbuf_ring() can check this flag and set
IORING_CQE_F_BUF_MORE accordingly.

Reported-by: Martin Michaelis <code@mgjm.de>
Cc: stable@vger.kernel.org
Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
Link: https://github.com/axboe/liburing/issues/1553
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index dd1420bfcb73..214fdbd49052 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -541,6 +541,7 @@ enum {
 	REQ_F_BL_NO_RECYCLE_BIT,
 	REQ_F_BUFFERS_COMMIT_BIT,
 	REQ_F_BUF_NODE_BIT,
+	REQ_F_BUF_MORE_BIT,
 	REQ_F_HAS_METADATA_BIT,
 	REQ_F_IMPORT_BUFFER_BIT,
 	REQ_F_SQE_COPIED_BIT,
@@ -626,6 +627,8 @@ enum {
 	REQ_F_BUFFERS_COMMIT	= IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
 	/* buf node is valid */
 	REQ_F_BUF_NODE		= IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
+	/* incremental buffer consumption, more space available */
+	REQ_F_BUF_MORE		= IO_REQ_FLAG(REQ_F_BUF_MORE_BIT),
 	/* request has read/write metadata assigned */
 	REQ_F_HAS_METADATA	= IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
 	/*
-- 
cgit v1.2.3


From 6f45b1604cf43945ef472ae4ef30354025307c19 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 16 Mar 2026 21:06:46 +0200
Subject: dma-mapping: handle DMA_ATTR_CPU_CACHE_CLEAN in trace output

Tracing prints decoded DMA attribute flags, but it does not yet
include the recently added DMA_ATTR_CPU_CACHE_CLEAN. Add support
for decoding and displaying this attribute in the trace output.

Fixes: 61868dc55a11 ("dma-mapping: add DMA_ATTR_CPU_CACHE_CLEAN")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-2-1dde90a7f08b@nvidia.com
---
 include/trace/events/dma.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 33e99e792f1a..69cb3805ee81 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -32,7 +32,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
 		{ DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \
 		{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
 		{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
-		{ DMA_ATTR_MMIO, "MMIO" })
+		{ DMA_ATTR_MMIO, "MMIO" }, \
+		{ DMA_ATTR_CPU_CACHE_CLEAN, "CACHE_CLEAN" })
 
 DECLARE_EVENT_CLASS(dma_map,
 	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
-- 
cgit v1.2.3


From 9bb0a4d6a4433b75274204b083dac8e515d2007d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 16 Mar 2026 21:06:47 +0200
Subject: dma-mapping: Clarify valid conditions for CPU cache line overlap

Rename the DMA_ATTR_CPU_CACHE_CLEAN attribute to better reflect that it
is a debugging aid to inform DMA core code that CPU cache line overlaps are
allowed, and refine the documentation describing its use.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-3-1dde90a7f08b@nvidia.com
---
 include/linux/dma-mapping.h | 8 ++++----
 include/trace/events/dma.h  | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29973baa0581..da44394b3a1a 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -80,11 +80,11 @@
 #define DMA_ATTR_MMIO		(1UL << 10)
 
 /*
- * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline
- * overlapping this buffer while it is mapped for DMA. All mappings sharing
- * a cacheline must have this attribute for this to be considered safe.
+ * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be
+ * overlapped. All mappings sharing a cacheline must have this attribute for
+ * this to be considered safe.
  */
-#define DMA_ATTR_CPU_CACHE_CLEAN	(1UL << 11)
+#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES	(1UL << 11)
 
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 69cb3805ee81..8c64bc0721fe 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -33,7 +33,7 @@ TRACE_DEFINE_ENUM(DMA_NONE);
 		{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
 		{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
 		{ DMA_ATTR_MMIO, "MMIO" }, \
-		{ DMA_ATTR_CPU_CACHE_CLEAN, "CACHE_CLEAN" })
+		{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" })
 
 DECLARE_EVENT_CLASS(dma_map,
 	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
-- 
cgit v1.2.3


From e6a58fa2556203a7f6731b4071705dc81cca5ca5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 16 Mar 2026 21:06:48 +0200
Subject: dma-mapping: Introduce DMA require coherency attribute

Mapping buffers which carry this attribute require a DMA coherent system.
This means that they can't take the SWIOTLB path, can perform CPU cache
overlap and don't perform cache flushing.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260316-dma-debug-overlap-v3-4-1dde90a7f08b@nvidia.com
---
 include/linux/dma-mapping.h | 7 +++++++
 include/trace/events/dma.h  | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index da44394b3a1a..482b919f040f 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -86,6 +86,13 @@
  */
 #define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES	(1UL << 11)
 
+/*
+ * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required.
+ * All mappings that carry this attribute can't work with SWIOTLB and cache
+ * flushing.
+ */
+#define DMA_ATTR_REQUIRE_COHERENT	(1UL << 12)
+
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
  * be given to a device to use as a DMA source or target.  It is specific to a
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 8c64bc0721fe..63597b004424 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -33,7 +33,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
 		{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
 		{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
 		{ DMA_ATTR_MMIO, "MMIO" }, \
-		{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" })
+		{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
+		{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
 
 DECLARE_EVENT_CLASS(dma_map,
 	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
-- 
cgit v1.2.3


From 1613462be621ad5103ec338a7b0ca0746ec4e5f1 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 14 Oct 2025 13:28:15 +0200
Subject: xen/privcmd: add boot control for restricted usage in domU

When running in an unprivileged domU under Xen, the privcmd driver
is restricted to allow only hypercalls against a target domain, for
which the current domU is acting as a device model.

Add a boot parameter "unrestricted" to allow all hypercalls (the
hypervisor will still refuse destructive hypercalls affecting other
guests).

Make this new parameter effective only in case the domU wasn't started
using secure boot, as otherwise hypercalls targeting the domU itself
might result in violating the secure boot functionality.

This is achieved by adding another lockdown reason, which can be
tested to not being set when applying the "unrestricted" option.

This is part of XSA-482

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch
---
 include/linux/security.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index 83a646d72f6f..ee88dd2d2d1f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -145,6 +145,7 @@ enum lockdown_reason {
 	LOCKDOWN_BPF_WRITE_USER,
 	LOCKDOWN_DBG_WRITE_KERNEL,
 	LOCKDOWN_RTAS_ERROR_INJECTION,
+	LOCKDOWN_XEN_USER_ACTIONS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
-- 
cgit v1.2.3


From 76f9377cd2ab7a9220c25d33940d9ca20d368172 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Thu, 19 Mar 2026 17:51:45 -0700
Subject: writeback: don't block sync for filesystems with no data integrity
 guarantees

Add a SB_I_NO_DATA_INTEGRITY superblock flag for filesystems that cannot
guarantee data persistence on sync (eg fuse). For superblocks with this
flag set, sync kicks off writeback of dirty inodes but does not wait
for the flusher threads to complete the writeback.

This replaces the per-inode AS_NO_DATA_INTEGRITY mapping flag added in
commit f9a49aa302a0 ("fs/writeback: skip AS_NO_DATA_INTEGRITY mappings
in wait_sb_inodes()"). The flag belongs at the superblock level because
data integrity is a filesystem-wide property, not a per-inode one.
Having this flag at the superblock level also allows us to skip having
to iterate every dirty inode in wait_sb_inodes() only to skip each inode
individually.

Prior to this commit, mappings with no data integrity guarantees skipped
waiting on writeback completion but still waited on the flusher threads
to finish initiating the writeback. Waiting on the flusher threads is
unnecessary. This commit kicks off writeback but does not wait on the
flusher threads. This change properly addresses a recent report [1] for
a suspend-to-RAM hang seen on fuse-overlayfs that was caused by waiting
on the flusher threads to finish:

Workqueue: pm_fs_sync pm_fs_sync_work_fn
Call Trace:
 <TASK>
 __schedule+0x457/0x1720
 schedule+0x27/0xd0
 wb_wait_for_completion+0x97/0xe0
 sync_inodes_sb+0xf8/0x2e0
 __iterate_supers+0xdc/0x160
 ksys_sync+0x43/0xb0
 pm_fs_sync_work_fn+0x17/0xa0
 process_one_work+0x193/0x350
 worker_thread+0x1a1/0x310
 kthread+0xfc/0x240
 ret_from_fork+0x243/0x280
 ret_from_fork_asm+0x1a/0x30
 </TASK>

On fuse this is problematic because there are paths that may cause the
flusher thread to block (eg if systemd freezes the user session cgroups
first, which freezes the fuse daemon, before invoking the kernel
suspend. The kernel suspend triggers ->write_inode() which on fuse issues
a synchronous setattr request, which cannot be processed since the
daemon is frozen. Or if the daemon is buggy and cannot properly complete
writeback, initiating writeback on a dirty folio already under writeback
leads to writeback_get_folio() -> folio_prepare_writeback() ->
unconditional wait on writeback to finish, which will cause a hang).
This commit restores fuse to its prior behavior before tmp folios were
removed, where sync was essentially a no-op.

[1] https://lore.kernel.org/linux-fsdevel/CAJnrk1a-asuvfrbKXbEwwDSctvemF+6zfhdnuzO65Pt8HsFSRw@mail.gmail.com/T/#m632c4648e9cafc4239299887109ebd880ac6c5c1

Fixes: 0c58a97f919c ("fuse: remove tmp folio for writebacks and internal rb tree")
Reported-by: John <therealgraysky@proton.me>
Cc: stable@vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://patch.msgid.link/20260320005145.2483161-2-joannelkoong@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs/super_types.h |  1 +
 include/linux/pagemap.h        | 11 -----------
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h
index fa7638b81246..383050e7fdf5 100644
--- a/include/linux/fs/super_types.h
+++ b/include/linux/fs/super_types.h
@@ -338,5 +338,6 @@ struct super_block {
 #define SB_I_NOUMASK	0x00001000	/* VFS does not apply umask */
 #define SB_I_NOIDMAP	0x00002000	/* No idmapped mounts on this superblock */
 #define SB_I_ALLOW_HSM	0x00004000	/* Allow HSM events on this superblock */
+#define SB_I_NO_DATA_INTEGRITY	0x00008000 /* fs cannot guarantee data persistence on sync */
 
 #endif /* _LINUX_FS_SUPER_TYPES_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ec442af3f886..31a848485ad9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -210,7 +210,6 @@ enum mapping_flags {
 	AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
 	AS_KERNEL_FILE = 10,	/* mapping for a fake kernel file that shouldn't
 				   account usage to user cgroups */
-	AS_NO_DATA_INTEGRITY = 11, /* no data integrity guarantees */
 	/* Bits 16-25 are used for FOLIO_ORDER */
 	AS_FOLIO_ORDER_BITS = 5,
 	AS_FOLIO_ORDER_MIN = 16,
@@ -346,16 +345,6 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct addres
 	return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
 }
 
-static inline void mapping_set_no_data_integrity(struct address_space *mapping)
-{
-	set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
-}
-
-static inline bool mapping_no_data_integrity(const struct address_space *mapping)
-{
-	return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
-}
-
 static inline gfp_t mapping_gfp_mask(const struct address_space *mapping)
 {
 	return mapping->gfp_mask;
-- 
cgit v1.2.3


From a6f22e50c7d51aa225c392c62c33f0fae11f734d Mon Sep 17 00:00:00 2001
From: Xuewen Yan <xuewen.yan@unisoc.com>
Date: Fri, 6 Mar 2026 15:59:54 +0800
Subject: tracing: Revert "tracing: Remove pid in task_rename tracing output"

This reverts commit e3f6a42272e028c46695acc83fc7d7c42f2750ad.

The commit says that the tracepoint only deals with the current task,
however the following case is not current task:

comm_write() {
    p = get_proc_task(inode);
    if (!p)
        return -ESRCH;

    if (same_thread_group(current, p))
        set_task_comm(p, buffer);
}
where set_task_comm() calls __set_task_comm() which records
the update of p and not current.

So revert the patch to show pid.

Cc: <mhiramat@kernel.org>
Cc: <mathieu.desnoyers@efficios.com>
Cc: <elver@google.com>
Cc: <kees@kernel.org>
Link: https://patch.msgid.link/20260306075954.4533-1-xuewen.yan@unisoc.com
Fixes: e3f6a42272e0 ("tracing: Remove pid in task_rename tracing output")
Reported-by: Guohua Yan <guohua.yan@unisoc.com>
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/events/task.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/task.h b/include/trace/events/task.h
index 4f0759634306..b9a129eb54d9 100644
--- a/include/trace/events/task.h
+++ b/include/trace/events/task.h
@@ -38,19 +38,22 @@ TRACE_EVENT(task_rename,
 	TP_ARGS(task, comm),
 
 	TP_STRUCT__entry(
+		__field(	pid_t,	pid)
 		__array(	char, oldcomm,  TASK_COMM_LEN)
 		__array(	char, newcomm,  TASK_COMM_LEN)
 		__field(	short,	oom_score_adj)
 	),
 
 	TP_fast_assign(
+		__entry->pid = task->pid;
 		memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN);
 		strscpy(entry->newcomm, comm, TASK_COMM_LEN);
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
 
-	TP_printk("oldcomm=%s newcomm=%s oom_score_adj=%hd",
-		  __entry->oldcomm, __entry->newcomm, __entry->oom_score_adj)
+	TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd",
+		__entry->pid, __entry->oldcomm,
+		__entry->newcomm, __entry->oom_score_adj)
 );
 
 /**
-- 
cgit v1.2.3


From 26f775a054c3cda86ad465a64141894a90a9e145 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Thu, 19 Mar 2026 07:52:17 -0700
Subject: mm/damon/core: avoid use of half-online-committed context

One major usage of damon_call() is online DAMON parameters update.  It is
done by calling damon_commit_ctx() inside the damon_call() callback
function.  damon_commit_ctx() can fail for two reasons: 1) invalid
parameters and 2) internal memory allocation failures.  In case of
failures, the damon_ctx that attempted to be updated (commit destination)
can be partially updated (or, corrupted from a perspective), and therefore
shouldn't be used anymore.  The function only ensures the damon_ctx object
can be safely deallocated using damon_destroy_ctx().

The API callers are, however, calling damon_commit_ctx() only after
asserting the parameters are valid, to avoid damon_commit_ctx() failing due
to invalid input parameters.  But it can still theoretically fail if the
internal memory allocation fails.  In that case, DAMON may run with the
partially updated damon_ctx.  This can result in unexpected behaviors
including even NULL pointer dereference in case of damos_commit_dests()
failure [1].  Such allocation failure is arguably too small to fail, so
the real world impact would be rare.  But, given the bad consequence, this
needs to be fixed.

Avoid such partially-committed (maybe-corrupted) damon_ctx use by saving
the damon_commit_ctx() failure on the damon_ctx object.  For this,
introduce damon_ctx->maybe_corrupted field.  damon_commit_ctx() sets it
when it is failed.  kdamond_call() checks if the field is set after each
damon_call_control->fn() is executed.  If it is set, ignore remaining
callback requests and return.  All kdamond_call() callers including
kdamond_fn() also check the maybe_corrupted field right after
kdamond_call() invocations.  If the field is set, break the kdamond_fn()
main loop so that DAMON still doesn't use the context that might be
corrupted.

[sj@kernel.org: let kdamond_call() with cancel regardless of maybe_corrupted]
  Link: https://lkml.kernel.org/r/20260320031553.2479-1-sj@kernel.org
  Link: https://sashiko.dev/#/patchset/20260319145218.86197-1-sj%40kernel.org
Link: https://lkml.kernel.org/r/20260319145218.86197-1-sj@kernel.org
Link: https://lore.kernel.org/20260319043309.97966-1-sj@kernel.org [1]
Fixes: 3301f1861d34 ("mm/damon/sysfs: handle commit command using damon_call()")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[6.15+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index a4fea23da857..be3d198043ff 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -810,6 +810,12 @@ struct damon_ctx {
 	struct damos_walk_control *walk_control;
 	struct mutex walk_control_lock;
 
+	/*
+	 * indicate if this may be corrupted.  Currently this is set only for
+	 * damon_commit_ctx() failure.
+	 */
+	bool maybe_corrupted;
+
 	/* Working thread of the given DAMON context */
 	struct task_struct *kdamond;
 	/* Protects @kdamond field access */
-- 
cgit v1.2.3


From a0e0c2f8c5f32b675f58e25a9338283cedb5ad2b Mon Sep 17 00:00:00 2001
From: Yixun Lan <dlan@kernel.org>
Date: Fri, 20 Mar 2026 11:06:17 +0000
Subject: reset: spacemit: k3: Decouple composite reset lines

Instead of grouping several different reset lines into one composite
reset, decouple them to individual ones which make it more aligned
with underlying hardware. And for DWC USB driver, it will match well
with the number of the reset property in the DT bindings.

The DWC3 USB host controller in K3 SoC has three reset lines - AHB, VCC,
PHY. The PCIe controller also has three reset lines - DBI, Slave, Master.
Also three reset lines each for UCIE and RCPU block.

As agreed with the maintainer, the reset IDs have been rearranged into
contiguous numbers while keeping most of them unchanged to avoid breaking
patches already sent to the mailing list. The changes of DT binding header file
and reset driver are merged together as one single commit to avoid
git-bisect breakage.

Fixes: 938ce3b16582 ("reset: spacemit: Add SpacemiT K3 reset driver")
Fixes: 216e0a5e98e5 ("dt-bindings: soc: spacemit: Add K3 reset support and IDs")
Signed-off-by: Yixun Lan <dlan@kernel.org>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/dt-bindings/reset/spacemit,k3-resets.h | 48 +++++++++++++++++++-------
 1 file changed, 36 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/reset/spacemit,k3-resets.h b/include/dt-bindings/reset/spacemit,k3-resets.h
index 79ac1c22b7b5..dc1ef009ba79 100644
--- a/include/dt-bindings/reset/spacemit,k3-resets.h
+++ b/include/dt-bindings/reset/spacemit,k3-resets.h
@@ -97,11 +97,11 @@
 #define RESET_APMU_SDH0          13
 #define RESET_APMU_SDH1          14
 #define RESET_APMU_SDH2          15
-#define RESET_APMU_USB2          16
-#define RESET_APMU_USB3_PORTA    17
-#define RESET_APMU_USB3_PORTB    18
-#define RESET_APMU_USB3_PORTC    19
-#define RESET_APMU_USB3_PORTD    20
+#define RESET_APMU_USB2_AHB      16
+#define RESET_APMU_USB2_VCC      17
+#define RESET_APMU_USB2_PHY      18
+#define RESET_APMU_USB3_A_AHB    19
+#define RESET_APMU_USB3_A_VCC    20
 #define RESET_APMU_QSPI          21
 #define RESET_APMU_QSPI_BUS      22
 #define RESET_APMU_DMA           23
@@ -132,8 +132,8 @@
 #define RESET_APMU_CPU7_SW       48
 #define RESET_APMU_C1_MPSUB_SW   49
 #define RESET_APMU_MPSUB_DBG     50
-#define RESET_APMU_UCIE          51
-#define RESET_APMU_RCPU          52
+#define RESET_APMU_USB3_A_PHY    51	/* USB3 A */
+#define RESET_APMU_USB3_B_AHB    52
 #define RESET_APMU_DSI4LN2_ESCCLK     53
 #define RESET_APMU_DSI4LN2_LCD_SW     54
 #define RESET_APMU_DSI4LN2_LCD_MCLK   55
@@ -143,16 +143,40 @@
 #define RESET_APMU_UFS_ACLK      59
 #define RESET_APMU_EDP0          60
 #define RESET_APMU_EDP1          61
-#define RESET_APMU_PCIE_PORTA    62
-#define RESET_APMU_PCIE_PORTB    63
-#define RESET_APMU_PCIE_PORTC    64
-#define RESET_APMU_PCIE_PORTD    65
-#define RESET_APMU_PCIE_PORTE    66
+#define RESET_APMU_USB3_B_VCC    62	/* USB3 B */
+#define RESET_APMU_USB3_B_PHY    63
+#define RESET_APMU_USB3_C_AHB    64
+#define RESET_APMU_USB3_C_VCC    65
+#define RESET_APMU_USB3_C_PHY    66
 #define RESET_APMU_EMAC0         67
 #define RESET_APMU_EMAC1         68
 #define RESET_APMU_EMAC2         69
 #define RESET_APMU_ESPI_MCLK     70
 #define RESET_APMU_ESPI_SCLK     71
+#define RESET_APMU_USB3_D_AHB    72	/* USB3 D */
+#define RESET_APMU_USB3_D_VCC    73
+#define RESET_APMU_USB3_D_PHY    74
+#define RESET_APMU_UCIE_IP       75
+#define RESET_APMU_UCIE_HOT      76
+#define RESET_APMU_UCIE_MON      77
+#define RESET_APMU_RCPU_AUDIO_SYS     78
+#define RESET_APMU_RCPU_MCU_CORE      79
+#define RESET_APMU_RCPU_AUDIO_APMU    80
+#define RESET_APMU_PCIE_A_DBI    81
+#define RESET_APMU_PCIE_A_SLAVE  82
+#define RESET_APMU_PCIE_A_MASTER 83
+#define RESET_APMU_PCIE_B_DBI    84
+#define RESET_APMU_PCIE_B_SLAVE  85
+#define RESET_APMU_PCIE_B_MASTER 86
+#define RESET_APMU_PCIE_C_DBI    87
+#define RESET_APMU_PCIE_C_SLAVE  88
+#define RESET_APMU_PCIE_C_MASTER 89
+#define RESET_APMU_PCIE_D_DBI    90
+#define RESET_APMU_PCIE_D_SLAVE  91
+#define RESET_APMU_PCIE_D_MASTER 92
+#define RESET_APMU_PCIE_E_DBI    93
+#define RESET_APMU_PCIE_E_SLAVE  94
+#define RESET_APMU_PCIE_E_MASTER 95
 
 /* DCIU resets*/
 #define RESET_DCIU_HDMA          0
-- 
cgit v1.2.3


From a85b46db143fda5869e7d8df8f258ccef5fa1719 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.de>
Date: Fri, 13 Mar 2026 14:11:39 -0400
Subject: btrfs: tracepoints: get correct superblock from dentry in event
 btrfs_sync_file()

If overlay is used on top of btrfs, dentry->d_sb translates to overlay's
super block and fsid assignment will lead to a crash.

Use file_inode(file)->i_sb to always get btrfs_sb.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 125bdc166bfe..0864700f76e0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -769,12 +769,15 @@ TRACE_EVENT(btrfs_sync_file,
 	),
 
 	TP_fast_assign(
-		const struct dentry *dentry = file->f_path.dentry;
-		const struct inode *inode = d_inode(dentry);
+		struct dentry *dentry = file_dentry(file);
+		struct inode *inode = file_inode(file);
+		struct dentry *parent = dget_parent(dentry);
+		struct inode *parent_inode = d_inode(parent);
 
-		TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb));
+		dput(parent);
+		TP_fast_assign_fsid(btrfs_sb(inode->i_sb));
 		__entry->ino		= btrfs_ino(BTRFS_I(inode));
-		__entry->parent		= btrfs_ino(BTRFS_I(d_inode(dentry->d_parent)));
+		__entry->parent		= btrfs_ino(BTRFS_I(parent_inode));
 		__entry->datasync	= datasync;
 		__entry->root_objectid	= btrfs_root_id(BTRFS_I(inode)->root);
 	),
-- 
cgit v1.2.3


From 4be7b99c253f0c85a255cc1db7127ba3232dfa30 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@google.com>
Date: Fri, 20 Mar 2026 07:23:00 +0000
Subject: ipv6: Don't remove permanent routes with exceptions from
 tb6_gc_hlist.

The cited commit mechanically put fib6_remove_gc_list()
just after every fib6_clean_expires() call.

When a temporary route is promoted to a permanent route,
there may already be exception routes tied to it.

If fib6_remove_gc_list() removes the route from tb6_gc_hlist,
such exception routes will no longer be aged.

Let's replace fib6_remove_gc_list() with a new helper
fib6_may_remove_gc_list() and use fib6_age_exceptions() there.

Note that net->ipv6 is only compiled when CONFIG_IPV6 is
enabled, so fib6_{add,remove,may_remove}_gc_list() are guarded.

Fixes: 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.")
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260320072317.2561779-3-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip6_fib.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 88b0dd4d8e09..9f8b6814a96a 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
 
+void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
+			 unsigned long now);
 void fib6_run_gc(unsigned long expires, struct net *net, bool force);
-
 void fib6_gc_cleanup(void);
 
 int fib6_init(void);
 
+#if IS_ENABLED(CONFIG_IPV6)
 /* Add the route to the gc list if it is not already there
  *
  * The callers should hold f6i->fib6_table->tb6_lock.
@@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i)
 		hlist_del_init(&f6i->gc_link);
 }
 
+static inline void fib6_may_remove_gc_list(struct net *net,
+					   struct fib6_info *f6i)
+{
+	struct fib6_gc_args gc_args;
+
+	if (hlist_unhashed(&f6i->gc_link))
+		return;
+
+	gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval);
+	gc_args.more = 0;
+
+	rcu_read_lock();
+	fib6_age_exceptions(f6i, &gc_args, jiffies);
+	rcu_read_unlock();
+}
+#endif
+
 struct ipv6_route_iter {
 	struct seq_net_private p;
 	struct fib6_walker w;
-- 
cgit v1.2.3


From e537dd15d0d4ad989d56a1021290f0c674dd8b28 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Thu, 19 Mar 2026 11:18:17 -0700
Subject: udp: Fix wildcard bind conflict check when using hash2

When binding a udp_sock to a local address and port, UDP uses
two hashes (udptable->hash and udptable->hash2) for collision
detection. The current code switches to "hash2" when
hslot->count > 10.

"hash2" is keyed by local address and local port.
"hash" is keyed by local port only.

The issue can be shown in the following bind sequence (pseudo code):

bind(fd1,  "[fd00::1]:8888")
bind(fd2,  "[fd00::2]:8888")
bind(fd3,  "[fd00::3]:8888")
bind(fd4,  "[fd00::4]:8888")
bind(fd5,  "[fd00::5]:8888")
bind(fd6,  "[fd00::6]:8888")
bind(fd7,  "[fd00::7]:8888")
bind(fd8,  "[fd00::8]:8888")
bind(fd9,  "[fd00::9]:8888")
bind(fd10, "[fd00::10]:8888")

/* Correctly return -EADDRINUSE because "hash" is used
 * instead of "hash2". udp_lib_lport_inuse() detects the
 * conflict.
 */
bind(fail_fd, "[::]:8888")

/* After one more socket is bound to "[fd00::11]:8888",
 * hslot->count exceeds 10 and "hash2" is used instead.
 */
bind(fd11, "[fd00::11]:8888")
bind(fail_fd, "[::]:8888")      /* succeeds unexpectedly */

The same issue applies to the IPv4 wildcard address "0.0.0.0"
and the IPv4-mapped wildcard address "::ffff:0.0.0.0". For
example, if there are existing sockets bound to
"192.168.1.[1-11]:8888", then binding "0.0.0.0:8888" or
"[::ffff:0.0.0.0]:8888" can also miss the conflict when
hslot->count > 10.

TCP inet_csk_get_port() already has the correct check in
inet_use_bhash2_on_bind(). Rename it to
inet_use_hash2_on_bind() and move it to inet_hashtables.h
so udp.c can reuse it in this fix.

Fixes: 30fff9231fad ("udp: bind() optimisation")
Reported-by: Andrew Onyshchuk <oandrew@meta.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260319181817.1901357-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_hashtables.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5a979dcab538..6d936e9f2fd3 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
 	return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
 }
 
+static inline bool inet_use_hash2_on_bind(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6) {
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+			return false;
+
+		if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+			return true;
+	}
+#endif
+	return sk->sk_rcv_saddr != htonl(INADDR_ANY);
+}
+
 struct inet_bind_hashbucket *
 inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);
 
-- 
cgit v1.2.3


From 38ec410b99a5ee6566f75650ce3d4fd632940fd0 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Fri, 20 Mar 2026 10:18:17 +0800
Subject: virtio-net: correct hdr_len handling for VIRTIO_NET_F_GUEST_HDRLEN

The commit be50da3e9d4a ("net: virtio_net: implement exact header length
guest feature") introduces support for the VIRTIO_NET_F_GUEST_HDRLEN
feature in virtio-net.

This feature requires virtio-net to set hdr_len to the actual header
length of the packet when transmitting, the number of
bytes from the start of the packet to the beginning of the
transport-layer payload.

However, in practice, hdr_len was being set using skb_headlen(skb),
which is clearly incorrect. This commit fixes that issue.

Fixes: be50da3e9d4a ("net: virtio_net: implement exact header length guest feature")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20260320021818.111741-2-xuanzhuo@linux.alibaba.com
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/virtio_net.h | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 75dabb763c65..361b60c8be68 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -207,6 +207,23 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 	return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type);
 }
 
+/* This function must be called after virtio_net_hdr_from_skb(). */
+static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb,
+					   struct virtio_net_hdr *hdr,
+					   bool little_endian)
+{
+	u16 hdr_len;
+
+	hdr_len = skb_transport_offset(skb);
+
+	if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4)
+		hdr_len += sizeof(struct udphdr);
+	else
+		hdr_len += tcp_hdrlen(skb);
+
+	hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len);
+}
+
 static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 					  struct virtio_net_hdr *hdr,
 					  bool little_endian,
@@ -385,7 +402,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
 			    bool tnl_hdr_negotiated,
 			    bool little_endian,
 			    int vlan_hlen,
-			    bool has_data_valid)
+			    bool has_data_valid,
+			    bool feature_hdrlen)
 {
 	struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
 	unsigned int inner_nh, outer_th;
@@ -394,9 +412,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
 
 	tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL |
 						    SKB_GSO_UDP_TUNNEL_CSUM);
-	if (!tnl_gso_type)
-		return virtio_net_hdr_from_skb(skb, hdr, little_endian,
-					       has_data_valid, vlan_hlen);
+	if (!tnl_gso_type) {
+		ret = virtio_net_hdr_from_skb(skb, hdr, little_endian,
+					      has_data_valid, vlan_hlen);
+		if (ret)
+			return ret;
+
+		if (feature_hdrlen && hdr->hdr_len)
+			__virtio_net_set_hdrlen(skb, hdr, little_endian);
+
+		return ret;
+	}
 
 	/* Tunnel support not negotiated but skb ask for it. */
 	if (!tnl_hdr_negotiated)
-- 
cgit v1.2.3


From 6c860dc02a8e60b438e26940227dfa641fcdb66a Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Fri, 20 Mar 2026 10:18:18 +0800
Subject: virtio-net: correct hdr_len handling for tunnel gso

The commit a2fb4bc4e2a6a03 ("net: implement virtio helpers to handle UDP
GSO tunneling.") introduces support for the UDP GSO tunnel feature in
virtio-net.

The virtio spec says:

    If the \field{gso_type} has the VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 bit or
    VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6 bit set, \field{hdr_len} accounts for
    all the headers up to and including the inner transport.

The commit did not update the hdr_len to include the inner transport.

I observed that the "hdr_len" is 116 for this packet:

    17:36:18.241105 52:55:00:d1:27:0a > 2e:2c:df:46:a9:e1, ethertype IPv4 (0x0800), length 2912: (tos 0x0, ttl 64, id 45197, offset 0, flags [none], proto UDP (17), length 2898)
        192.168.122.100.50613 > 192.168.122.1.4789: [bad udp cksum 0x8106 -> 0x26a0!] VXLAN, flags [I] (0x08), vni 1
    fa:c3:ba:82:05:ee > ce:85:0c:31:77:e5, ethertype IPv4 (0x0800), length 2862: (tos 0x0, ttl 64, id 14678, offset 0, flags [DF], proto TCP (6), length 2848)
        192.168.3.1.49880 > 192.168.3.2.9898: Flags [P.], cksum 0x9266 (incorrect -> 0xaa20), seq 515667:518463, ack 1, win 64, options [nop,nop,TS val 2990048824 ecr 2798801412], length 2796

116 = 14(mac) + 20(ip) + 8(udp) + 8(vxlan) + 14(inner mac) + 20(inner ip) + 32(inner tcp)

Fixes: a2fb4bc4e2a6a03 ("net: implement virtio helpers to handle UDP GSO tunneling.")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://patch.msgid.link/20260320021818.111741-3-xuanzhuo@linux.alibaba.com
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/virtio_net.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 361b60c8be68..f36d21b5bc19 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -224,6 +224,22 @@ static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb,
 	hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len);
 }
 
+/* This function must be called after virtio_net_hdr_from_skb(). */
+static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb,
+					       struct virtio_net_hdr *hdr)
+{
+	u16 hdr_len;
+
+	hdr_len = skb_inner_transport_offset(skb);
+
+	if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4)
+		hdr_len += sizeof(struct udphdr);
+	else
+		hdr_len += inner_tcp_hdrlen(skb);
+
+	hdr->hdr_len = __cpu_to_virtio16(true, hdr_len);
+}
+
 static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 					  struct virtio_net_hdr *hdr,
 					  bool little_endian,
@@ -440,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
 	if (ret)
 		return ret;
 
+	if (feature_hdrlen && hdr->hdr_len)
+		__virtio_net_set_tnl_hdrlen(skb, hdr);
+
 	if (skb->protocol == htons(ETH_P_IPV6))
 		hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
 	else
-- 
cgit v1.2.3


From cc34d77dd48708d810c12bfd6f5bf03304f6c824 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Tue, 24 Mar 2026 01:59:15 +0100
Subject: spi: use generic driver_override infrastructure

When a driver is probed through __driver_attach(), the bus' match()
callback is called without the device lock held, thus accessing the
driver_override field without a lock, which can cause a UAF.

Fix this by using the driver-core driver_override infrastructure taking
care of proper locking internally.

Note that calling match() from __driver_attach() without the device lock
held is intentional. [1]

Also note that we do not enable the driver_override feature of struct
bus_type, as SPI - in contrast to most other buses - passes "" to
sysfs_emit() when the driver_override pointer is NULL. Thus, printing
"\n" instead of "(null)\n".

Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1]
Reported-by: Gui-Dong Han <hanguidong02@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789
Fixes: 5039563e7c25 ("spi: Add driver_override SPI device attribute")
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patch.msgid.link/20260324005919.2408620-12-dakr@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index af7cfee7b8f6..0dc671c07d3a 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -159,10 +159,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
  * @modalias: Name of the driver to use with this device, or an alias
  *	for that name.  This appears in the sysfs "modalias" attribute
  *	for driver coldplugging, and in uevents used for hotplugging
- * @driver_override: If the name of a driver is written to this attribute, then
- *	the device will bind to the named driver and only the named driver.
- *	Do not set directly, because core frees it; use driver_set_override() to
- *	set or clear it.
  * @pcpu_statistics: statistics for the spi_device
  * @word_delay: delay to be inserted between consecutive
  *	words of a transfer
@@ -224,7 +220,6 @@ struct spi_device {
 	void			*controller_state;
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
-	const char		*driver_override;
 
 	/* The statistics */
 	struct spi_statistics __percpu	*pcpu_statistics;
-- 
cgit v1.2.3


From 1b164b876c36c3eb5561dd9b37702b04401b0166 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 24 Mar 2026 10:21:25 -1000
Subject: cgroup: Wait for dying tasks to leave on rmdir

a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") hid PF_EXITING
tasks from cgroup.procs so that systemd doesn't see tasks that have already
been reaped via waitpid(). However, the populated counter (nr_populated_csets)
is only decremented when the task later passes through cgroup_task_dead() in
finish_task_switch(). This means cgroup.procs can appear empty while the
cgroup is still populated, causing rmdir to fail with -EBUSY.

Fix this by making cgroup_rmdir() wait for dying tasks to fully leave. If the
cgroup is populated but all remaining tasks have PF_EXITING set (the task
iterator returns none due to the existing filter), wait for a kick from
cgroup_task_dead() and retry. The wait is brief as tasks are removed from the
cgroup's css_set between PF_EXITING assertion in do_exit() and
cgroup_task_dead() in finish_task_switch().

v2: cgroup_is_populated() true to false transition happens under css_set_lock
    not cgroup_mutex, so retest under css_set_lock before sleeping to avoid
    missed wakeups (Sebastian).

Fixes: a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup")
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202603222104.2c81684e-lkp@intel.com
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Michal Koutny <mkoutny@suse.com>
Cc: cgroups@vger.kernel.org
---
 include/linux/cgroup-defs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index bb92f5c169ca..7f87399938fa 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -609,6 +609,9 @@ struct cgroup {
 	/* used to wait for offlining of csses */
 	wait_queue_head_t offline_waitq;
 
+	/* used by cgroup_rmdir() to wait for dying tasks to leave */
+	wait_queue_head_t dying_populated_waitq;
+
 	/* used to schedule release agent */
 	struct work_struct release_agent_work;
 
-- 
cgit v1.2.3


From 815980fe6dbb01ad4007e8b260a45617f598b76d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20K=C3=B6ppeler?= <j.koeppeler@tu-berlin.de>
Date: Mon, 23 Mar 2026 18:49:20 +0100
Subject: net_sched: codel: fix stale state for empty flows in fq_codel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When codel_dequeue() finds an empty queue, it resets vars->dropping
but does not reset vars->first_above_time.  The reference CoDel
algorithm (Nichols & Jacobson, ACM Queue 2012) resets both:

  dodeque_result codel_queue_t::dodeque(time_t now) {
      ...
      if (r.p == NULL) {
          first_above_time = 0;   // <-- Linux omits this
      }
      ...
  }

Note that codel_should_drop() does reset first_above_time when called
with a NULL skb, but codel_dequeue() returns early before ever calling
codel_should_drop() in the empty-queue case.  The post-drop code paths
do reach codel_should_drop(NULL) and correctly reset the timer, so a
dropped packet breaks the cycle -- but the next delivered packet
re-arms first_above_time and the cycle repeats.

For sparse flows such as ICMP ping (one packet every 200ms-1s), the
first packet arms first_above_time, the flow goes empty, and the
second packet arrives after the interval has elapsed and gets dropped.
The pattern repeats, producing sustained loss on flows that are not
actually congested.

Test: veth pair, fq_codel, BQL disabled, 30000 iptables rules in the
consumer namespace (NAPI-64 cycle ~14ms, well above fq_codel's 5ms
target), ping at 5 pps under UDP flood:

  Before fix:  26% ping packet loss
  After fix:    0% ping packet loss

Fix by resetting first_above_time to zero in the empty-queue path
of codel_dequeue(), matching the reference algorithm.

Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
Fixes: d068ca2ae2e6 ("codel: split into multiple files")
Co-developed-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Reported-by: Chris Arges <carges@cloudflare.com>
Tested-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/all/20260318134826.1281205-7-hawk@kernel.org/
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260323174920.253526-1-hawk@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/codel_impl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index 78a27ac73070..b2c359c6dd1b 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -158,6 +158,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
 	bool drop;
 
 	if (!skb) {
+		vars->first_above_time = 0;
 		vars->dropping = false;
 		return skb;
 	}
-- 
cgit v1.2.3


From 2cdaff22ed26f1e619aa2b43f27bb84f2c6ef8f8 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Wed, 25 Mar 2026 02:55:48 +0100
Subject: dma-mapping: add missing `inline` for `dma_free_attrs`

Under a UML build for an upcoming series [1], I got `-Wstatic-in-inline`
for `dma_free_attrs`:

      BINDGEN rust/bindings/bindings_generated.rs - due to target missing
    In file included from rust/helpers/helpers.c:59:
    rust/helpers/dma.c:17:2: warning: static function 'dma_free_attrs' is used in an inline function with external linkage [-Wstatic-in-inline]
       17 |         dma_free_attrs(dev, size, cpu_addr, dma_handle, attrs);
          |         ^
    rust/helpers/dma.c:12:1: note: use 'static' to give inline function 'rust_helper_dma_free_attrs' internal linkage
       12 | __rust_helper void rust_helper_dma_free_attrs(struct device *dev, size_t size,
          | ^
          | static

The issue is that `dma_free_attrs` was not marked `inline` when it was
introduced alongside the rest of the stubs.

Thus mark it.

Fixes: ed6ccf10f24b ("dma-mapping: properly stub out the DMA API for !CONFIG_HAS_DMA")
Closes: https://lore.kernel.org/rust-for-linux/20260322194616.89847-1-ojeda@kernel.org/ [1]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260325015548.70912-1-ojeda@kernel.org
---
 include/linux/dma-mapping.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 482b919f040f..99ef042ecdb4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -255,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
 {
 	return NULL;
 }
-static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
-		dma_addr_t dma_handle, unsigned long attrs)
+static inline void dma_free_attrs(struct device *dev, size_t size,
+		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
 {
 }
 static inline void *dmam_alloc_attrs(struct device *dev, size_t size,
-- 
cgit v1.2.3


From c991ca3238410b611a2ce59adeca9b55850aff69 Mon Sep 17 00:00:00 2001
From: Shuming Fan <shumingf@realtek.com>
Date: Wed, 25 Mar 2026 17:20:17 +0800
Subject: ASoC: SDCA: remove the max count of initialization table

The number of entries in the initialization table may exceed 2048.
Therefore, this patch removes the limitation and allows the driver to
allocate memory dynamically based on the size of the initialization table.

Signed-off-by: Shuming Fan <shumingf@realtek.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260325092017.3221640-1-shumingf@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sdca_function.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h
index 79bd5a7a0f88..0e871c786513 100644
--- a/include/sound/sdca_function.h
+++ b/include/sound/sdca_function.h
@@ -26,11 +26,6 @@ struct sdca_function_desc;
  */
 #define SDCA_MAX_ENTITY_COUNT 128
 
-/*
- * Sanity check on number of initialization writes, can be expanded if needed.
- */
-#define SDCA_MAX_INIT_COUNT 2048
-
 /*
  * The Cluster IDs are 16-bit, so a maximum of 65535 Clusters per
  * function can be represented, however limit this to a slightly
-- 
cgit v1.2.3


From 175b45ed343a9c547b5f45293d3ea08d38a7b6f4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 14 Mar 2026 04:12:58 -0700
Subject: srcu: Use raw spinlocks so call_srcu() can be used under
 preempt_disable()

Tree SRCU has used non-raw spinlocks for many years, motivated by a desire
to avoid unnecessary real-time latency and the absence of any reason to
use raw spinlocks.  However, the recent use of SRCU in tracing as the
underlying implementation of RCU Tasks Trace means that call_srcu()
is invoked from preemption-disabled regions of code, which in turn
requires that any locks acquired by call_srcu() or its callees must be
raw spinlocks.

This commit therefore converts SRCU's spinlocks to raw spinlocks.

[boqun: Add Fixes tag]

Reported-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Fixes: c27cea4416a3 ("rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast")
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/srcutree.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 958cb7ef41cb..dfb31d11ff05 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -34,7 +34,7 @@ struct srcu_data {
 						/* Values: SRCU_READ_FLAVOR_.*  */
 
 	/* Update-side state. */
-	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
+	raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
 	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
 	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
@@ -55,7 +55,7 @@ struct srcu_data {
  * Node in SRCU combining tree, similar in function to rcu_data.
  */
 struct srcu_node {
-	spinlock_t __private lock;
+	raw_spinlock_t __private lock;
 	unsigned long srcu_have_cbs[4];		/* GP seq for children having CBs, but only */
 						/*  if greater than ->srcu_gp_seq. */
 	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs have CBs for given GP? */
@@ -74,7 +74,7 @@ struct srcu_usage {
 						/* First node at each level. */
 	int srcu_size_state;			/* Small-to-big transition state. */
 	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
-	spinlock_t __private lock;		/* Protect counters and size state. */
+	raw_spinlock_t __private lock;		/* Protect counters and size state. */
 	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
 	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
@@ -156,7 +156,7 @@ struct srcu_struct {
 
 #define __SRCU_USAGE_INIT(name)									\
 {												\
-	.lock = __SPIN_LOCK_UNLOCKED(name.lock),						\
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),						\
 	.srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL,							\
 	.srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE,				\
 	.srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL,					\
-- 
cgit v1.2.3


From 7c405fb3279b39244b260b54f1bd6488689ae235 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun@kernel.org>
Date: Wed, 18 Mar 2026 17:56:21 -0700
Subject: rcu: Use an intermediate irq_work to start process_srcu()

Since commit c27cea4416a3 ("rcu: Re-implement RCU Tasks Trace in terms
of SRCU-fast") we switched to SRCU in BPF. However, as BPF instrumentation
can happen basically everywhere (including where a scheduler lock is held),
call_srcu() now needs to avoid acquiring a scheduler lock, because
otherwise it could cause a deadlock [1]. Fix this by following what the
previous RCU Tasks Trace did: using an irq_work to delay the queuing of
the work to start process_srcu().

[boqun: Apply Joel's feedback]
[boqun: Apply Andrea's test feedback]

Reported-by: Andrea Righi <arighi@nvidia.com>
Closes: https://lore.kernel.org/all/abjzvz_tL_siV17s@gpd4/
Fixes: c27cea4416a3 ("rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast")
Link: https://lore.kernel.org/rcu/3c4c5a29-24ea-492d-aeee-e0d9605b4183@nvidia.com/ [1]
Suggested-by: Zqiang <qiang.zhang@linux.dev>
Tested-by: Andrea Righi <arighi@nvidia.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Boqun Feng <boqun@kernel.org>
---
 include/linux/srcutree.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index dfb31d11ff05..be76fa4fc170 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -95,6 +95,7 @@ struct srcu_usage {
 	unsigned long reschedule_jiffies;
 	unsigned long reschedule_count;
 	struct delayed_work work;
+	struct irq_work irq_work;
 	struct srcu_struct *srcu_ssp;
 };
 
-- 
cgit v1.2.3


From a6fc88b22bc8d12ad52e8412c667ec0f5bf055af Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelagnelf@nvidia.com>
Date: Mon, 23 Mar 2026 20:14:18 -0400
Subject: srcu: Use irq_work to start GP in tiny SRCU

Tiny SRCU's srcu_gp_start_if_needed() directly calls schedule_work(),
which acquires the workqueue pool->lock.

This causes a lockdep splat when call_srcu() is called with a scheduler
lock held, due to:

  call_srcu() [holding pi_lock]
    srcu_gp_start_if_needed()
      schedule_work() -> pool->lock

  workqueue_init() / create_worker() [holding pool->lock]
    wake_up_process() -> try_to_wake_up() -> pi_lock

Also add irq_work_sync() to cleanup_srcu_struct() to prevent a
use-after-free if a queued irq_work fires after cleanup begins.

Tested with rcutorture SRCU-T and no lockdep warnings.

[ Thanks to Boqun for similar fix in patch "rcu: Use an intermediate irq_work
to start process_srcu()" ]

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
---
 include/linux/srcutiny.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index dec7cbe015aa..905b629e8fa3 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -11,6 +11,7 @@
 #ifndef _LINUX_SRCU_TINY_H
 #define _LINUX_SRCU_TINY_H
 
+#include <linux/irq_work_types.h>
 #include <linux/swait.h>
 
 struct srcu_struct {
@@ -24,18 +25,21 @@ struct srcu_struct {
 	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
 	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
+	struct irq_work srcu_irq_work;	/* Defer schedule_work() to irq work. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 };
 
 void srcu_drive_gp(struct work_struct *wp);
+void srcu_tiny_irq_work(struct irq_work *irq_work);
 
 #define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored)	\
 {									\
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
 	.srcu_cb_tail = &name.srcu_cb_head,				\
 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
+	.srcu_irq_work = { .func = srcu_tiny_irq_work },		\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
 
-- 
cgit v1.2.3


From 9c42bc9db90a154bc61ae337a070465f3393485a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 25 Mar 2026 14:11:02 +0100
Subject: netfilter: nf_conntrack_expect: honor expectation helper field

The expectation helper field is mostly unused. As a result, the
netfilter codebase relies on accessing the helper through exp->master.

Always set the expectation helper field so it can be used to reach
the helper.

nf_ct_expect_init() is called from packet path where the skb owns
the ct object, therefore accessing exp->master for the newly created
expectation is safe. This saves a lot of updates in all callsites
to pass the ct object as parameter to nf_ct_expect_init().

This is a preparation patch for follow-up fixes.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_expect.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 165e7a03b8e9..1b01400b10bd 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -40,7 +40,7 @@ struct nf_conntrack_expect {
 			 struct nf_conntrack_expect *this);
 
 	/* Helper to assign to new connection */
-	struct nf_conntrack_helper *helper;
+	struct nf_conntrack_helper __rcu *helper;
 
 	/* The conntrack of the master connection */
 	struct nf_conn *master;
-- 
cgit v1.2.3


From bffcaad9afdfe45d7fc777397d3b83c1e3ebffe5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 25 Mar 2026 14:11:04 +0100
Subject: netfilter: ctnetlink: ensure safe access to master conntrack

Holding reference on the expectation is not sufficient, the master
conntrack object can just go away, making exp->master invalid.

To access exp->master safely:

- Grab the nf_conntrack_expect_lock, this gets serialized with
  clean_from_lists() which also holds this lock when the master
  conntrack goes away.

- Hold reference on master conntrack via nf_conntrack_find_get().
  Not so easy since the master tuple to look up for the master conntrack
  is not available in the existing problematic paths.

This patch goes for extending the nf_conntrack_expect_lock section
to address this issue for simplicity, in the cases that are described
below this is just slightly extending the lock section.

The add expectation command already holds a reference to the master
conntrack from ctnetlink_create_expect().

However, the delete expectation command needs to grab the spinlock
before looking up the expectation. Expand the existing spinlock
section to cover the expectation lookup. Note that the
nf_ct_expect_iterate_net() calls already grab the spinlock while
iterating over the expectation table, which is correct.

The get expectation command needs to grab the spinlock to ensure master
conntrack does not go away. This also expands the existing spinlock
section to cover the expectation lookup too. I needed to move the
netlink skb allocation out of the spinlock to keep it GFP_KERNEL.

For the expectation events, the IPEXP_DESTROY event is already delivered
under the spinlock, just move the delivery of IPEXP_NEW under the
spinlock too because the master conntrack event cache is reached through
exp->master.

While at it, add lockdep notations to help identify what codepaths need
to grab the spinlock.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3384859a8921..8883575adcc1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
+static inline void lockdep_nfct_expect_lock_held(void)
+{
+	lockdep_assert_held(&nf_conntrack_expect_lock);
+}
+
 /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
 
 static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
-- 
cgit v1.2.3


From 02a3231b6d82efe750da6554ebf280e4a6f78756 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 25 Mar 2026 22:39:55 +0100
Subject: netfilter: nf_conntrack_expect: store netns and zone in expectation

__nf_ct_expect_find() and nf_ct_expect_find_get() are called under
rcu_read_lock() but they dereference the master conntrack via
exp->master.

Since the expectation does not hold a reference on the master conntrack,
this could be a dying conntrack or a different, recycled conntrack than
the real master due to SLAB_TYPESAFE_RCU.

Store the netns, the master_tuple and the zone in struct
nf_conntrack_expect as a safety measure.

This patch is required by the follow up fix not to dump expectations
that do not belong to this netns.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_expect.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 1b01400b10bd..e9a8350e7ccf 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -22,10 +22,16 @@ struct nf_conntrack_expect {
 	/* Hash member */
 	struct hlist_node hnode;
 
+	/* Network namespace */
+	possible_net_t net;
+
 	/* We expect this tuple, with the following mask */
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_mask mask;
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	struct nf_conntrack_zone zone;
+#endif
 	/* Usage count. */
 	refcount_t use;
 
@@ -62,7 +68,17 @@ struct nf_conntrack_expect {
 
 static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
 {
-	return nf_ct_net(exp->master);
+	return read_pnet(&exp->net);
+}
+
+static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a,
+					    const struct nf_conntrack_zone *b)
+{
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	return a->zone.id == b->id;
+#else
+	return true;
+#endif
 }
 
 #define NF_CT_EXP_POLICY_NAME_LEN	16
-- 
cgit v1.2.3


From 8f15b5071b4548b0aafc03b366eb45c9c6566704 Mon Sep 17 00:00:00 2001
From: David Carlier <devnexen@gmail.com>
Date: Wed, 25 Mar 2026 14:11:08 +0100
Subject: netfilter: ctnetlink: use netlink policy range checks

Replace manual range and mask validations with netlink policy
annotations in ctnetlink code paths, so that the netlink core rejects
invalid values early and can generate extack errors.

- CTA_PROTOINFO_TCP_STATE: reject values > TCP_CONNTRACK_SYN_SENT2 at
  policy level, removing the manual >= TCP_CONNTRACK_MAX check.
- CTA_PROTOINFO_TCP_WSCALE_ORIGINAL/REPLY: reject values > TCP_MAX_WSCALE
  (14). The normal TCP option parsing path already clamps to this value,
  but the ctnetlink path accepted 0-255, causing undefined behavior when
  used as a u32 shift count.
- CTA_FILTER_ORIG_FLAGS/REPLY_FLAGS: use NLA_POLICY_MASK with
  CTA_FILTER_F_ALL, removing the manual mask checks.
- CTA_EXPECT_FLAGS: use NLA_POLICY_MASK with NF_CT_EXPECT_MASK, adding
  a new mask define grouping all valid expect flags.

Extracted from a broader nf-next patch by Florian Westphal, scoped to
ctnetlink for the fixes tree.

Fixes: c8e2078cfe41 ("[NETFILTER]: ctnetlink: add support for internal tcp connection tracking flags handling")
Signed-off-by: David Carlier <devnexen@gmail.com>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_conntrack_common.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 26071021e986..56b6b60a814f 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -159,5 +159,9 @@ enum ip_conntrack_expect_events {
 #define NF_CT_EXPECT_INACTIVE		0x2
 #define NF_CT_EXPECT_USERSPACE		0x4
 
+#ifdef __KERNEL__
+#define NF_CT_EXPECT_MASK	(NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \
+				 NF_CT_EXPECT_USERSPACE)
+#endif
 
 #endif /* _UAPI_NF_CONNTRACK_COMMON_H */
-- 
cgit v1.2.3


From 0e764b9d46071668969410ec5429be0e2f38c6d3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 25 Mar 2026 08:20:17 +0000
Subject: netfs: Fix the handling of stream->front by removing it

The netfs_io_stream::front member is meant to point to the subrequest
currently being collected on a stream, but it isn't actually used this way
by direct write (which mostly ignores it).  However, there's a tracepoint
which looks at it.  Further, stream->front is actually redundant with
stream->subrequests.next.

Fix the potential problem in the direct code by just removing the member
and using stream->subrequests.next instead, thereby also simplifying the
code.

Fixes: a0b4c7a49137 ("netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence")
Reported-by: Paulo Alcantara <pc@manguebit.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/4158599.1774426817@warthog.procyon.org.uk
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h        | 1 -
 include/trace/events/netfs.h | 8 ++++----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 72ee7d210a74..ba17ac5bf356 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -140,7 +140,6 @@ struct netfs_io_stream {
 	void (*issue_write)(struct netfs_io_subrequest *subreq);
 	/* Collection tracking */
 	struct list_head	subrequests;	/* Contributory I/O operations */
-	struct netfs_io_subrequest *front;	/* Op being collected */
 	unsigned long long	collected_to;	/* Position we've collected results to */
 	size_t			transferred;	/* The amount transferred from this stream */
 	unsigned short		error;		/* Aggregate error for the stream */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 2d366be46a1c..cbe28211106c 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -740,19 +740,19 @@ TRACE_EVENT(netfs_collect_stream,
 		    __field(unsigned int,	wreq)
 		    __field(unsigned char,	stream)
 		    __field(unsigned long long,	collected_to)
-		    __field(unsigned long long,	front)
+		    __field(unsigned long long,	issued_to)
 			     ),
 
 	    TP_fast_assign(
 		    __entry->wreq	= wreq->debug_id;
 		    __entry->stream	= stream->stream_nr;
 		    __entry->collected_to = stream->collected_to;
-		    __entry->front	= stream->front ? stream->front->start : UINT_MAX;
+		    __entry->issued_to	= atomic64_read(&wreq->issued_to);
 			   ),
 
-	    TP_printk("R=%08x[%x:] cto=%llx frn=%llx",
+	    TP_printk("R=%08x[%x:] cto=%llx ito=%llx",
 		      __entry->wreq, __entry->stream,
-		      __entry->collected_to, __entry->front)
+		      __entry->collected_to, __entry->issued_to)
 	    );
 
 TRACE_EVENT(netfs_folioq,
-- 
cgit v1.2.3


From 190a8c48ff623c3d67cb295b4536a660db2012aa Mon Sep 17 00:00:00 2001
From: Hao-Yu Yang <naup96721@gmail.com>
Date: Fri, 13 Mar 2026 20:47:56 +0800
Subject: futex: Fix UaF between futex_key_to_node_opt() and
 vma_replace_policy()

During futex_key_to_node_opt() execution, vma->vm_policy is read under
speculative mmap lock and RCU. Concurrently, mbind() may call
vma_replace_policy() which frees the old mempolicy immediately via
kmem_cache_free().

This creates a race where __futex_key_to_node() dereferences a freed
mempolicy pointer, causing a use-after-free read of mpol->mode.

[  151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
[  151.414046] Read of size 2 at addr ffff888001c49634 by task e/87

[  151.415969] Call Trace:

[  151.416732]  __asan_load2 (mm/kasan/generic.c:271)
[  151.416777]  __futex_key_to_node (kernel/futex/core.c:349)
[  151.416822]  get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)

Fix by adding rcu to __mpol_put().

Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Link: https://patch.msgid.link/20260324174418.GB1850007@noisy.programming.kicks-ass.net
---
 include/linux/mempolicy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0fe96f3ab3ef..65c732d440d2 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -55,6 +55,7 @@ struct mempolicy {
 		nodemask_t cpuset_mems_allowed;	/* relative to these nodes */
 		nodemask_t user_nodemask;	/* nodemask passed by user */
 	} w;
+	struct rcu_head rcu;
 };
 
 /*
-- 
cgit v1.2.3


From 629ec78ef8608d955ce217880cdc3e1873af3a15 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 24 Mar 2026 00:25:57 +0100
Subject: mpls: add seqcount to protect the platform_label{,s} pair

The RCU-protected codepaths (mpls_forward, mpls_dump_routes) can have
an inconsistent view of platform_labels vs platform_label in case of a
concurrent resize (resize_platform_label_table, under
platform_mutex). This can lead to OOB accesses.

This patch adds a seqcount, so that we get a consistent snapshot.

Note that mpls_label_ok is also susceptible to this, so the check
against RTA_DST in rtm_to_route_config, done outside platform_mutex,
is not sufficient. This value gets passed to mpls_label_ok once more
in both mpls_route_add and mpls_route_del, so there is no issue, but
that additional check must not be removed.

Reported-by: Yuan Tan <tanyuan98@outlook.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Fixes: 7720c01f3f590 ("mpls: Add a sysctl to control the size of the mpls label table")
Fixes: dde1b38e873c ("mpls: Convert mpls_dump_routes() to RCU.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/cd8fca15e3eb7e212b094064cd83652e20fd9d31.1774284088.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netns/mpls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index 6682e51513ef..2073cbac2afb 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -17,6 +17,7 @@ struct netns_mpls {
 	size_t platform_labels;
 	struct mpls_route __rcu * __rcu *platform_label;
 	struct mutex platform_mutex;
+	seqcount_mutex_t platform_label_seq;
 
 	struct ctl_table_header *ctl;
 };
-- 
cgit v1.2.3


From 57a04a13aac1f247d171c3f3aef93efc69e6979e Mon Sep 17 00:00:00 2001
From: Qingfang Deng <dqfext@gmail.com>
Date: Tue, 24 Mar 2026 22:08:56 +0800
Subject: netdevsim: fix build if SKB_EXTENSIONS=n

__skb_ext_put() is not declared if SKB_EXTENSIONS is not enabled, which
causes a build error:

drivers/net/netdevsim/netdev.c: In function 'nsim_forward_skb':
drivers/net/netdevsim/netdev.c:114:25: error: implicit declaration of function '__skb_ext_put'; did you mean 'skb_ext_put'? [-Werror=implicit-function-declaration]
  114 |                         __skb_ext_put(psp_ext);
      |                         ^~~~~~~~~~~~~
      |                         skb_ext_put
cc1: some warnings being treated as errors

Add a stub to fix the build.

Fixes: 7d9351435ebb ("netdevsim: drop PSP ext ref on forward failure")
Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Link: https://patch.msgid.link/20260324140857.783-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index daa4e4944ce3..2f278ce376b7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -5097,6 +5097,7 @@ static inline bool skb_has_extensions(struct sk_buff *skb)
 	return unlikely(skb->active_extensions);
 }
 #else
+static inline void __skb_ext_put(struct skb_ext *ext) {}
 static inline void skb_ext_put(struct sk_buff *skb) {}
 static inline void skb_ext_reset(struct sk_buff *skb) {}
 static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
-- 
cgit v1.2.3


From 90c5def10bea574b101b7a520c015ca81742183f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 2 Mar 2026 18:22:52 -0400
Subject: iommu: Do not call drivers for empty gathers

An empty gather is coded with start=U64_MAX, end=0 and several drivers go
on to convert that to a size with:

 end - start + 1

Which gives 2 for an empty gather. This then causes Weird Stuff to
happen (for example an UBSAN splat in VT-d) that is hopefully harmless,
but maybe not.

Prevent drivers from being called right in iommu_iotlb_sync().

Auditing shows that AMD, Intel, Mediatek and RISC-V drivers all do things
on these empty gathers.

Further, there are several callers that can trigger empty gathers,
especially in unusual conditions. For example iommu_map_nosync() will call
a 0 size unmap on some error paths. Also in VFIO, iommupt and other
places.

Cc: stable@vger.kernel.org
Reported-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Closes: https://lore.kernel.org/r/11145826.aFP6jjVeTY@jkrzyszt-mobl2.ger.corp.intel.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/iommu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 54b8b48c762e..555597b54083 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -980,7 +980,8 @@ static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
 static inline void iommu_iotlb_sync(struct iommu_domain *domain,
 				  struct iommu_iotlb_gather *iotlb_gather)
 {
-	if (domain->ops->iotlb_sync)
+	if (domain->ops->iotlb_sync &&
+	    likely(iotlb_gather->start < iotlb_gather->end))
 		domain->ops->iotlb_sync(domain, iotlb_gather);
 
 	iommu_iotlb_gather_init(iotlb_gather);
-- 
cgit v1.2.3


From fffca572f9ca51607f180a37d0c898404c8f9112 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 26 Mar 2026 15:06:31 +0100
Subject: mpage: Provide variant of mpage_writepages() with own optional folio
 handler

Some filesystems need to treat some folios specially (for example for
inodes with inline data). Doing the handling in their .writepages method
in a race-free manner results in duplicating some of the writeback
internals. So provide a generalized version of mpage_writepages() that
allows a filesystem to provide a handler called for each folio which can
handle the folio in a special way.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20260326140635.15895-3-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/mpage.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 1bdc39daac0a..358946990bfa 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -17,7 +17,14 @@ struct readahead_control;
 
 void mpage_readahead(struct readahead_control *, get_block_t get_block);
 int mpage_read_folio(struct folio *folio, get_block_t get_block);
-int mpage_writepages(struct address_space *mapping,
-		struct writeback_control *wbc, get_block_t get_block);
+int __mpage_writepages(struct address_space *mapping,
+		struct writeback_control *wbc, get_block_t get_block,
+		int (*write_folio)(struct folio *folio,
+				   struct writeback_control *wbc));
+static inline int mpage_writepages(struct address_space *mapping,
+		struct writeback_control *wbc, get_block_t get_block)
+{
+	return __mpage_writepages(mapping, wbc, get_block, NULL);
+}
 
 #endif
-- 
cgit v1.2.3


From 4c5e7f0fcd592801c9cc18f29f80fbee84eb8669 Mon Sep 17 00:00:00 2001
From: Jinjiang Tu <tujinjiang@huawei.com>
Date: Thu, 19 Mar 2026 09:25:41 +0800
Subject: mm/huge_memory: fix folio isn't locked in softleaf_to_folio()

On an arm64 server, we found that a folio obtained from a migration entry
isn't locked in softleaf_to_folio().  This issue triggers when mTHP
splitting and zap_nonpresent_ptes() race, and the root cause is a missing
memory barrier in softleaf_to_folio().  The race is as follows:

	CPU0                                             CPU1

deferred_split_scan()                              zap_nonpresent_ptes()
  lock folio
  split_folio()
    unmap_folio()
      change ptes to migration entries
    __split_folio_to_order()                         softleaf_to_folio()
      set flags(including PG_locked) for tail pages    folio = pfn_folio(softleaf_to_pfn(entry))
      smp_wmb()                                        VM_WARN_ON_ONCE(!folio_test_locked(folio))
      prep_compound_page() for tail pages

In __split_folio_to_order(), smp_wmb() guarantees page flags of tail pages
are visible before the tail page becomes non-compound.  smp_wmb() should
be paired with smp_rmb() in softleaf_to_folio(), which is missing.  As a
result, if zap_nonpresent_ptes() accesses migration entry that stores tail
pfn, softleaf_to_folio() may see the updated compound_head of tail page
before page->flags.

This issue will trigger VM_WARN_ON_ONCE() in pfn_swap_entry_folio()
because of the race between folio split and zap_nonpresent_ptes()
leading to a folio incorrectly undergoing modification without a folio
lock being held.

This was a BUG_ON() before commit 93976a20345b ("mm: eliminate further
swapops predicates"), which was merged in v6.19-rc1.

To fix it, add missing smp_rmb() if the softleaf entry is migration entry
in softleaf_to_folio() and softleaf_to_page().

[tujinjiang@huawei.com: update function name and comments]
  Link: https://lkml.kernel.org/r/20260321075214.3305564-1-tujinjiang@huawei.com
Link: https://lkml.kernel.org/r/20260319012541.4158561-1-tujinjiang@huawei.com
Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()")
Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Barry Song <baohua@kernel.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nanyong Sun <sunnanyong@huawei.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/leafops.h | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index a9ff94b744f2..05673d3529e7 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -363,6 +363,23 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry)
 	return swp_offset(entry) & SWP_PFN_MASK;
 }
 
+static inline void softleaf_migration_sync(softleaf_t entry,
+		struct folio *folio)
+{
+	/*
+	 * Ensure we do not race with split, which might alter tail pages into new
+	 * folios and thus result in observing an unlocked folio.
+	 * This matches the write barrier in __split_folio_to_order().
+	 */
+	smp_rmb();
+
+	/*
+	 * Any use of migration entries may only occur while the
+	 * corresponding page is locked
+	 */
+	VM_WARN_ON_ONCE(!folio_test_locked(folio));
+}
+
 /**
  * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry.
  * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true.
@@ -374,11 +391,8 @@ static inline struct page *softleaf_to_page(softleaf_t entry)
 	struct page *page = pfn_to_page(softleaf_to_pfn(entry));
 
 	VM_WARN_ON_ONCE(!softleaf_has_pfn(entry));
-	/*
-	 * Any use of migration entries may only occur while the
-	 * corresponding page is locked
-	 */
-	VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page));
+	if (softleaf_is_migration(entry))
+		softleaf_migration_sync(entry, page_folio(page));
 
 	return page;
 }
@@ -394,12 +408,8 @@ static inline struct folio *softleaf_to_folio(softleaf_t entry)
 	struct folio *folio = pfn_folio(softleaf_to_pfn(entry));
 
 	VM_WARN_ON_ONCE(!softleaf_has_pfn(entry));
-	/*
-	 * Any use of migration entries may only occur while the
-	 * corresponding folio is locked.
-	 */
-	VM_WARN_ON_ONCE(softleaf_is_migration(entry) &&
-			!folio_test_locked(folio));
+	if (softleaf_is_migration(entry))
+		softleaf_migration_sync(entry, folio);
 
 	return folio;
 }
-- 
cgit v1.2.3


From 45065a5095c7773fb98c35d60c20c3b513540597 Mon Sep 17 00:00:00 2001
From: Akshai Murari <akshaim@google.com>
Date: Fri, 27 Mar 2026 06:54:45 +0000
Subject: Input: add keycodes for contextual AI usages (HUTRR119)

HUTRR119 introduces new usages for keys intended to invoke AI agents
based on the current context. These are useful with the increasing
number of operating systems with integrated Large Language Models.

Add new key definitions for KEY_ACTION_ON_SELECTION,
KEY_CONTEXTUAL_INSERT and KEY_CONTEXTUAL_QUERY.

Signed-off-by: Akshai Murari <akshaim@google.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 include/uapi/linux/input-event-codes.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 4bdb6a165987..3528168f7c6d 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -643,6 +643,10 @@
 #define KEY_EPRIVACY_SCREEN_ON		0x252
 #define KEY_EPRIVACY_SCREEN_OFF		0x253
 
+#define KEY_ACTION_ON_SELECTION		0x254	/* AL Action on Selection (HUTRR119) */
+#define KEY_CONTEXTUAL_INSERT		0x255	/* AL Contextual Insertion (HUTRR119) */
+#define KEY_CONTEXTUAL_QUERY		0x256	/* AL Contextual Query (HUTRR119) */
+
 #define KEY_KBDINPUTASSIST_PREV		0x260
 #define KEY_KBDINPUTASSIST_NEXT		0x261
 #define KEY_KBDINPUTASSIST_PREVGROUP		0x262
-- 
cgit v1.2.3


From a664bf3d603dc3bdcf9ae47cc21e0daec706d7a5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 26 Mar 2026 15:30:20 +0900
Subject: crypto: algif_aead - Revert to operating out-of-place

This mostly reverts commit 72548b093ee3 except for the copying of
the associated data.

There is no benefit in operating in-place in algif_aead since the
source and destination come from different mappings.  Get rid of
all the complexity added for in-place operation and just copy the
AD directly.

Fixes: 72548b093ee3 ("crypto: algif_aead - copy AAD from src to dst")
Reported-by: Taeyang Lee <0wn@theori.io>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/if_alg.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 107b797c33ec..0cc8fa749f68 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -230,9 +230,8 @@ static inline bool af_alg_readable(struct sock *sk)
 	return PAGE_SIZE <= af_alg_rcvbuf(sk);
 }
 
-unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset);
-void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
-		      size_t dst_offset);
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes);
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst);
 void af_alg_wmem_wakeup(struct sock *sk);
 int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min);
 int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
-- 
cgit v1.2.3


From c76fef7dcd9372e3476d4df5e0a72ed5919a814b Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Tue, 31 Mar 2026 23:10:20 +0200
Subject: bpf: Fix grace period wait for tracepoint bpf_link

Recently, tracepoints were switched from using disabled preemption
(which acts as RCU read section) to SRCU-fast when they are not
faultable. This means that to do a proper grace period wait for programs
running in such tracepoints, we must use SRCU's grace period wait.
This is only for non-faultable tracepoints, faultable ones continue
using RCU Tasks Trace.

However, bpf_link_free() currently does call_rcu() for all cases when
the link is non-sleepable (hence, for tracepoints, non-faultable). Fix
this by doing a call_srcu() grace period wait.

As far as RCU Tasks Trace gp -> RCU gp chaining is concerned, it is deemed
unnecessary for tracepoint programs. The link and program are either
accessed under RCU Tasks Trace protection, or SRCU-fast protection now.

The earlier logic of chaining both RCU Tasks Trace and RCU gp waits was
to generalize the logic, even if it conceded an extra RCU gp wait,
however that is unnecessary for tracepoints even before this change.
In practice no cost was paid since rcu_trace_implies_rcu_gp() was always
true. Hence we need not chain any RCU gp after the SRCU gp.

For instance, in the non-faultable raw tracepoint, the RCU read section
of the program in __bpf_trace_run() is enclosed in the SRCU gp, likewise
for faultable raw tracepoint, the program is under the RCU Tasks Trace
protection. Hence, the outermost scope can be waited upon to ensure
correctness.

Also, sleepable programs cannot be attached to non-faultable
tracepoints, so whenever program or link is sleepable, only RCU Tasks
Trace protection is being used for the link and prog.

Fixes: a46023d5616e ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast")
Reviewed-by: Sun Jian <sun.jian.kdev@gmail.com>
Reviewed-by: Puranjay Mohan <puranjay@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20260331211021.1632902-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h        |  4 ++++
 include/linux/tracepoint.h | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..35b1e25bd104 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1854,6 +1854,10 @@ struct bpf_link_ops {
 	 * target hook is sleepable, we'll go through tasks trace RCU GP and
 	 * then "classic" RCU GP; this need for chaining tasks trace and
 	 * classic RCU GPs is designated by setting bpf_link->sleepable flag
+	 *
+	 * For non-sleepable tracepoint links we go through SRCU gp instead,
+	 * since RCU is not used in that case. Sleepable tracepoints still
+	 * follow the scheme above.
 	 */
 	void (*dealloc_deferred)(struct bpf_link *link);
 	int (*detach)(struct bpf_link *link);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 22ca1c8b54f3..1d7f29f5e901 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
 	return tp->ext && tp->ext->faultable;
 }
+/*
+ * Run RCU callback with the appropriate grace period wait for non-faultable
+ * tracepoints, e.g., those used in atomic context.
+ */
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{
+	call_srcu(&tracepoint_srcu, rcu, func);
+}
+/*
+ * Run RCU callback with the appropriate grace period wait for faultable
+ * tracepoints, e.g., those used in syscall context.
+ */
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{
+	call_rcu_tasks_trace(rcu, func);
+}
 #else
 static inline void tracepoint_synchronize_unregister(void)
 { }
@@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
 	return false;
 }
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{  }
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{  }
 #endif
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
-- 
cgit v1.2.3


From 8c0ef7b56d6bbbc53f2d43d99c195144f01b0775 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 11 Mar 2026 22:14:00 -0700
Subject: lis3lv02d: fix kernel-doc warnings

Use the correct kernel-doc format to avoid kernel-doc warnings:

Warning: include/linux/lis3lv02d.h:125 struct member 'st_min_limits' not
 described in 'lis3lv02d_platform_data'
Warning: include/linux/lis3lv02d.h:125 struct member 'st_max_limits' not
 described in 'lis3lv02d_platform_data'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20260312051400.682991-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/lis3lv02d.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h
index b72b8cdba765..feb60ba4e30e 100644
--- a/include/linux/lis3lv02d.h
+++ b/include/linux/lis3lv02d.h
@@ -30,8 +30,8 @@
  * @default_rate:	Default sampling rate. 0 means reset default
  * @setup_resources:	Interrupt line setup call back function
  * @release_resources:	Interrupt line release call back function
- * @st_min_limits[3]:	Selftest acceptance minimum values
- * @st_max_limits[3]:	Selftest acceptance maximum values
+ * @st_min_limits:	Selftest acceptance minimum values (x, y, z)
+ * @st_max_limits:	Selftest acceptance maximum values (x, y, z)
  * @irq2:		Irq line 2 number
  *
  * Platform data is used to setup the sensor chip. Meaning of the different
-- 
cgit v1.2.3


From b7e8590987aa94c9dc51518fad0e58cb887b1db5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 30 Mar 2026 14:16:34 +0200
Subject: netfilter: ipset: use nla_strcmp for IPSET_ATTR_NAME attr

IPSET_ATTR_NAME and IPSET_ATTR_NAMEREF are of NLA_STRING type, they
cannot be treated like a c-string.

They either have to be switched to NLA_NUL_STRING, or the compare
operations need to use the nla functions.

Fixes: f830837f0eed ("netfilter: ipset: list:set set type support")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index e9f4f845d760..b98331572ad2 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -309,7 +309,7 @@ enum {
 
 /* register and unregister set references */
 extern ip_set_id_t ip_set_get_byname(struct net *net,
-				     const char *name, struct ip_set **set);
+				     const struct nlattr *name, struct ip_set **set);
 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
 extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
-- 
cgit v1.2.3


From 88c4bd90725557796c15878b7cb70066e9e6b5ab Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <m.wilczynski@samsung.com>
Date: Thu, 3 Apr 2025 15:10:51 +0200
Subject: firmware: thead: Fix buffer overflow and use standard endian macros

Addresses two issues in the TH1520 AON firmware protocol driver:

1. Fix a potential buffer overflow where the code used unsafe pointer
   arithmetic to access the 'mode' field through the 'resource' pointer
   with an offset. This was flagged by Smatch static checker as:
   "buffer overflow 'data' 2 <= 3"

2. Replace custom RPC_SET_BE* and RPC_GET_BE* macros with standard
   kernel endianness conversion macros (cpu_to_be16, etc.) for better
   portability and maintainability.

The functionality was re-tested with the GPU power-up sequence,
confirming the GPU powers up correctly and the driver probes
successfully.

[   12.702370] powervr ffef400000.gpu: [drm] loaded firmware
powervr/rogue_36.52.104.182_v1.fw
[   12.711043] powervr ffef400000.gpu: [drm] FW version v1.0 (build
6645434 OS)
[   12.719787] [drm] Initialized powervr 1.0.0 for ffef400000.gpu on
minor 0

Fixes: e4b3cbd840e5 ("firmware: thead: Add AON firmware protocol driver")
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/all/17a0ccce-060b-4b9d-a3c4-8d5d5823b1c9@stanley.mountain/
Signed-off-by: Michal Wilczynski <m.wilczynski@samsung.com>
Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
Acked-by: Drew Fustini <fustini@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/firmware/thead/thead,th1520-aon.h | 74 -------------------------
 1 file changed, 74 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/thead/thead,th1520-aon.h b/include/linux/firmware/thead/thead,th1520-aon.h
index dae132b66873..d81f5f6f5b90 100644
--- a/include/linux/firmware/thead/thead,th1520-aon.h
+++ b/include/linux/firmware/thead/thead,th1520-aon.h
@@ -97,80 +97,6 @@ struct th1520_aon_rpc_ack_common {
 #define RPC_GET_SVC_FLAG_ACK_TYPE(MESG) (((MESG)->svc & 0x40) >> 6)
 #define RPC_SET_SVC_FLAG_ACK_TYPE(MESG, ACK) ((MESG)->svc |= (ACK) << 6)
 
-#define RPC_SET_BE64(MESG, OFFSET, SET_DATA)                                \
-	do {                                                                \
-		u8 *data = (u8 *)(MESG);                                    \
-		u64 _offset = (OFFSET);                                     \
-		u64 _set_data = (SET_DATA);                                 \
-		data[_offset + 7] = _set_data & 0xFF;                       \
-		data[_offset + 6] = (_set_data & 0xFF00) >> 8;              \
-		data[_offset + 5] = (_set_data & 0xFF0000) >> 16;           \
-		data[_offset + 4] = (_set_data & 0xFF000000) >> 24;         \
-		data[_offset + 3] = (_set_data & 0xFF00000000) >> 32;       \
-		data[_offset + 2] = (_set_data & 0xFF0000000000) >> 40;     \
-		data[_offset + 1] = (_set_data & 0xFF000000000000) >> 48;   \
-		data[_offset + 0] = (_set_data & 0xFF00000000000000) >> 56; \
-	} while (0)
-
-#define RPC_SET_BE32(MESG, OFFSET, SET_DATA)			    \
-	do {							    \
-		u8 *data = (u8 *)(MESG);			    \
-		u64 _offset = (OFFSET);				    \
-		u64 _set_data = (SET_DATA);			    \
-		data[_offset + 3] = (_set_data) & 0xFF;		    \
-		data[_offset + 2] = (_set_data & 0xFF00) >> 8;	    \
-		data[_offset + 1] = (_set_data & 0xFF0000) >> 16;   \
-		data[_offset + 0] = (_set_data & 0xFF000000) >> 24; \
-	} while (0)
-
-#define RPC_SET_BE16(MESG, OFFSET, SET_DATA)		       \
-	do {						       \
-		u8 *data = (u8 *)(MESG);		       \
-		u64 _offset = (OFFSET);			       \
-		u64 _set_data = (SET_DATA);		       \
-		data[_offset + 1] = (_set_data) & 0xFF;	       \
-		data[_offset + 0] = (_set_data & 0xFF00) >> 8; \
-	} while (0)
-
-#define RPC_SET_U8(MESG, OFFSET, SET_DATA)	  \
-	do {					  \
-		u8 *data = (u8 *)(MESG);	  \
-		data[OFFSET] = (SET_DATA) & 0xFF; \
-	} while (0)
-
-#define RPC_GET_BE64(MESG, OFFSET, PTR)                                      \
-	do {                                                                 \
-		u8 *data = (u8 *)(MESG);                                     \
-		u64 _offset = (OFFSET);                                      \
-		*(u32 *)(PTR) =                                              \
-			(data[_offset + 7] | data[_offset + 6] << 8 |        \
-			 data[_offset + 5] << 16 | data[_offset + 4] << 24 | \
-			 data[_offset + 3] << 32 | data[_offset + 2] << 40 | \
-			 data[_offset + 1] << 48 | data[_offset + 0] << 56); \
-	} while (0)
-
-#define RPC_GET_BE32(MESG, OFFSET, PTR)                                      \
-	do {                                                                 \
-		u8 *data = (u8 *)(MESG);                                     \
-		u64 _offset = (OFFSET);                                      \
-		*(u32 *)(PTR) =                                              \
-			(data[_offset + 3] | data[_offset + 2] << 8 |        \
-			 data[_offset + 1] << 16 | data[_offset + 0] << 24); \
-	} while (0)
-
-#define RPC_GET_BE16(MESG, OFFSET, PTR)                                       \
-	do {                                                                  \
-		u8 *data = (u8 *)(MESG);                                      \
-		u64 _offset = (OFFSET);                                       \
-		*(u16 *)(PTR) = (data[_offset + 1] | data[_offset + 0] << 8); \
-	} while (0)
-
-#define RPC_GET_U8(MESG, OFFSET, PTR)          \
-	do {                                   \
-		u8 *data = (u8 *)(MESG);       \
-		*(u8 *)(PTR) = (data[OFFSET]); \
-	} while (0)
-
 /*
  * Defines for SC PM Power Mode
  */
-- 
cgit v1.2.3


From bd3d245b0fef571f93504904df62b8865b1c0d34 Mon Sep 17 00:00:00 2001
From: Guan-Yu Lin <guanyulin@google.com>
Date: Wed, 1 Apr 2026 12:32:17 +0000
Subject: usb: core: use dedicated spinlock for offload state

Replace the coarse USB device lock with a dedicated offload_lock
spinlock to reduce contention during offload operations. Use
offload_pm_locked to synchronize with PM transitions and replace
the legacy offload_at_suspend flag.

Optimize usb_offload_get/put by switching from auto-resume/suspend
to pm_runtime_get_if_active(). This ensures offload state is only
modified when the device is already active, avoiding unnecessary
power transitions.

Cc: stable <stable@kernel.org>
Fixes: ef82a4803aab ("xhci: sideband: add api to trace sideband usage")
Signed-off-by: Guan-Yu Lin <guanyulin@google.com>
Tested-by: Hailong Liu <hailong.liu@oppo.com>
Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://patch.msgid.link/20260401123238.3790062-2-guanyulin@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 04277af4bb9d..4aab20015851 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -21,6 +21,7 @@
 #include <linux/completion.h>	/* for struct completion */
 #include <linux/sched.h>	/* for current && schedule_timeout */
 #include <linux/mutex.h>	/* for struct mutex */
+#include <linux/spinlock.h>	/* for spinlock_t */
 #include <linux/pm_runtime.h>	/* for runtime PM */
 
 struct usb_device;
@@ -636,8 +637,9 @@ struct usb3_lpm_parameters {
  * @do_remote_wakeup:  remote wakeup should be enabled
  * @reset_resume: needs reset instead of resume
  * @port_is_suspended: the upstream port is suspended (L2 or U3)
- * @offload_at_suspend: offload activities during suspend is enabled.
+ * @offload_pm_locked: prevents offload_usage changes during PM transitions.
  * @offload_usage: number of offload activities happening on this usb device.
+ * @offload_lock: protects offload_usage and offload_pm_locked
  * @slot_id: Slot ID assigned by xHCI
  * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout.
  * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout.
@@ -726,8 +728,9 @@ struct usb_device {
 	unsigned do_remote_wakeup:1;
 	unsigned reset_resume:1;
 	unsigned port_is_suspended:1;
-	unsigned offload_at_suspend:1;
+	unsigned offload_pm_locked:1;
 	int offload_usage;
+	spinlock_t offload_lock;
 	enum usb_link_tunnel_mode tunnel_mode;
 	struct device_link *usb4_link;
 
@@ -849,6 +852,7 @@ static inline void usb_mark_last_busy(struct usb_device *udev)
 int usb_offload_get(struct usb_device *udev);
 int usb_offload_put(struct usb_device *udev);
 bool usb_offload_check(struct usb_device *udev);
+void usb_offload_set_pm_locked(struct usb_device *udev, bool locked);
 #else
 
 static inline int usb_offload_get(struct usb_device *udev)
@@ -857,6 +861,8 @@ static inline int usb_offload_put(struct usb_device *udev)
 { return 0; }
 static inline bool usb_offload_check(struct usb_device *udev)
 { return false; }
+static inline void usb_offload_set_pm_locked(struct usb_device *udev, bool locked)
+{ }
 #endif
 
 extern int usb_disable_lpm(struct usb_device *udev);
-- 
cgit v1.2.3


From 77facb35227c421467cdb49268de433168c2dcef Mon Sep 17 00:00:00 2001
From: Chris J Arges <carges@cloudflare.com>
Date: Thu, 2 Apr 2026 17:23:16 -0500
Subject: net: increase IP_TUNNEL_RECURSION_LIMIT to 5

In configurations with multiple tunnel layers and MPLS lwtunnel routing, a
single tunnel hop can increment the counter beyond this limit. This causes
packets to be dropped with the "Dead loop on virtual device" message even
when a routing loop doesn't exist.

Increase IP_TUNNEL_RECURSION_LIMIT from 4 to 5 to handle this use-case.

Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions")
Link: https://lore.kernel.org/netdev/88deb91b-ef1b-403c-8eeb-0f971f27e34f@redhat.com/
Signed-off-by: Chris J Arges <carges@cloudflare.com>
Link: https://patch.msgid.link/20260402222401.3408368-1-carges@cloudflare.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_tunnels.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1f577a4f8ce9..d708b66e55cd 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -32,7 +32,7 @@
  * recursion involves route lookups and full IP output, consuming much
  * more stack per level, so a lower limit is needed.
  */
-#define IP_TUNNEL_RECURSION_LIMIT	4
+#define IP_TUNNEL_RECURSION_LIMIT	5
 
 /* Keep error state on tunnel for 30 sec */
 #define IPTUNNEL_ERR_TIMEO	(30*HZ)
-- 
cgit v1.2.3


From 16cbec24897624051b324aa3a85859c38ca65fde Mon Sep 17 00:00:00 2001
From: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Date: Tue, 24 Mar 2026 23:57:40 +0000
Subject: mshv: Fix infinite fault loop on permission-denied GPA intercepts

Prevent infinite fault loops when guests access memory regions without
proper permissions. Currently, mshv_handle_gpa_intercept() attempts to
remap pages for all faults on movable memory regions, regardless of
whether the access type is permitted. When a guest writes to a read-only
region, the remap succeeds but the region remains read-only, causing
immediate re-fault and spinning the vCPU indefinitely.

Validate intercept access type against region permissions before
attempting remaps. Reject writes to non-writable regions and executes to
non-executable regions early, returning false to let the VMM handle the
intercept appropriately.

This also closes a potential DoS vector where malicious guests could
intentionally trigger these fault loops to consume host resources.

Fixes: b9a66cd5ccbb ("mshv: Add support for movable memory regions")
Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Reviewed-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/hyperv/hvgdk_mini.h | 6 ++++++
 include/hyperv/hvhdk.h      | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 1823a290a7b7..f9600f87186a 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -1533,4 +1533,10 @@ struct hv_mmio_write_input {
 	u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH];
 } __packed;
 
+enum hv_intercept_access_type {
+	HV_INTERCEPT_ACCESS_READ	= 0,
+	HV_INTERCEPT_ACCESS_WRITE	= 1,
+	HV_INTERCEPT_ACCESS_EXECUTE	= 2
+};
+
 #endif /* _HV_HVGDK_MINI_H */
diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h
index 245f3db53bf1..5e83d3714966 100644
--- a/include/hyperv/hvhdk.h
+++ b/include/hyperv/hvhdk.h
@@ -779,7 +779,7 @@ struct hv_x64_intercept_message_header {
 	u32 vp_index;
 	u8 instruction_length:4;
 	u8 cr8:4; /* Only set for exo partitions */
-	u8 intercept_access_type;
+	u8 intercept_access_type; /* enum hv_intercept_access_type */
 	union hv_x64_vp_execution_state execution_state;
 	struct hv_x64_segment_register cs_segment;
 	u64 rip;
@@ -825,7 +825,7 @@ union hv_arm64_vp_execution_state {
 struct hv_arm64_intercept_message_header {
 	u32 vp_index;
 	u8 instruction_length;
-	u8 intercept_access_type;
+	u8 intercept_access_type; /* enum hv_intercept_access_type */
 	union hv_arm64_vp_execution_state execution_state;
 	u64 pc;
 	u64 cpsr;
-- 
cgit v1.2.3


From adfc80dd0d7831335b5105fb3d8747094bf42878 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <pjw@kernel.org>
Date: Sat, 4 Apr 2026 18:40:58 -0600
Subject: prctl: rename branch landing pad implementation functions to be more
 explicit

Per Linus' comments about the unreadability of abbreviations such as
"indir_br_lp", rename the three prctl() implementation functions to be more
explicit.  This involves renaming "indir_br_lp_status" in the function
names to "branch_landing_pad_state".

While here, add _prctl_ into the function names, following the
speculation control prctl implementation functions.

Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/
Cc: Deepak Gupta <debug@rivosinc.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 include/linux/cpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8239cd95a005..9b6b0d87fdb0 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -229,8 +229,8 @@ static inline bool cpu_attack_vector_mitigated(enum cpu_attack_vectors v)
 #define smt_mitigations SMT_MITIGATIONS_OFF
 #endif
 
-int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status);
-int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status);
-int arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status);
+int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, unsigned long __user *state);
+int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state);
+int arch_prctl_lock_branch_landing_pad_state(struct task_struct *t);
 
 #endif /* _LINUX_CPU_H_ */
-- 
cgit v1.2.3


From 08ee1559052be302f1d3752f48360b89517d9f8d Mon Sep 17 00:00:00 2001
From: Paul Walmsley <pjw@kernel.org>
Date: Sat, 4 Apr 2026 18:40:58 -0600
Subject: prctl: cfi: change the branch landing pad prctl()s to be more
 descriptive

Per Linus' comments requesting the replacement of "INDIR_BR_LP" in the
indirect branch tracking prctl()s with something more readable, and
suggesting the use of the speculation control prctl()s as an exemplar,
reimplement the prctl()s and related constants that control per-task
forward-edge control flow integrity.

This primarily involves two changes.  First, the prctls are
restructured to resemble the style of the speculative execution
workaround control prctls PR_{GET,SET}_SPECULATION_CTRL, to make them
easier to extend in the future.  Second, the "indir_br_lp" abbreviation
is expanded to "branch_landing_pads" to be less telegraphic.  The
kselftest and documentation are adjusted accordingly.

Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/
Cc: Deepak Gupta <debug@rivosinc.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 include/uapi/linux/prctl.h | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 55b0446fff9d..b6ec6f693719 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -397,30 +397,23 @@ struct prctl_mm_map {
 # define PR_RSEQ_SLICE_EXT_ENABLE		0x01
 
 /*
- * Get the current indirect branch tracking configuration for the current
- * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS.
+ * Get or set the control flow integrity (CFI) configuration for the
+ * current thread.
+ *
+ * Some per-thread control flow integrity settings are not yet
+ * controlled through this prctl(); see for example
+ * PR_{GET,SET,LOCK}_SHADOW_STACK_STATUS
  */
-#define PR_GET_INDIR_BR_LP_STATUS      80
-
+#define PR_GET_CFI	80
+#define PR_SET_CFI	81
 /*
- * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will
- * enable cpu feature for user thread, to track all indirect branches and ensure
- * they land on arch defined landing pad instruction.
- * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction.
- * arch64 - If enabled, an indirect branch must land on a BTI instruction.
- * riscv - If enabled, an indirect branch must land on an lpad instruction.
- * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect
- * branches will no more be tracked by cpu to land on arch defined landing pad
- * instruction.
- */
-#define PR_SET_INDIR_BR_LP_STATUS      81
-# define PR_INDIR_BR_LP_ENABLE		   (1UL << 0)
-
-/*
- * Prevent further changes to the specified indirect branch tracking
- * configuration.  All bits may be locked via this call, including
- * undefined bits.
+ * Forward-edge CFI variants (excluding ARM64 BTI, which has its own
+ * prctl()s).
  */
-#define PR_LOCK_INDIR_BR_LP_STATUS      82
+#define PR_CFI_BRANCH_LANDING_PADS	0
+/* Return and control values for PR_{GET,SET}_CFI */
+# define PR_CFI_ENABLE		_BITUL(0)
+# define PR_CFI_DISABLE		_BITUL(1)
+# define PR_CFI_LOCK		_BITUL(2)
 
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From 4e53116437e919c4b9a9d95fb73ae14fe0cfc8f9 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Mon, 16 Mar 2026 14:14:49 +0000
Subject: ASoC: SDCA: Fix errors in IRQ cleanup

IRQs are enabled through sdca_irq_populate() from component probe
using devm_request_threaded_irq(), this however means the IRQs can
persist if the sound card is torn down. Some of the IRQ handlers
store references to the card and the kcontrols which can then
fail. Some detail of the crash was explained in [1].

Generally it is not advised to use devm outside of bus probe, so
the code is updated to not use devm. The IRQ requests are not moved
to bus probe time as it makes passing the snd_soc_component into
the IRQs very awkward and would then require a second step once the
component is available, so it is simpler to just register the IRQs
at this point, even though that necessitates some manual cleanup.

Link: https://lore.kernel.org/linux-sound/20260310183829.2907805-1-gaggery.tsai@intel.com/ [1]
Fixes: b126394d9ec6 ("ASoC: SDCA: Generic interrupt support")
Reported-by: Gaggery Tsai <gaggery.tsai@intel.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260316141449.2950215-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sdca_interrupts.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/sound/sdca_interrupts.h b/include/sound/sdca_interrupts.h
index 9bcb5d8fd592..b47003c3d26e 100644
--- a/include/sound/sdca_interrupts.h
+++ b/include/sound/sdca_interrupts.h
@@ -69,6 +69,8 @@ struct sdca_interrupt_info {
 int sdca_irq_request(struct device *dev, struct sdca_interrupt_info *interrupt_info,
 		     int sdca_irq, const char *name, irq_handler_t handler,
 		     void *data);
+void sdca_irq_free(struct device *dev, struct sdca_interrupt_info *interrupt_info,
+		   int sdca_irq, const char *name, void *data);
 int sdca_irq_data_populate(struct device *dev, struct regmap *function_regmap,
 			   struct snd_soc_component *component,
 			   struct sdca_function_data *function,
@@ -81,6 +83,9 @@ int sdca_irq_populate_early(struct device *dev, struct regmap *function_regmap,
 int sdca_irq_populate(struct sdca_function_data *function,
 		      struct snd_soc_component *component,
 		      struct sdca_interrupt_info *info);
+void sdca_irq_cleanup(struct sdca_function_data *function,
+		      struct snd_soc_component *component,
+		      struct sdca_interrupt_info *info);
 struct sdca_interrupt_info *sdca_irq_allocate(struct device *dev,
 					      struct regmap *regmap, int irq);
 
-- 
cgit v1.2.3


From 1ee1605138fc94cc8f8f273321dd2471c64977f9 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 2 Apr 2026 17:49:52 +0200
Subject: xsk: respect tailroom for ZC setups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Multi-buffer XDP stores information about frags in skb_shared_info that
sits at the tailroom of a packet. The storage space is reserved via
xdp_data_hard_end():

	((xdp)->data_hard_start + (xdp)->frame_sz -	\
	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

and then we refer to it via macro below:

static inline struct skb_shared_info *
xdp_get_shared_info_from_buff(const struct xdp_buff *xdp)
{
        return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}

Currently we do not respect this tailroom space in multi-buffer AF_XDP
ZC scenario. To address this, introduce xsk_pool_get_tailroom() and use
it within xsk_pool_get_rx_frame_size() which is used in ZC drivers to
configure length of HW Rx buffer.

Typically drivers work with 128-byte alignment on the Rx HW buffer side,
so let us align the value returned by xsk_pool_get_rx_frame_size() in
order to avoid addressing this on the driver's side. This addresses the
fact that idpf uses the mentioned function *before* pool->dev is set, so
we were at risk that after subtracting tailroom we would not provide a
128-byte aligned value to HW.

Since xsk_pool_get_rx_frame_size() is actively used in xsk_rcv_check()
and __xsk_rcv(), add a variant of this routine that will not include 128
byte alignment and therefore old behavior is preserved.

Reviewed-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-3-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/xdp_sock_drv.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 6b9ebae2dc95..46797645a0c2 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -41,16 +41,37 @@ static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
 	return XDP_PACKET_HEADROOM + pool->headroom;
 }
 
+static inline u32 xsk_pool_get_tailroom(bool mbuf)
+{
+	return mbuf ? SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : 0;
+}
+
 static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool)
 {
 	return pool->chunk_size;
 }
 
-static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
+static inline u32 __xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
 {
 	return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
 }
 
+static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
+{
+	u32 frame_size =  __xsk_pool_get_rx_frame_size(pool);
+	struct xdp_umem *umem = pool->umem;
+	bool mbuf;
+
+	/* Reserve tailroom only for zero-copy pools that opted into
+	 * multi-buffer. The reserved area is used for skb_shared_info,
+	 * matching the XDP core's xdp_data_hard_end() layout.
+	 */
+	mbuf = pool->dev && (umem->flags & XDP_UMEM_SG_FLAG);
+	frame_size -= xsk_pool_get_tailroom(mbuf);
+
+	return ALIGN_DOWN(frame_size, 128);
+}
+
 static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
 {
 	return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool);
-- 
cgit v1.2.3


From 93e84fe45b752d17a5a46b306ed78f0133bbc719 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 2 Apr 2026 17:49:53 +0200
Subject: xsk: fix XDP_UMEM_SG_FLAG issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently xp_assign_dev_shared() does not propagate XDP_USE_SG to flags,
so set it in order to preserve the MTU check that is supposed to be done
only when no multi-buffer setup is in the picture.

Also, this flag has the same value as XDP_UMEM_TX_SW_CSUM so we could
get unexpected SG setups for software Tx checksums. Since csum flag is
UAPI, modify value of XDP_UMEM_SG_FLAG.

Fixes: d609f3d228a8 ("xsk: add multi-buffer support for sockets sharing umem")
Reviewed-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://patch.msgid.link/20260402154958.562179-4-maciej.fijalkowski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/xdp_sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 23e8861e8b25..ebac60a3d8a1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -14,7 +14,7 @@
 #include <linux/mm.h>
 #include <net/sock.h>
 
-#define XDP_UMEM_SG_FLAG (1 << 1)
+#define XDP_UMEM_SG_FLAG BIT(3)
 
 struct net_device;
 struct xsk_queue;
-- 
cgit v1.2.3


From f8dca15a1b190787bbd03285304b569631160eda Mon Sep 17 00:00:00 2001
From: Tuan Do <tuan@calif.io>
Date: Fri, 3 Apr 2026 00:33:17 -0700
Subject: netfilter: nft_ct: fix use-after-free in timeout object destroy

nft_ct_timeout_obj_destroy() frees the timeout object with kfree()
immediately after nf_ct_untimeout(), without waiting for an RCU grace
period. Concurrent packet processing on other CPUs may still hold
RCU-protected references to the timeout object obtained via
rcu_dereference() in nf_ct_timeout_data().

Add an rcu_head to struct nf_ct_timeout and use kfree_rcu() to defer
freeing until after an RCU grace period, matching the approach already
used in nfnetlink_cttimeout.c.

KASAN report:
 BUG: KASAN: slab-use-after-free in nf_conntrack_tcp_packet+0x1381/0x29d0
 Read of size 4 at addr ffff8881035fe19c by task exploit/80

 Call Trace:
  nf_conntrack_tcp_packet+0x1381/0x29d0
  nf_conntrack_in+0x612/0x8b0
  nf_hook_slow+0x70/0x100
  __ip_local_out+0x1b2/0x210
  tcp_sendmsg_locked+0x722/0x1580
  __sys_sendto+0x2d8/0x320

 Allocated by task 75:
  nft_ct_timeout_obj_init+0xf6/0x290
  nft_obj_init+0x107/0x1b0
  nf_tables_newobj+0x680/0x9c0
  nfnetlink_rcv_batch+0xc29/0xe00

 Freed by task 26:
  nft_obj_destroy+0x3f/0xa0
  nf_tables_trans_destroy_work+0x51c/0x5c0
  process_one_work+0x2c4/0x5a0

Fixes: 7e0b2b57f01d ("netfilter: nft_ct: add ct timeout support")
Cc: stable@vger.kernel.org
Signed-off-by: Tuan Do <tuan@calif.io>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_conntrack_timeout.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 9fdaba911de6..3a66d4abb6d6 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -14,6 +14,7 @@
 struct nf_ct_timeout {
 	__u16			l3num;
 	const struct nf_conntrack_l4proto *l4proto;
+	struct rcu_head		rcu;
 	char			data[];
 };
 
-- 
cgit v1.2.3


From 936206e3f6ff411581e615e930263d6f8b78df9d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 7 Apr 2026 17:00:01 +0200
Subject: netfilter: nfnetlink_queue: make hash table per queue

Sharing a global hash table among all queues is tempting, but
it can cause a crash:

BUG: KASAN: slab-use-after-free in nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
[..]
 nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
 nfnetlink_rcv_msg+0x46a/0x930
 kmem_cache_alloc_node_noprof+0x11e/0x450

struct nf_queue_entry is freed via kfree, but a parallel CPU can still
encounter such an nf_queue_entry when walking the list.

Alternative fix is to free the nf_queue_entry via kfree_rcu() instead,
but as we have to alloc/free for each skb this will cause more mem
pressure.

Cc: Scott Mitchell <scott.k.mitch1@gmail.com>
Fixes: e19079adcd26 ("netfilter: nfnetlink_queue: optimize verdict lookup with hash table")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_queue.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 45eb26b2e95b..d17035d14d96 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -23,7 +23,6 @@ struct nf_queue_entry {
 	struct nf_hook_state	state;
 	bool			nf_ct_is_unconfirmed;
 	u16			size; /* sizeof(entry) + saved route keys */
-	u16			queue_num;
 
 	/* extra space to store route keys */
 };
-- 
cgit v1.2.3


From 0b8757b220f94421bd4ff50cce03886387c4e71c Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Wed, 8 Apr 2026 10:38:34 +0100
Subject: ASoC: SDCA: Unregister IRQ handlers on module remove

Ensure that all interrupt handlers are unregistered before the parent
regmap_irq is unregistered.

sdca_irq_cleanup() was only called from the component_remove(). If the
module was loaded and removed without ever being component probed the
FDL interrupts would not be unregistered and this would hit a WARN
when devm called regmap_del_irq_chip() during the removal of the
parent IRQ.

Fixes: 4e53116437e9 ("ASoC: SDCA: Fix errors in IRQ cleanup")
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20260408093835.2881486-5-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sdca_interrupts.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/sdca_interrupts.h b/include/sound/sdca_interrupts.h
index b47003c3d26e..a515cc3df097 100644
--- a/include/sound/sdca_interrupts.h
+++ b/include/sound/sdca_interrupts.h
@@ -83,8 +83,8 @@ int sdca_irq_populate_early(struct device *dev, struct regmap *function_regmap,
 int sdca_irq_populate(struct sdca_function_data *function,
 		      struct snd_soc_component *component,
 		      struct sdca_interrupt_info *info);
-void sdca_irq_cleanup(struct sdca_function_data *function,
-		      struct snd_soc_component *component,
+void sdca_irq_cleanup(struct device *dev,
+		      struct sdca_function_data *function,
 		      struct sdca_interrupt_info *info);
 struct sdca_interrupt_info *sdca_irq_allocate(struct device *dev,
 					      struct regmap *regmap, int irq);
-- 
cgit v1.2.3


From 52f657e34d7b21b47434d9d8b26fa7f6778b63a0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 8 Apr 2026 13:18:57 -0700
Subject: x86: shadow stacks: proper error handling for mmap lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

김영민 reports that shstk_pop_sigframe() doesn't check for errors from
mmap_read_lock_killable(), which is a silly oversight, and also shows
that we haven't marked those functions with "__must_check", which would
have immediately caught it.

So let's fix both issues.

Reported-by: 김영민 <osori@hspace.io>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmap_lock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 93eca48bc443..04b8f61ece5d 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -546,7 +546,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
 	__mmap_lock_trace_acquire_returned(mm, true, true);
 }
 
-static inline int mmap_write_lock_killable(struct mm_struct *mm)
+static inline int __must_check mmap_write_lock_killable(struct mm_struct *mm)
 {
 	int ret;
 
@@ -593,7 +593,7 @@ static inline void mmap_read_lock(struct mm_struct *mm)
 	__mmap_lock_trace_acquire_returned(mm, false, true);
 }
 
-static inline int mmap_read_lock_killable(struct mm_struct *mm)
+static inline int __must_check mmap_read_lock_killable(struct mm_struct *mm)
 {
 	int ret;
 
@@ -603,7 +603,7 @@ static inline int mmap_read_lock_killable(struct mm_struct *mm)
 	return ret;
 }
 
-static inline bool mmap_read_trylock(struct mm_struct *mm)
+static inline bool __must_check mmap_read_trylock(struct mm_struct *mm)
 {
 	bool ret;
 
-- 
cgit v1.2.3


From 146d4ab94cf129ee06cd467cb5c71368a6b5bad6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 8 Apr 2026 13:12:32 +0100
Subject: rxrpc: Fix call removal to use RCU safe deletion

Fix rxrpc call removal from the rxnet->calls list to use list_del_rcu()
rather than list_del_init() to prevent stuffing up reading
/proc/net/rxrpc/calls from potentially getting into an infinite loop.

This, however, means that list_empty() no longer works on an entry that's
been deleted from the list, making it harder to detect prior deletion.  Fix
this by:

Firstly, make rxrpc_destroy_all_calls() only dump the first ten calls that
are unexpectedly still on the list.  Limiting the number of steps means
there's no need to call cond_resched() or to remove calls from the list
here, thereby eliminating the need for rxrpc_put_call() to check for that.

rxrpc_put_call() can then be fixed to unconditionally delete the call from
the list as it is the only place that the deletion occurs.

Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing")
Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Linus Torvalds <torvalds@linux-foundation.org>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-5-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/trace/events/rxrpc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 869f97c9bf73..a826cd80007b 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -347,7 +347,7 @@
 	EM(rxrpc_call_see_release,		"SEE release ") \
 	EM(rxrpc_call_see_userid_exists,	"SEE u-exists") \
 	EM(rxrpc_call_see_waiting_call,		"SEE q-conn  ") \
-	E_(rxrpc_call_see_zap,			"SEE zap     ")
+	E_(rxrpc_call_see_still_live,		"SEE !still-l")
 
 #define rxrpc_txqueue_traces \
 	EM(rxrpc_txqueue_await_reply,		"AWR") \
-- 
cgit v1.2.3


From b33f5741bb187db8ff32e8f5b96def77cc94dfca Mon Sep 17 00:00:00 2001
From: Alok Tiwari <alok.a.tiwari@oracle.com>
Date: Wed, 8 Apr 2026 13:12:34 +0100
Subject: rxrpc: Fix use of wrong skb when comparing queued RESP challenge
 serial

In rxrpc_post_response(), the code should be comparing the challenge serial
number from the cached response before deciding to switch to a newer
response, but looks at the newer packet private data instead, rendering the
comparison always false.

Fix this by switching to look at the older packet.

Fix further[1] to substitute the new packet in place of the old one if
newer and also to release whichever we don't use.

Fixes: 5800b1cf3fd8 ("rxrpc: Allow CHALLENGEs to the passed to the app for a RESPONSE")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com [1]
Link: https://patch.msgid.link/20260408121252.2249051-7-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/trace/events/rxrpc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index a826cd80007b..f7f559204b87 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -185,6 +185,7 @@
 	EM(rxrpc_skb_put_input,			"PUT input    ") \
 	EM(rxrpc_skb_put_jumbo_subpacket,	"PUT jumbo-sub") \
 	EM(rxrpc_skb_put_oob,			"PUT oob      ") \
+	EM(rxrpc_skb_put_old_response,		"PUT old-resp ") \
 	EM(rxrpc_skb_put_purge,			"PUT purge    ") \
 	EM(rxrpc_skb_put_purge_oob,		"PUT purge-oob") \
 	EM(rxrpc_skb_put_response,		"PUT response ") \
-- 
cgit v1.2.3


From 0cd3e3f3f2ec1a45aa559e2c0f3d57fac5eb3c25 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.c.dionne@gmail.com>
Date: Wed, 8 Apr 2026 13:12:37 +0100
Subject: rxrpc: Fix to request an ack if window is limited

Peers may only send immediate acks for every 2 UDP packets received.
When sending a jumbogram, it is important to check that there is
sufficient window space to send another same sized jumbogram following
the current one, and request an ack if there isn't.  Failure to do so may
cause the call to stall waiting for an ack until the resend timer fires.

Where jumbograms are in use this causes a very significant drop in
performance.

Fixes: fe24a5494390 ("rxrpc: Send jumbo DATA packets")
Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeffrey Altman <jaltman@auristor.com>
cc: Simon Horman <horms@kernel.org>
cc: linux-afs@lists.infradead.org
cc: stable@kernel.org
Link: https://patch.msgid.link/20260408121252.2249051-10-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/trace/events/rxrpc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index f7f559204b87..578b8038b211 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -521,6 +521,7 @@
 #define rxrpc_req_ack_traces \
 	EM(rxrpc_reqack_ack_lost,		"ACK-LOST  ")	\
 	EM(rxrpc_reqack_app_stall,		"APP-STALL ")	\
+	EM(rxrpc_reqack_jumbo_win,		"JUMBO-WIN ")	\
 	EM(rxrpc_reqack_more_rtt,		"MORE-RTT  ")	\
 	EM(rxrpc_reqack_no_srv_last,		"NO-SRVLAST")	\
 	EM(rxrpc_reqack_old_rtt,		"OLD-RTT   ")	\
-- 
cgit v1.2.3


From e3b2cf6e5dba416a03152f299d99982dfe1e861d Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 1 Apr 2026 12:15:58 +0200
Subject: kernfs: pass struct ns_common instead of const void * for namespace
 tags

kernfs has historically used const void * to pass around namespace tags
used for directory-level namespace filtering. The only current user of
this is sysfs network namespace tagging where struct net pointers are
cast to void *.

Replace all const void * namespace parameters with const struct
ns_common * throughout the kernfs, sysfs, and kobject namespace layers.
This includes the kobj_ns_type_operations callbacks, kobject_namespace(),
and all sysfs/kernfs APIs that accept or return namespace tags.

Passing struct ns_common is needed because various codepaths require
access to the underlying namespace. A struct ns_common can always be
converted back to the concrete namespace type (e.g., struct net) via
container_of() or to_ns_common() in the reverse direction.

This is a preparatory change for switching to ns_id-based directory
iteration to prevent a KASLR pointer leak through the current use of
raw namespace pointers as hash seeds and comparison keys.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/device/class.h |  6 +++---
 include/linux/kernfs.h       | 40 ++++++++++++++++++++++++----------------
 include/linux/kobject.h      |  4 ++--
 include/linux/kobject_ns.h   | 13 +++++++------
 include/linux/netdevice.h    |  4 ++--
 include/linux/sysfs.h        | 24 ++++++++++++------------
 include/net/net_namespace.h  |  8 ++++----
 7 files changed, 54 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index 65880e60c720..021da0d61796 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -62,7 +62,7 @@ struct class {
 	int (*shutdown_pre)(struct device *dev);
 
 	const struct kobj_ns_type_operations *ns_type;
-	const void *(*namespace)(const struct device *dev);
+	const struct ns_common *(*namespace)(const struct device *dev);
 
 	void (*get_ownership)(const struct device *dev, kuid_t *uid, kgid_t *gid);
 
@@ -180,9 +180,9 @@ struct class_attribute {
 	struct class_attribute class_attr_##_name = __ATTR_WO(_name)
 
 int __must_check class_create_file_ns(const struct class *class, const struct class_attribute *attr,
-				      const void *ns);
+				      const struct ns_common *ns);
 void class_remove_file_ns(const struct class *class, const struct class_attribute *attr,
-			  const void *ns);
+			  const struct ns_common *ns);
 
 static inline int __must_check class_create_file(const struct class *class,
 						 const struct class_attribute *attr)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b5a5f32fdfd1..4f0ab88a1b31 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -23,6 +23,7 @@
 struct file;
 struct dentry;
 struct iattr;
+struct ns_common;
 struct seq_file;
 struct vm_area_struct;
 struct vm_operations_struct;
@@ -209,7 +210,7 @@ struct kernfs_node {
 
 	struct rb_node		rb;
 
-	const void		*ns;	/* namespace tag */
+	const struct ns_common	*ns;	/* namespace tag */
 	unsigned int		hash;	/* ns + name hash */
 	unsigned short		flags;
 	umode_t			mode;
@@ -331,7 +332,7 @@ struct kernfs_ops {
  */
 struct kernfs_fs_context {
 	struct kernfs_root	*root;		/* Root of the hierarchy being mounted */
-	void			*ns_tag;	/* Namespace tag of the mount (or NULL) */
+	struct ns_common	*ns_tag;	/* Namespace tag of the mount (or NULL) */
 	unsigned long		magic;		/* File system specific magic number */
 
 	/* The following are set/used by kernfs_mount() */
@@ -406,9 +407,11 @@ void pr_cont_kernfs_name(struct kernfs_node *kn);
 void pr_cont_kernfs_path(struct kernfs_node *kn);
 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
-					   const char *name, const void *ns);
+					   const char *name,
+					   const struct ns_common *ns);
 struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
-					   const char *path, const void *ns);
+					   const char *path,
+					   const struct ns_common *ns);
 void kernfs_get(struct kernfs_node *kn);
 void kernfs_put(struct kernfs_node *kn);
 
@@ -426,7 +429,8 @@ unsigned int kernfs_root_flags(struct kernfs_node *kn);
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
 					 kuid_t uid, kgid_t gid,
-					 void *priv, const void *ns);
+					 void *priv,
+					 const struct ns_common *ns);
 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 					    const char *name);
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
@@ -434,7 +438,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 					 kuid_t uid, kgid_t gid,
 					 loff_t size,
 					 const struct kernfs_ops *ops,
-					 void *priv, const void *ns,
+					 void *priv,
+					 const struct ns_common *ns,
 					 struct lock_class_key *key);
 struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 				       const char *name,
@@ -446,9 +451,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn);
 void kernfs_unbreak_active_protection(struct kernfs_node *kn);
 bool kernfs_remove_self(struct kernfs_node *kn);
 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
-			     const void *ns);
+			     const struct ns_common *ns);
 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
-		     const char *new_name, const void *new_ns);
+		     const char *new_name, const struct ns_common *new_ns);
 int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
 __poll_t kernfs_generic_poll(struct kernfs_open_file *of,
 			     struct poll_table_struct *pt);
@@ -459,7 +464,7 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
 int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
 		     const void *value, size_t size, int flags);
 
-const void *kernfs_super_ns(struct super_block *sb);
+const struct ns_common *kernfs_super_ns(struct super_block *sb);
 int kernfs_get_tree(struct fs_context *fc);
 void kernfs_free_fs_context(struct fs_context *fc);
 void kernfs_kill_sb(struct super_block *sb);
@@ -494,11 +499,11 @@ static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
 
 static inline struct kernfs_node *
 kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name,
-		       const void *ns)
+		       const struct ns_common *ns)
 { return NULL; }
 static inline struct kernfs_node *
 kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path,
-		       const void *ns)
+		       const struct ns_common *ns)
 { return NULL; }
 
 static inline void kernfs_get(struct kernfs_node *kn) { }
@@ -526,14 +531,15 @@ static inline unsigned int kernfs_root_flags(struct kernfs_node *kn)
 static inline struct kernfs_node *
 kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
 		     umode_t mode, kuid_t uid, kgid_t gid,
-		     void *priv, const void *ns)
+		     void *priv, const struct ns_common *ns)
 { return ERR_PTR(-ENOSYS); }
 
 static inline struct kernfs_node *
 __kernfs_create_file(struct kernfs_node *parent, const char *name,
 		     umode_t mode, kuid_t uid, kgid_t gid,
 		     loff_t size, const struct kernfs_ops *ops,
-		     void *priv, const void *ns, struct lock_class_key *key)
+		     void *priv, const struct ns_common *ns,
+		     struct lock_class_key *key)
 { return ERR_PTR(-ENOSYS); }
 
 static inline struct kernfs_node *
@@ -549,12 +555,14 @@ static inline bool kernfs_remove_self(struct kernfs_node *kn)
 { return false; }
 
 static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn,
-					   const char *name, const void *ns)
+					   const char *name,
+					   const struct ns_common *ns)
 { return -ENOSYS; }
 
 static inline int kernfs_rename_ns(struct kernfs_node *kn,
 				   struct kernfs_node *new_parent,
-				   const char *new_name, const void *new_ns)
+				   const char *new_name,
+				   const struct ns_common *new_ns)
 { return -ENOSYS; }
 
 static inline int kernfs_setattr(struct kernfs_node *kn,
@@ -575,7 +583,7 @@ static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
 				   const void *value, size_t size, int flags)
 { return -ENOSYS; }
 
-static inline const void *kernfs_super_ns(struct super_block *sb)
+static inline const struct ns_common *kernfs_super_ns(struct super_block *sb)
 { return NULL; }
 
 static inline int kernfs_get_tree(struct fs_context *fc)
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index c8219505a79f..bcb5d4e32001 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -109,7 +109,7 @@ struct kobject *kobject_get(struct kobject *kobj);
 struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj);
 void kobject_put(struct kobject *kobj);
 
-const void *kobject_namespace(const struct kobject *kobj);
+const struct ns_common *kobject_namespace(const struct kobject *kobj);
 void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 char *kobject_get_path(const struct kobject *kobj, gfp_t flag);
 
@@ -118,7 +118,7 @@ struct kobj_type {
 	const struct sysfs_ops *sysfs_ops;
 	const struct attribute_group **default_groups;
 	const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj);
-	const void *(*namespace)(const struct kobject *kobj);
+	const struct ns_common *(*namespace)(const struct kobject *kobj);
 	void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 };
 
diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
index 150fe2ae1b6b..4f0990e09b93 100644
--- a/include/linux/kobject_ns.h
+++ b/include/linux/kobject_ns.h
@@ -16,6 +16,7 @@
 #ifndef _LINUX_KOBJECT_NS_H
 #define _LINUX_KOBJECT_NS_H
 
+struct ns_common;
 struct sock;
 struct kobject;
 
@@ -39,10 +40,10 @@ enum kobj_ns_type {
 struct kobj_ns_type_operations {
 	enum kobj_ns_type type;
 	bool (*current_may_mount)(void);
-	void *(*grab_current_ns)(void);
-	const void *(*netlink_ns)(struct sock *sk);
-	const void *(*initial_ns)(void);
-	void (*drop_ns)(void *);
+	struct ns_common *(*grab_current_ns)(void);
+	const struct ns_common *(*netlink_ns)(struct sock *sk);
+	const struct ns_common *(*initial_ns)(void);
+	void (*drop_ns)(struct ns_common *);
 };
 
 int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
@@ -51,7 +52,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa
 const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj);
 
 bool kobj_ns_current_may_mount(enum kobj_ns_type type);
-void *kobj_ns_grab_current(enum kobj_ns_type type);
-void kobj_ns_drop(enum kobj_ns_type type, void *ns);
+struct ns_common *kobj_ns_grab_current(enum kobj_ns_type type);
+void kobj_ns_drop(enum kobj_ns_type type, struct ns_common *ns);
 
 #endif /* _LINUX_KOBJECT_NS_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ca01eb3f7d2..85c20bdd36fb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5339,9 +5339,9 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi
 }
 
 int netdev_class_create_file_ns(const struct class_attribute *class_attr,
-				const void *ns);
+				const struct ns_common *ns);
 void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
-				 const void *ns);
+				 const struct ns_common *ns);
 
 extern const struct kobj_ns_type_operations net_ns_type_operations;
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 99b775f3ff46..468259fb6049 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -396,13 +396,13 @@ struct sysfs_ops {
 
 #ifdef CONFIG_SYSFS
 
-int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns);
+int __must_check sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns);
 void sysfs_remove_dir(struct kobject *kobj);
 int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
-				     const void *new_ns);
+				     const struct ns_common *new_ns);
 int __must_check sysfs_move_dir_ns(struct kobject *kobj,
 				   struct kobject *new_parent_kobj,
-				   const void *new_ns);
+				   const struct ns_common *new_ns);
 int __must_check sysfs_create_mount_point(struct kobject *parent_kobj,
 					  const char *name);
 void sysfs_remove_mount_point(struct kobject *parent_kobj,
@@ -410,7 +410,7 @@ void sysfs_remove_mount_point(struct kobject *parent_kobj,
 
 int __must_check sysfs_create_file_ns(struct kobject *kobj,
 				      const struct attribute *attr,
-				      const void *ns);
+				      const struct ns_common *ns);
 int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute * const *attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj,
@@ -419,7 +419,7 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
 						  const struct attribute *attr);
 void sysfs_unbreak_active_protection(struct kernfs_node *kn);
 void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
-			  const void *ns);
+			  const struct ns_common *ns);
 bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr);
 void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr);
 
@@ -437,7 +437,7 @@ void sysfs_remove_link(struct kobject *kobj, const char *name);
 
 int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *target,
 			 const char *old_name, const char *new_name,
-			 const void *new_ns);
+			 const struct ns_common *new_ns);
 
 void sysfs_delete_link(struct kobject *dir, struct kobject *targ,
 			const char *name);
@@ -502,7 +502,7 @@ ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj,
 
 #else /* CONFIG_SYSFS */
 
-static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
+static inline int sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns)
 {
 	return 0;
 }
@@ -512,14 +512,14 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
 }
 
 static inline int sysfs_rename_dir_ns(struct kobject *kobj,
-				      const char *new_name, const void *new_ns)
+				      const char *new_name, const struct ns_common *new_ns)
 {
 	return 0;
 }
 
 static inline int sysfs_move_dir_ns(struct kobject *kobj,
 				    struct kobject *new_parent_kobj,
-				    const void *new_ns)
+				    const struct ns_common *new_ns)
 {
 	return 0;
 }
@@ -537,7 +537,7 @@ static inline void sysfs_remove_mount_point(struct kobject *parent_kobj,
 
 static inline int sysfs_create_file_ns(struct kobject *kobj,
 				       const struct attribute *attr,
-				       const void *ns)
+				       const struct ns_common *ns)
 {
 	return 0;
 }
@@ -567,7 +567,7 @@ static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn)
 
 static inline void sysfs_remove_file_ns(struct kobject *kobj,
 					const struct attribute *attr,
-					const void *ns)
+					const struct ns_common *ns)
 {
 }
 
@@ -612,7 +612,7 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name)
 
 static inline int sysfs_rename_link_ns(struct kobject *k, struct kobject *t,
 				       const char *old_name,
-				       const char *new_name, const void *ns)
+				       const char *new_name, const struct ns_common *ns)
 {
 	return 0;
 }
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index d7bec49ee9ea..80de5e98a66d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -264,14 +264,14 @@ void ipx_unregister_sysctl(void);
 #define ipx_unregister_sysctl()
 #endif
 
-#ifdef CONFIG_NET_NS
-void __put_net(struct net *net);
-
 static inline struct net *to_net_ns(struct ns_common *ns)
 {
 	return container_of(ns, struct net, ns);
 }
 
+#ifdef CONFIG_NET_NS
+void __put_net(struct net *net);
+
 /* Try using get_net_track() instead */
 static inline struct net *get_net(struct net *net)
 {
@@ -309,7 +309,7 @@ static inline int check_net(const struct net *net)
 	return ns_ref_read(net) != 0;
 }
 
-void net_drop_ns(void *);
+void net_drop_ns(struct ns_common *);
 void net_passive_dec(struct net *net);
 
 #else
-- 
cgit v1.2.3


From d6e152d905bdb1f32f9d99775e2f453350399a6a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@kernel.org>
Date: Tue, 7 Apr 2026 10:54:17 +0200
Subject: clockevents: Prevent timer interrupt starvation

Calvin reported an odd NMI watchdog lockup which claims that the CPU locked
up in user space. He provided a reproducer, which sets up a timerfd based
timer and then rearms it in a loop with an absolute expiry time of 1ns.

As the expiry time is in the past, the timer ends up as the first expiring
timer in the per CPU hrtimer base and the clockevent device is programmed
with the minimum delta value. If the machine is fast enough, this ends up
in an endless loop of programming the delta value to the minimum value
defined by the clock event device, before the timer interrupt can fire,
which starves the interrupt and consequently triggers the lockup detector
because the hrtimer callback of the lockup mechanism is never invoked.

As a first step to prevent this, avoid reprogramming the clock event device
when:
     - a forced minimum delta event is pending
     - the new expiry delta is less than or equal to the minimum delta

Thanks to Calvin for providing the reproducer and to Borislav for testing
and providing data from his Zen5 machine.

The problem is not limited to Zen5, but depending on the underlying
clock event device (e.g. TSC deadline timer on Intel) and the CPU speed
it is not necessarily observable.

This change serves only as the last resort and further changes will be made
to prevent this scenario earlier in the call chain as far as possible.

[ tglx: Updated to restore the old behaviour vs. !force and delta <= 0 and
  	fixed up the tick-broadcast handlers as pointed out by Borislav ]

Fixes: d316c57ff6bf ("[PATCH] clockevents: add core functionality")
Reported-by: Calvin Owens <calvin@wbinvd.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Calvin Owens <calvin@wbinvd.org>
Tested-by: Borislav Petkov <bp@alien8.de>
Link: https://lore.kernel.org/lkml/acMe-QZUel-bBYUh@mozart.vkv.me/
Link: https://patch.msgid.link/20260407083247.562657657@kernel.org
---
 include/linux/clockchips.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index b0df28ddd394..50cdc9da8d32 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -80,6 +80,7 @@ enum clock_event_state {
  * @shift:		nanoseconds to cycles divisor (power of two)
  * @state_use_accessors:current state of the device, assigned by the core code
  * @features:		features
+ * @next_event_forced:	True if the last programming was a forced event
  * @retries:		number of forced programming retries
  * @set_state_periodic:	switch state to periodic
  * @set_state_oneshot:	switch state to oneshot
@@ -108,6 +109,7 @@ struct clock_event_device {
 	u32			shift;
 	enum clock_event_state	state_use_accessors;
 	unsigned int		features;
+	unsigned int		next_event_forced;
 	unsigned long		retries;
 
 	int			(*set_state_periodic)(struct clock_event_device *);
-- 
cgit v1.2.3