From 7efa84b5cdd6d473c7e80912638fca9d7167f202 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Fri, 4 Apr 2025 15:10:02 -0700
Subject: compiler-gcc.h: Introduce __diag_GCC_all

It is not possible to disable a diagnostic for all versions of GCC without hard coding the minimum supported version at each site, as the GCC-specific macros require a minimum version to disable the warning for:

  __diag_ignore(GCC, 5, ...);

__diag_ignore_all() does not solve this issue because it disables a diagnostic for all versions of both GCC and clang, not just one or the other.

Introduce __diag_GCC_all so that developers can write

  __diag_ignore(GCC, all, ...);

to disable a particular diagnostic for all versions of GCC, while not affecting clang.

Closes: https://lore.kernel.org/r/CAHk-=wgfX9nBGE0Ap9GjhOy7Mn=RSy=rx0MvqfYFFDx31KJXqQ@mail.gmail.com
Signed-off-by: Nathan Chancellor
Tested-by: Andy Shevchenko
Reviewed-by: Petr Mladek
Link: https://patch.msgid.link/20250404-vsprintf-convert-pragmas-to-__diag-v1-1-5d6c5c55b2bd@kernel.org
Signed-off-by: Petr Mladek
---
 include/linux/compiler-gcc.h | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index c9b58188ec61..c75a222880f9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -127,6 +127,8 @@
 #define __diag_GCC_8(s)
 #endif

+#define __diag_GCC_all(s) __diag(s)
+
 #define __diag_ignore_all(option, comment) \
	__diag(__diag_GCC_ignore option)
-- cgit v1.2.3

From de1c831a7898f164c1c2703c6b2b9e4fb4bebefc Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 15 Apr 2025 10:02:33 -0700
Subject: slab: Decouple slab_debug and no_hash_pointers

Some system owners use slab_debug=FPZ (or similar) as a hardening option, but do not want to be forced into having kernel addresses exposed due to the implicit "no_hash_pointers" boot param setting.[1]

Introduce the "hash_pointers" boot param, which defaults to "auto" (the current behavior), but also accepts "always" (forcing hashing on even when "slab_debug=..." is defined) and "never". The existing "no_hash_pointers" boot param becomes an alias for "hash_pointers=never". This makes it possible to boot with "slab_debug=FPZ hash_pointers=always".

Link: https://github.com/KSPP/linux/issues/368 [1]
Fixes: 792702911f58 ("slub: force on no_hash_pointers when slub_debug is enabled")
Co-developed-by: Sergio Perez Gonzalez
Signed-off-by: Sergio Perez Gonzalez
Acked-by: Vlastimil Babka
Acked-by: David Rientjes
Reviewed-by: Bagas Sanjaya
Signed-off-by: Kees Cook
Reviewed-by: Harry Yoo
Acked-by: Rafael Aquini
Tested-by: Petr Mladek
Reviewed-by: Petr Mladek
Link: https://patch.msgid.link/20250415170232.it.467-kees@kernel.org
[kees@kernel.org: Add note about hash_pointers into slab_debug kernel parameter documentation.]
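As a minimal sketch of the three-way switch described above (hypothetical only: the enum, variable, and function names here are made up and this is not the code merged in lib/vsprintf.c), an early_param handler for such an option could look like:

	#include <linux/errno.h>
	#include <linux/init.h>
	#include <linux/string.h>

	/* Hypothetical parser for "hash_pointers=auto|always|never". */
	enum hash_ptr_mode { HASH_PTR_AUTO, HASH_PTR_ALWAYS, HASH_PTR_NEVER };
	static enum hash_ptr_mode hash_ptr_mode __ro_after_init = HASH_PTR_AUTO;

	static int __init hash_pointers_param(char *str)
	{
		if (!str)
			return -EINVAL;

		if (!strcmp(str, "always"))
			hash_ptr_mode = HASH_PTR_ALWAYS;
		else if (!strcmp(str, "never"))
			hash_ptr_mode = HASH_PTR_NEVER;
		else
			hash_ptr_mode = HASH_PTR_AUTO;

		return 0;
	}
	early_param("hash_pointers", hash_pointers_param);

In "auto" mode the kernel keeps today's behavior (hashing unless slab_debug implies plain addresses); "always" and "never" override that decision in either direction.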
Signed-off-by: Petr Mladek --- include/linux/sprintf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 51cab2def9ec..521bb2cd2648 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -22,7 +22,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); /* These are for specific cases, do not use without real need */ extern bool no_hash_pointers; -int no_hash_pointers_enable(char *str); +void hash_pointers_finalize(bool slub_debug); /* Used for Rust formatting ('%pA') */ char *rust_fmt_argument(char *buf, char *end, const void *ptr); -- cgit v1.2.3 From 7934a8dd8692b56714ce9b36421e316445d94a77 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 6 Jun 2025 13:10:23 +0900 Subject: module: remove meaningless 'name' parameter from __MODULE_INFO() The symbol names in the .modinfo section are never used and already randomized by the __UNIQUE_ID() macro. Therefore, the second parameter of __MODULE_INFO() is meaningless and can be removed to simplify the code. With this change, the symbol names in the .modinfo section will be prefixed with __UNIQUE_ID_modinfo, making it clearer that they originate from MODULE_INFO(). [Before] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_license560 0000000000000011 r __UNIQUE_ID_file559 0000000000000030 r __UNIQUE_ID_description558 0000000000000074 r __UNIQUE_ID_license580 000000000000008e r __UNIQUE_ID_file579 00000000000000bd r __UNIQUE_ID_description578 00000000000000e6 r __UNIQUE_ID_license581 00000000000000ff r __UNIQUE_ID_file580 0000000000000134 r __UNIQUE_ID_description579 0000000000000179 r __UNIQUE_ID_uncore_no_discover578 [After] $ objcopy -j .modinfo vmlinux.o modinfo.o $ nm -n modinfo.o | head -n10 0000000000000000 r __UNIQUE_ID_modinfo560 0000000000000011 r __UNIQUE_ID_modinfo559 0000000000000030 r __UNIQUE_ID_modinfo558 0000000000000074 r __UNIQUE_ID_modinfo580 000000000000008e r __UNIQUE_ID_modinfo579 00000000000000bd r __UNIQUE_ID_modinfo578 00000000000000e6 r __UNIQUE_ID_modinfo581 00000000000000ff r __UNIQUE_ID_modinfo580 0000000000000134 r __UNIQUE_ID_modinfo579 0000000000000179 r __UNIQUE_ID_modinfo578 Signed-off-by: Masahiro Yamada Reviewed-by: Petr Pavlu --- include/linux/module.h | 3 --- include/linux/moduleparam.h | 9 +++++---- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..81b41cc6a19e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -164,9 +164,6 @@ extern void cleanup_module(void); struct module_kobject *lookup_or_create_module_kobject(const char *name); -/* Generic info of form tag = "info" */ -#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) - /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..00166f747e27 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -20,18 +20,19 @@ /* Chosen so that structs with an unsigned long line up. 
*/ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -#define __MODULE_INFO(tag, name, info) \ - static const char __UNIQUE_ID(name)[] \ +/* Generic info of form tag = "info" */ +#define MODULE_INFO(tag, info) \ + static const char __UNIQUE_ID(modinfo)[] \ __used __section(".modinfo") __aligned(1) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ - __MODULE_INFO(parmtype, name##type, #name ":" _type) + MODULE_INFO(parmtype, #name ":" _type) /* One for each parameter, describing how to use it. Some files do multiple of these per line, so can't just use MODULE_INFO. */ #define MODULE_PARM_DESC(_parm, desc) \ - __MODULE_INFO(parm, _parm, #_parm ":" desc) + MODULE_INFO(parm, #_parm ":" desc) struct kernel_param; -- cgit v1.2.3 From f393a761763c542761abcf978252d431269366d6 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 8 Jul 2025 14:56:23 +0200 Subject: efi: add ovmf debug log driver Recent OVMF versions (edk2-stable202508 + newer) can write their debug log to a memory buffer. This driver exposes the log content via sysfs (/sys/firmware/efi/ovmf_debug_log). Signed-off-by: Gerd Hoffmann Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7d63d1d75f22..50db7df0efab 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -439,6 +439,7 @@ void efi_native_runtime_setup(void); /* OVMF protocol GUIDs */ #define OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID EFI_GUID(0xc5a010fe, 0x38a7, 0x4531, 0x8a, 0x4a, 0x05, 0x00, 0xd2, 0xfd, 0x16, 0x49) +#define OVMF_MEMORY_LOG_TABLE_GUID EFI_GUID(0x95305139, 0xb20f, 0x4723, 0x84, 0x25, 0x62, 0x7c, 0x88, 0x8f, 0xf1, 0x21) typedef struct { efi_guid_t guid; @@ -642,6 +643,7 @@ extern struct efi { unsigned long esrt; /* ESRT table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ + unsigned long ovmf_debug_log; unsigned long mokvar_table; /* MOK variable config table */ unsigned long coco_secret; /* Confidential computing secret table */ unsigned long unaccepted; /* Unaccepted memory table */ @@ -1344,6 +1346,8 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); +int ovmf_log_probe(unsigned long ovmf_debug_log_table); + /* * efivar ops event type */ -- cgit v1.2.3 From 50b4233a22b1ee9ccd0e847597de66ce21329ddb Mon Sep 17 00:00:00 2001 From: Julian Vetter Date: Tue, 3 Jun 2025 15:21:21 +0200 Subject: include/linux/jhash.h: replace __get_unaligned_cpu32 in jhash function __get_unaligned_cpu32() is deprecated. So, replace it with the more generic get_unaligned() and just cast the input parameter. 
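To illustrate the substitution pattern (a sketch, not part of the patch itself): get_unaligned() infers the access width from the pointed-to type, so a cast replaces the size-specific helper name.

	#include <linux/types.h>
	#include <linux/unaligned.h>

	/* Read a possibly-unaligned 32-bit word in native byte order. */
	static inline u32 read_u32_unaligned(const void *p)
	{
		return get_unaligned((const u32 *)p);
	}

The same generic helper covers 16-, 32- and 64-bit loads, which is why the fixed-width __get_unaligned_cpu32() family can be retired.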
Link: https://lkml.kernel.org/r/20250603132121.3674066-1-julian@outer-limits.org
Signed-off-by: Julian Vetter
Cc: Arnd Bergmann
Cc: Wei-Hsin Yeh
Signed-off-by: Andrew Morton
---
 include/linux/jhash.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
(limited to 'include/linux')

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index fa26a2dd3b52..7c1c1821c694 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -24,7 +24,7 @@
 * Jozsef
 */
 #include <linux/bitops.h>
-#include <linux/unaligned/packed_struct.h>
+#include <linux/unaligned.h>

 /* Best hash sizes are of power of two */
 #define jhash_size(n)   ((u32)1<<(n))
@@ -77,9 +77,9 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
	/* All but the last block: affect some 32 bits of (a,b,c) */
	while (length > 12) {
-		a += __get_unaligned_cpu32(k);
-		b += __get_unaligned_cpu32(k + 4);
-		c += __get_unaligned_cpu32(k + 8);
+		a += get_unaligned((u32 *)k);
+		b += get_unaligned((u32 *)(k + 4));
+		c += get_unaligned((u32 *)(k + 8));
		__jhash_mix(a, b, c);
		length -= 12;
		k += 12;
-- cgit v1.2.3

From 2489e958129ff7cbf26a34ee33cdc9ccbd68fe3c Mon Sep 17 00:00:00 2001
From: Jason Xing
Date: Thu, 12 Jun 2025 14:11:57 +0800
Subject: relayfs: abolish prev_padding

Patch series "relayfs: misc changes", v5. The series mostly focuses on the error counters, which help every user debug their own kernel module.

This patch (of 5):

prev_padding represents the unused space of a given sub-buffer. If the data written by a relay_write() call does not fit into the remainder of the current sub-buffer, the leftover space is skipped and its start is recorded as buf->prev_padding in relay_switch_subbuf(). Since the buf is one large per-cpu buffer, keeping prev_padding as a single value for the whole buffer rather than per sub-buffer (whose padding info is already stored in buf->padding[]) has no real use case, so stop recording it.

Link: https://lkml.kernel.org/r/20250612061201.34272-1-kerneljasonxing@gmail.com
Link: https://lkml.kernel.org/r/20250612061201.34272-2-kerneljasonxing@gmail.com
Signed-off-by: Jason Xing
Reviewed-by: Yushan Zhou
Reviewed-by: Masami Hiramatsu (Google)
Cc: Jens Axboe
Cc: Mathieu Desnoyers
Cc: Steven Rostedt
Signed-off-by: Andrew Morton
---
 include/linux/relay.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)
(limited to 'include/linux')

diff --git a/include/linux/relay.h b/include/linux/relay.h
index b3224111d074..e10a0fdf4325 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -47,7 +47,6 @@ struct rchan_buf
	unsigned int page_count;	/* number of current buffer pages */
	unsigned int finalized;		/* buffer has been finalized */
	size_t *padding;		/* padding counts per sub-buffer */
-	size_t prev_padding;		/* temporary variable */
	size_t bytes_consumed;		/* bytes consumed in cur read subbuf */
	size_t early_bytes;		/* bytes consumed before VFS inited */
	unsigned int cpu;		/* this buf's cpu */
@@ -84,7 +83,6 @@ struct rchan_callbacks
	 * @buf: the channel buffer containing the new sub-buffer
	 * @subbuf: the start of the new sub-buffer
	 * @prev_subbuf: the start of the previous sub-buffer
-	 * @prev_padding: unused space at the end of previous sub-buffer
	 *
	 * The client should return 1 to continue logging, 0 to stop logging.
@@ -100,8 +98,7 @@ struct rchan_callbacks
	 */
	int (*subbuf_start) (struct rchan_buf *buf,
			     void *subbuf,
-			     void *prev_subbuf,
-			     size_t prev_padding);
+			     void *prev_subbuf);

	/*
	 * create_buf_file - create file to represent a relay channel buffer
-- cgit v1.2.3

From ca01a90ae7bf9bb22137e719366bdc0f387675c2 Mon Sep 17 00:00:00 2001
From: Jason Xing
Date: Thu, 12 Jun 2025 14:11:58 +0800
Subject: relayfs: support a counter tracking if per-cpu buffers is full

When using the relay mechanism, we often encounter cases where new data is lost or old unconsumed data is overwritten because of a slow reader. Add a 'full' counter to the per-cpu buffer structure to detect when this happens.

Relay has two modes: 1) non-overwrite mode, 2) overwrite mode. A full buffer therefore means, respectively: 1) relayfs does not intend to accept new data and simply drops it, or 2) relayfs starts over and overwrites old unread data with new data.

Note: for performance reasons, this counter is not protected by an explicit lock against modification by different threads. Writers calling __relay_write()/relay_write() are expected to provide their own locking and to perform the write under that protection, so no new lock is added here.

Link: https://lkml.kernel.org/r/20250612061201.34272-3-kerneljasonxing@gmail.com
Signed-off-by: Jason Xing
Reviewed-by: Yushan Zhou
Reviewed-by: Jens Axboe
Reviewed-by: Masami Hiramatsu (Google)
Cc: Mathieu Desnoyers
Cc: Steven Rostedt
Signed-off-by: Andrew Morton
---
 include/linux/relay.h | 9 +++++++++
 1 file changed, 9 insertions(+)
(limited to 'include/linux')

diff --git a/include/linux/relay.h b/include/linux/relay.h
index e10a0fdf4325..cd77eb285a48 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -28,6 +28,14 @@
 */
 #define RELAYFS_CHANNEL_VERSION		7

+/*
+ * Relay buffer statistics
+ */
+struct rchan_buf_stats
+{
+	unsigned int full_count;	/* counter for buffer full */
+};
+
 /*
 * Per-cpu relay channel buffer
 */
@@ -43,6 +51,7 @@ struct rchan_buf
	struct irq_work wakeup_work;	/* reader wakeup */
	struct dentry *dentry;		/* channel file dentry */
	struct kref kref;		/* channel buffer refcount */
+	struct rchan_buf_stats stats;	/* buffer stats */
	struct page **page_array;	/* array of current buffer pages */
	unsigned int page_count;	/* number of current buffer pages */
	unsigned int finalized;		/* buffer has been finalized */
-- cgit v1.2.3

From a53202ce7fbafd24f854865b02eff891e246c550 Mon Sep 17 00:00:00 2001
From: Jason Xing
Date: Thu, 12 Jun 2025 14:11:59 +0800
Subject: relayfs: introduce getting relayfs statistics function

This version only supports reading the buffer-full counter and implements the framework for how statistics retrieval works. Users pass a flag selecting the statistic they want to fetch. Each call returns exactly one result, so do not pass multiple flags; see the usage sketch below.
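A hedged usage sketch (assuming a channel previously returned by relay_open(); this snippet is illustrative and not part of the patch):

	#include <linux/printk.h>
	#include <linux/relay.h>

	/* Query exactly one statistic per call; flags must not be combined. */
	static void report_relay_pressure(struct rchan *chan)
	{
		size_t full_count = relay_stats(chan, RELAY_STATS_BUF_FULL);

		if (full_count)
			pr_warn("relay: buffer-full hit %zu times\n", full_count);
	}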
Link: https://lkml.kernel.org/r/20250612061201.34272-4-kerneljasonxing@gmail.com
Signed-off-by: Jason Xing
Reviewed-by: Yushan Zhou
Reviewed-by: Masami Hiramatsu (Google)
Cc: Jens Axboe
Cc: Mathieu Desnoyers
Cc: Steven Rostedt
Signed-off-by: Andrew Morton
---
 include/linux/relay.h | 7 +++++++
 1 file changed, 7 insertions(+)
(limited to 'include/linux')

diff --git a/include/linux/relay.h b/include/linux/relay.h
index cd77eb285a48..5310967f9d74 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -31,6 +31,12 @@
 /*
 * Relay buffer statistics
 */
+enum {
+	RELAY_STATS_BUF_FULL = (1 << 0),
+
+	RELAY_STATS_LAST = RELAY_STATS_BUF_FULL,
+};
+
 struct rchan_buf_stats
 {
	unsigned int full_count;	/* counter for buffer full */
@@ -167,6 +173,7 @@ struct rchan *relay_open(const char *base_filename,
			 void *private_data);
 extern void relay_close(struct rchan *chan);
 extern void relay_flush(struct rchan *chan);
+size_t relay_stats(struct rchan *chan, int flags);
 extern void relay_subbufs_consumed(struct rchan *chan,
				    unsigned int cpu,
				    size_t consumed);
-- cgit v1.2.3

From 19f3cb64a25b80db667a00182785577fae465b3e Mon Sep 17 00:00:00 2001
From: Jason Xing
Date: Thu, 12 Jun 2025 14:12:01 +0800
Subject: relayfs: support a counter tracking if data is too big to write

It does not really matter whether the user/admin knows what the last too-big value was; recording how many times this case is triggered is more helpful. Also solve the existing issue where relay_reset() doesn't restore the value.

Store the counter in the per-cpu buffer structure instead of the global channel structure. This also avoids the race that can occur when several per-cpu buffers hit the too-big case and then update the global last_toobig field without lock protection.

Remove the printk in relay_close(), since kernel modules can call relay_stats() directly whenever they want.

Link: https://lkml.kernel.org/r/20250612061201.34272-6-kerneljasonxing@gmail.com
Signed-off-by: Jason Xing
Reviewed-by: Yushan Zhou
Reviewed-by: Masami Hiramatsu (Google)
Cc: Jens Axboe
Cc: Mathieu Desnoyers
Cc: Steven Rostedt
Signed-off-by: Andrew Morton
---
 include/linux/relay.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
(limited to 'include/linux')

diff --git a/include/linux/relay.h b/include/linux/relay.h
index 5310967f9d74..6772a7075840 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -33,13 +33,15 @@
 */
 enum {
	RELAY_STATS_BUF_FULL = (1 << 0),
+	RELAY_STATS_WRT_BIG = (1 << 1),

-	RELAY_STATS_LAST = RELAY_STATS_BUF_FULL,
+	RELAY_STATS_LAST = RELAY_STATS_WRT_BIG,
 };

 struct rchan_buf_stats
 {
	unsigned int full_count;	/* counter for buffer full */
+	unsigned int big_count;		/* counter for too big to write */
 };

@@ -79,7 +81,6 @@ struct rchan
	const struct rchan_callbacks *cb;	/* client callbacks */
	struct kref kref;		/* channel refcount */
	void *private_data;		/* for user-defined data */
-	size_t last_toobig;		/* tried to log event > subbuf size */
	struct rchan_buf * __percpu *buf;	/* per-cpu channel buffers */
	int is_global;			/* One global buffer ? */
	struct list_head list;		/* for channel list */
-- cgit v1.2.3

From 1857fcc847443b0238cb64584b43d8c3a9049a0a Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Mon, 17 Mar 2025 17:02:28 +0800
Subject: lib/raid6: replace custom zero page with ZERO_PAGE

Use the system-wide zero page instead of a custom zero page.
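As a brief illustration of what the helper below enables (a hypothetical caller, not code from this patch), a recovery path can point absent blocks at the shared zero page instead of keeping a private zero buffer:

	#include <linux/raid/pq.h>

	/* Hypothetical helper: treat missing per-disk blocks as all-zero. */
	static void fill_missing_with_zero(void **ptrs, int disks)
	{
		void *zero = raid6_get_zero_page();
		int i;

		for (i = 0; i < disks; i++)
			if (!ptrs[i])
				ptrs[i] = zero;
	}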
[herbert@gondor.apana.org.au: update lib/raid6/recov_rvv.c, per Klara] Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Link: https://lkml.kernel.org/r/Z9flJNkWQICx0PXk@gondor.apana.org.au Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- include/linux/raid/pq.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 72ff44cca864..2467b3be15c9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -11,8 +11,13 @@ #ifdef __KERNEL__ #include <linux/blkdev.h> +#include <linux/mm.h> -extern const char raid6_empty_zero_page[PAGE_SIZE]; +/* This should be const but the raid6 code is too convoluted for that. */ +static inline void *raid6_get_zero_page(void) +{ + return page_address(ZERO_PAGE(0)); +} #else /* ! __KERNEL__ */ /* Used for testing in user space */ @@ -191,6 +196,11 @@ static inline uint32_t raid6_jiffies(void) return tv.tv_sec*1000 + tv.tv_usec/1000; } +static inline void *raid6_get_zero_page(void) +{ + return raid6_empty_zero_page; +} + #endif /* ! __KERNEL__ */ #endif /* LINUX_RAID_RAID6_H */ -- cgit v1.2.3 From ce60ab3964782df9ba34f0a64c0bc766dd508bde Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2025 06:45:45 -0400 Subject: Expand the type of nfs_fattr->valid We need to be able to track more than 32 attributes per inode. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3405fca54efd0be7c91c1da77917b94f5dfcc4.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 54 +++++++++++++++++++++++------------------------ 2 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 63141320c2a8..d7895eeccea3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -172,8 +172,8 @@ struct nfs_server { #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 #define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 - unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ + __u64 fattr_valid; /* Valid attributes */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 67f6632f723b..9cacbbd14787 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -45,7 +45,7 @@ struct nfs4_threshold { }; struct nfs_fattr { - unsigned int valid; /* which fields are valid */ + __u64 valid; /* which fields are valid */ umode_t mode; __u32 nlink; kuid_t uid; @@ -80,32 +80,32 @@ struct nfs_fattr { struct nfs4_label *label; }; -#define NFS_ATTR_FATTR_TYPE (1U << 0) -#define NFS_ATTR_FATTR_MODE (1U << 1) -#define NFS_ATTR_FATTR_NLINK (1U << 2) -#define NFS_ATTR_FATTR_OWNER (1U << 3) -#define NFS_ATTR_FATTR_GROUP (1U << 4) -#define NFS_ATTR_FATTR_RDEV (1U << 5) -#define NFS_ATTR_FATTR_SIZE (1U << 6) -#define NFS_ATTR_FATTR_PRESIZE (1U << 7) -#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) -#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) -#define NFS_ATTR_FATTR_FSID (1U << 10) -#define NFS_ATTR_FATTR_FILEID (1U << 11) -#define NFS_ATTR_FATTR_ATIME (1U << 12) -#define NFS_ATTR_FATTR_MTIME (1U << 13) -#define NFS_ATTR_FATTR_CTIME (1U << 14) -#define NFS_ATTR_FATTR_PREMTIME (1U << 15) -#define
NFS_ATTR_FATTR_PRECTIME (1U << 16) -#define NFS_ATTR_FATTR_CHANGE (1U << 17) -#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) -#define NFS_ATTR_FATTR_V4_LOCATIONS (1U << 19) -#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 20) -#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 21) -#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22) -#define NFS_ATTR_FATTR_OWNER_NAME (1U << 23) -#define NFS_ATTR_FATTR_GROUP_NAME (1U << 24) -#define NFS_ATTR_FATTR_V4_SECURITY_LABEL (1U << 25) +#define NFS_ATTR_FATTR_TYPE BIT_ULL(0) +#define NFS_ATTR_FATTR_MODE BIT_ULL(1) +#define NFS_ATTR_FATTR_NLINK BIT_ULL(2) +#define NFS_ATTR_FATTR_OWNER BIT_ULL(3) +#define NFS_ATTR_FATTR_GROUP BIT_ULL(4) +#define NFS_ATTR_FATTR_RDEV BIT_ULL(5) +#define NFS_ATTR_FATTR_SIZE BIT_ULL(6) +#define NFS_ATTR_FATTR_PRESIZE BIT_ULL(7) +#define NFS_ATTR_FATTR_BLOCKS_USED BIT_ULL(8) +#define NFS_ATTR_FATTR_SPACE_USED BIT_ULL(9) +#define NFS_ATTR_FATTR_FSID BIT_ULL(10) +#define NFS_ATTR_FATTR_FILEID BIT_ULL(11) +#define NFS_ATTR_FATTR_ATIME BIT_ULL(12) +#define NFS_ATTR_FATTR_MTIME BIT_ULL(13) +#define NFS_ATTR_FATTR_CTIME BIT_ULL(14) +#define NFS_ATTR_FATTR_PREMTIME BIT_ULL(15) +#define NFS_ATTR_FATTR_PRECTIME BIT_ULL(16) +#define NFS_ATTR_FATTR_CHANGE BIT_ULL(17) +#define NFS_ATTR_FATTR_PRECHANGE BIT_ULL(18) +#define NFS_ATTR_FATTR_V4_LOCATIONS BIT_ULL(19) +#define NFS_ATTR_FATTR_V4_REFERRAL BIT_ULL(20) +#define NFS_ATTR_FATTR_MOUNTPOINT BIT_ULL(21) +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID BIT_ULL(22) +#define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) +#define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) +#define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ -- cgit v1.2.3 From 1c7ae2dd3f0e6d07ec0a5a348f2561f2171b9c81 Mon Sep 17 00:00:00 2001 From: Anne Marie Merritt Date: Thu, 29 May 2025 06:45:46 -0400 Subject: nfs: Add timecreate to nfs inode Add tracking of the create time (a.k.a. btime) along with corresponding bitfields, request, and decode xdr routines. Signed-off-by: Anne Marie Merritt Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3677b0655fa2bbaba0817b41d111d94a06e5ee.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 ++++++++ include/linux/nfs_xdr.h | 3 +++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67ae2c3f41d2..c585939b6cd6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -160,6 +160,12 @@ struct nfs_inode { unsigned long flags; /* atomic bit ops */ unsigned long cache_validity; /* bit mask */ + /* + * NFS Attributes not included in struct inode + */ + + struct timespec64 btime; + /* * read_cache_jiffies is when we started read-caching this inode. 
* attrtimeo is for how long the cached information is assumed @@ -316,10 +322,12 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ #define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ +#define NFS_INO_INVALID_BTIME BIT(18) /* cached btime is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ + | NFS_INO_INVALID_BTIME \ | NFS_INO_INVALID_SIZE \ | NFS_INO_INVALID_NLINK \ | NFS_INO_INVALID_MODE \ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cacbbd14787..ac4bff6e9913 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -67,6 +67,7 @@ struct nfs_fattr { struct timespec64 atime; struct timespec64 mtime; struct timespec64 ctime; + struct timespec64 btime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ @@ -106,6 +107,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) #define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) #define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) +#define NFS_ATTR_FATTR_BTIME BIT_ULL(26) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ @@ -126,6 +128,7 @@ struct nfs_fattr { | NFS_ATTR_FATTR_SPACE_USED) #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_BTIME \ | NFS_ATTR_FATTR_V4_SECURITY_LABEL) /* -- cgit v1.2.3 From 0715a72ee9a38461eac4b34388b772914f269119 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:37 +0200 Subject: NFS: remove unused wpages field from struct nfs_server The wpages field is not serving any purpose since commit c63c7b051395 ("NFS: Fix a race when doing NFS write coalescing") which was merged in v2.6.22-rc1. Remove it completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-2-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d7895eeccea3..7048f9b867ab 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -177,7 +177,6 @@ struct nfs_server { unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ - unsigned int wpages; /* write size (in pages) */ unsigned int wtmult; /* server disk block size */ unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ -- cgit v1.2.3 From 74a33326cfe8e62ebe0a65ba01ea8a8bceb532f8 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:38 +0200 Subject: NFS: remove unused time_delta field from struct nfs_server The last code that was using this was removed via commit ca0daa277aca ("NFS: Cache aggressively when file is open for writing") which was merged in v4.8-rc1, so it can be removed completely. 
Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-3-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7048f9b867ab..e1b2cf57e765 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -202,7 +202,6 @@ struct nfs_server { struct nfs_fsid fsid; int s_sysfs_id; /* sysfs dentry index */ __u64 maxfilesize; /* maximum file size */ - struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ -- cgit v1.2.3 From 2c665d91c2a2d8b5bdf1374d1253b3c89fca4ede Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:39 +0200 Subject: NFS: remove unused pnfs_ld_data field from struct nfs_server The last code that was using this was removed via commit 20d655d6197d ("pnfs/blocklayout: use the device id cache") which was merged in v3.18-rc1, so it can be removed completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-4-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e1b2cf57e765..d2d36711a119 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -246,7 +246,6 @@ struct nfs_server { filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; - void *pnfs_ld_data; /* per mount point data */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; -- cgit v1.2.3 From 48693d119b2114f8eaf8b8f972b29e05ae581ad4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 13 Jul 2025 00:30:06 +0100 Subject: SUNRPC: Remove unused xdr functions Remove a bunch of unused xdr_*decode* functions: The last use of xdr_decode_netobj() was removed in 2021 by: commit 7cf96b6d0104 ("lockd: Update the NLMv4 SHARE arguments decoder to use struct xdr_stream") The last use of xdr_decode_string_inplace() was removed in 2021 by: commit 3049e974a7c7 ("lockd: Update the NLMv4 FREE_ALL arguments decoder to use struct xdr_stream") The last use of xdr_stream_decode_opaque() was removed in 2024 by: commit fed8a17c61ff ("xdrgen: typedefs should use the built-in string and opaque functions") The functions xdr_stream_decode_string() and xdr_stream_decode_opaque_dup() were both added in 2018 by the commit 0e779aa70308 ("SUNRPC: Add helpers for decoding opaque and string types") but never used. Remove them. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20250712233006.403226-1-linux@treblig.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a2ab813a9800..e370886632b0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -128,10 +128,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); -__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, - unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); -__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); @@ -341,12 +338,6 @@ xdr_stream_remaining(const struct xdr_stream *xdr) return xdr->nwords << 2; } -ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, - size_t size); -ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, - size_t maxlen, gfp_t gfp_flags); -ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, - size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, -- cgit v1.2.3 From 35c18f2933c596b4fd6a98baee36f3137d133a5f Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 12 Jun 2025 12:13:21 +0200 Subject: Add a new optional ",cma" suffix to the crashkernel= command line option Patch series "kdump: crashkernel reservation from CMA", v5. This series implements a way to reserve additional crash kernel memory using CMA. Currently, all the memory for the crash kernel is not usable by the 1st (production) kernel. It is also unmapped so that it can't be corrupted by the fault that will eventually trigger the crash. This makes sense for the memory actually used by the kexec-loaded crash kernel image and initrd and the data prepared during the load (vmcoreinfo, ...). However, the reserved space needs to be much larger than that to provide enough run-time memory for the crash kernel and the kdump userspace. Estimating the amount of memory to reserve is difficult. Being too careful makes kdump likely to end in OOM, being too generous takes even more memory from the production system. Also, the reservation only allows reserving a single contiguous block (or two with the "low" suffix). I've seen systems where this fails because the physical memory is fragmented. By reserving additional crashkernel memory from CMA, the main crashkernel reservation can be just large enough to fit the kernel and initrd image, minimizing the memory taken away from the production system. Most of the run-time memory for the crash kernel will be memory previously available to userspace in the production system. As this memory is no longer wasted, the reservation can be done with a generous margin, making kdump more reliable. Kernel memory that we need to preserve for dumping is normally not allocated from CMA, unless it is explicitly allocated as movable. Currently this is only the case for memory ballooning and zswap. Such movable memory will be missing from the vmcore. User data is typically not dumped by makedumpfile. 
When dumping of user data is intended, this new CMA reservation cannot be used.

There are five patches in this series:

The first adds a new ",cma" suffix to the recently introduced generic crashkernel parsing code. parse_crashkernel() takes one more argument to store the cma reservation size.

The second patch implements reserve_crashkernel_cma() which performs the reservation. If the requested size is not available in a single range, multiple smaller ranges will be reserved.

The third patch updates Documentation/, explicitly mentioning the potential DMA corruption of the CMA-reserved memory.

The fourth patch adds a short delay before booting the kdump kernel, allowing pending DMA transfers to finish.

The fifth patch enables the functionality for x86 as a proof of concept. There are just three things every arch needs to do:
- call reserve_crashkernel_cma()
- include the CMA-reserved ranges in the physical memory map
- exclude the CMA-reserved ranges from the memory available through /proc/vmcore by excluding them from the vmcoreinfo PT_LOAD ranges.

Adding other architectures is easy and I can do that as soon as this series is merged.

With this series applied, specifying

  crashkernel=100M crashkernel=1G,cma

on the command line will make a standard crashkernel reservation of 100M, where kexec will load the kernel and initrd. An additional 1G will be reserved from CMA, still usable by the production system. The crash kernel will have 1.1G of memory available. The 100M can be reliably predicted based on the size of the kernel and initrd.

The new cma suffix is completely optional. When no crashkernel=size,cma is specified, everything works as before.

This patch (of 5):

Add a new cma_size parameter to parse_crashkernel(). When not NULL, call __parse_crashkernel to parse the CMA reservation size from "crashkernel=size,cma" and store it in cma_size. Set cma_size to NULL in all calls to parse_crashkernel().

Link: https://lkml.kernel.org/r/aEqnxxfLZMllMC8I@dwarf.suse.cz
Link: https://lkml.kernel.org/r/aEqoQckgoTQNULnh@dwarf.suse.cz
Signed-off-by: Jiri Bohac
Cc: Baoquan He
Cc: Dave Young
Cc: Donald Dutile
Cc: Michal Hocko
Cc: Philipp Rudo
Cc: Pingfan Liu
Cc: Tao Liu
Cc: Vivek Goyal
Cc: David Hildenbrand
Signed-off-by: Andrew Morton
---
 include/linux/crash_reserve.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'include/linux')

diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h
index 1fe7e7d1b214..e784aaff2f5a 100644
--- a/include/linux/crash_reserve.h
+++ b/include/linux/crash_reserve.h
@@ -16,7 +16,8 @@ extern struct resource crashk_low_res;
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
		unsigned long long *crash_size, unsigned long long *crash_base,
-		unsigned long long *low_size, bool *high);
+		unsigned long long *low_size, unsigned long long *cma_size,
+		bool *high);

 #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
 #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
-- cgit v1.2.3

From ab475510e0422bb5672d465f9d0f523d72fdb7f1 Mon Sep 17 00:00:00 2001
From: Jiri Bohac
Date: Thu, 12 Jun 2025 12:16:39 +0200
Subject: kdump: implement reserve_crashkernel_cma

reserve_crashkernel_cma() reserves CMA ranges for the crash kernel. If allocating the requested size fails, try to reserve it in smaller blocks, as in the sketch below. Store the reserved ranges in the crashk_cma_ranges array and the number of ranges in crashk_cma_cnt.
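The retry-with-smaller-blocks strategy could look roughly like the following. This is an illustrative sketch only, assuming the generic boot-time CMA declaration API is used; it is not the code merged in kernel/crash_reserve.c:

	#include <linux/cma.h>
	#include <linux/init.h>
	#include <linux/minmax.h>
	#include <linux/range.h>

	void __init reserve_crashkernel_cma(unsigned long long cma_size)
	{
		unsigned long long request_size = cma_size;

		while (cma_size && crashk_cma_cnt < CRASHKERNEL_CMA_RANGES_MAX) {
			struct cma *cma;

			if (request_size > cma_size)
				request_size = cma_size;
			/* Try one contiguous CMA range of the requested size. */
			if (cma_declare_contiguous(0, request_size, 0, 0, 0,
						   false, "crashkernel", &cma)) {
				/* Not available in one piece: halve and retry. */
				request_size /= 2;
				if (request_size < PAGE_SIZE)
					break;
				continue;
			}
			crashk_cma_ranges[crashk_cma_cnt].start = cma_get_base(cma);
			crashk_cma_ranges[crashk_cma_cnt].end =
				crashk_cma_ranges[crashk_cma_cnt].start +
				cma_get_size(cma) - 1;
			crashk_cma_cnt++;
			cma_size -= min_t(unsigned long long, cma_size,
					  cma_get_size(cma));
		}
	}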
Link: https://lkml.kernel.org/r/aEqpBwOy_ekm0gw9@dwarf.suse.cz
Signed-off-by: Jiri Bohac
Cc: Baoquan He
Cc: Dave Young
Cc: David Hildenbrand
Cc: Donald Dutile
Cc: Michal Hocko
Cc: Philipp Rudo
Cc: Pingfan Liu
Cc: Tao Liu
Cc: Vivek Goyal
Signed-off-by: Andrew Morton
---
 include/linux/crash_reserve.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)
(limited to 'include/linux')

diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h
index e784aaff2f5a..7b44b41d0a20 100644
--- a/include/linux/crash_reserve.h
+++ b/include/linux/crash_reserve.h
@@ -13,12 +13,24 @@
 */
 extern struct resource crashk_res;
 extern struct resource crashk_low_res;
+extern struct range crashk_cma_ranges[];
+#if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION)
+#define CRASHKERNEL_CMA
+#define CRASHKERNEL_CMA_RANGES_MAX 4
+extern int crashk_cma_cnt;
+#else
+#define crashk_cma_cnt 0
+#define CRASHKERNEL_CMA_RANGES_MAX 0
+#endif
+
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
		unsigned long long *crash_size, unsigned long long *crash_base,
		unsigned long long *low_size, unsigned long long *cma_size,
		bool *high);

+void __init reserve_crashkernel_cma(unsigned long long cma_size);
+
 #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
 #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
 #define DEFAULT_CRASH_KERNEL_LOW_SIZE	(128UL << 20)
-- cgit v1.2.3

From b76e89e50fc3693b7b8a443ed906320d8ccb93fd Mon Sep 17 00:00:00 2001
From: Feng Tang
Date: Thu, 3 Jul 2025 10:10:01 +0800
Subject: panic: generalize panic_print's function to show sys info

'panic_print' was introduced to help debug kernel panics by dumping different kinds of system information, like tasks' call stacks, memory, the ftrace buffer, etc. This function could also be used to help debug other cases, like task hangs and soft/hard lockups, where the user may need a snapshot of system info at that time.

Extract the system-info dumping code from panic.c into the separate files sys_info.[ch], for wider use by other kernel parts for debugging. Also adjust the macro names with respect to singular/plural forms.

Link: https://lkml.kernel.org/r/20250703021004.42328-3-feng.tang@linux.alibaba.com
Signed-off-by: Feng Tang
Suggested-by: Petr Mladek
Cc: John Ogness
Cc: Jonathan Corbet
Cc: Lance Yang
Cc: "Paul E . McKenney"
Cc: Steven Rostedt
Cc: Nathan Chancellor
Signed-off-by: Andrew Morton
---
 include/linux/sys_info.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 include/linux/sys_info.h
(limited to 'include/linux')

diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h
new file mode 100644
index 000000000000..53b7e27dbf2a
--- /dev/null
+++ b/include/linux/sys_info.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SYS_INFO_H
+#define _LINUX_SYS_INFO_H
+
+/*
+ * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special
+ * handling which only fits panic case.
+ */ +#define SYS_INFO_TASKS 0x00000001 +#define SYS_INFO_MEM 0x00000002 +#define SYS_INFO_TIMERS 0x00000004 +#define SYS_INFO_LOCKS 0x00000008 +#define SYS_INFO_FTRACE 0x00000010 +#define SYS_INFO_PANIC_CONSOLE_REPLAY 0x00000020 +#define SYS_INFO_ALL_CPU_BT 0x00000040 +#define SYS_INFO_BLOCKED_TASKS 0x00000080 + +void sys_info(unsigned long si_mask); + +#endif /* _LINUX_SYS_INFO_H */ -- cgit v1.2.3 From d747755917bf8ae08f490c3fe7d8e321afab8127 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:02 +0800 Subject: panic: add 'panic_sys_info' sysctl to take human readable string parameter Bitmap definition for 'panic_print' is hard to remember and decode. Add 'panic_sys_info='sysctl to take human readable string like "tasks,mem,timers,locks,ftrace,..." and translate it into bitmap. The detailed mapping is: SYS_INFO_TASKS "tasks" SYS_INFO_MEM "mem" SYS_INFO_TIMERS "timers" SYS_INFO_LOCKS "locks" SYS_INFO_FTRACE "ftrace" SYS_INFO_ALL_CPU_BT "all_bt" SYS_INFO_BLOCKED_TASKS "blocked_tasks" [nathan@kernel.org: add __maybe_unused to sys_info_avail] Link: https://lkml.kernel.org/r/20250708-fix-clang-sys_info_avail-warning-v1-1-60d239eacd64@kernel.org Link: https://lkml.kernel.org/r/20250703021004.42328-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Cc: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/sys_info.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h index 53b7e27dbf2a..89d77dc4f2ed 100644 --- a/include/linux/sys_info.h +++ b/include/linux/sys_info.h @@ -2,6 +2,8 @@ #ifndef _LINUX_SYS_INFO_H #define _LINUX_SYS_INFO_H +#include + /* * SYS_INFO_PANIC_CONSOLE_REPLAY is for panic case only, as it needs special * handling which only fits panic case. @@ -16,5 +18,11 @@ #define SYS_INFO_BLOCKED_TASKS 0x00000080 void sys_info(unsigned long si_mask); +unsigned long sys_info_parse_param(char *str); +#ifdef CONFIG_SYSCTL +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos); +#endif #endif /* _LINUX_SYS_INFO_H */ -- cgit v1.2.3 From ae2da51def76020fa16f53cd3446c00cafe41008 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:22 +0800 Subject: locking/rwsem: make owner helpers globally available Patch series "extend hung task blocker tracking to rwsems". Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. 
Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ? __pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked on an rw-semaphore likely owned by task cat:28630 [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? 
down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? __pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] This patch (of 3): In preparation for extending blocker tracking to support rwsems, make the rwsem_owner() and is_rwsem_reader_owned() helpers globally available for determining if the blocker is a writer or one of the readers. Additionally, a stale owner pointer in a reader-owned rwsem can lead to false positives in blocker tracking when CONFIG_DETECT_HUNG_TASK_BLOCKER is enabled. To mitigate this, clear the owner field on the reader unlock path, similar to what CONFIG_DEBUG_RWSEMS does. A NULL owner is better than a stale one for diagnostics. Link: https://lkml.kernel.org/r/20250627072924.36567-1-lance.yang@linux.dev Link: https://lkml.kernel.org/r/20250627072924.36567-2-lance.yang@linux.dev Link: https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ [1] Signed-off-by: Lance Yang Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/rwsem.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..544853bed5b9 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -132,6 +132,18 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) return !list_empty(&sem->wait_list); } +#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) +/* + * Return just the real task structure pointer of the owner + */ +extern struct task_struct *rwsem_owner(struct rw_semaphore *sem); + +/* + * Return true if the rwsem is owned by a reader. 
+ */ +extern bool is_rwsem_reader_owned(struct rw_semaphore *sem); +#endif + #else /* !CONFIG_PREEMPT_RT */ #include -- cgit v1.2.3 From 77da18de55ac6417e48905bec8b3c66f023b15a9 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 27 Jun 2025 15:29:23 +0800 Subject: hung_task: extend hung task blocker tracking to rwsems Inspired by mutex blocker tracking[1], and having already extended it to semaphores, let's now add support for reader-writer semaphores (rwsems). The approach is simple: when a task enters TASK_UNINTERRUPTIBLE while waiting for an rwsem, we just call hung_task_set_blocker(). The hung task detector can then query the rwsem's owner to identify the lock holder. Tracking works reliably for writers, as there can only be a single writer holding the lock, and its task struct is stored in the owner field. The main challenge lies with readers. The owner field points to only one of many concurrent readers, so we might lose track of the blocker if that specific reader unlocks, even while others remain. This is not a significant issue, however. In practice, long-lasting lock contention is almost always caused by a writer. Therefore, reliably tracking the writer is the primary goal of this patch series ;) With this change, the hung task detector can now show blocker task's info like below: [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked for more than 122 seconds. [Fri Jun 27 15:21:34 2025] Tainted: G S 6.16.0-rc3 #8 [Fri Jun 27 15:21:34 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Fri Jun 27 15:21:34 2025] task:cat state:D stack:0 pid:28631 tgid:28631 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? policy_nodemask+0x215/0x340 [Fri Jun 27 15:21:34 2025] ? _raw_spin_lock_irq+0x8a/0xe0 [Fri Jun 27 15:21:34 2025] ? __pfx__raw_spin_lock_irq+0x10/0x10 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_preempt_disabled+0x15/0x30 [Fri Jun 27 15:21:34 2025] rwsem_down_read_slowpath+0x55e/0xe10 [Fri Jun 27 15:21:34 2025] ? __pfx_rwsem_down_read_slowpath+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx___might_resched+0x10/0x10 [Fri Jun 27 15:21:34 2025] down_read+0xc9/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_down_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __debugfs_file_get+0x14d/0x700 [Fri Jun 27 15:21:34 2025] ? __pfx___debugfs_file_get+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? handle_pte_fault+0x52a/0x710 [Fri Jun 27 15:21:34 2025] ? selinux_file_permission+0x3a9/0x590 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_read+0x4a/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? 
__pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f3f8faefb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffdeda5ab98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f3f8faefb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 00000000010fa000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 00000000010fa000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffdeda59fe0 R11: 0000000000000246 R12: 00000000010fa000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] INFO: task cat:28631 blocked on an rw-semaphore likely owned by task cat:28630 [Fri Jun 27 15:21:34 2025] task:cat state:S stack:0 pid:28630 tgid:28630 ppid:28501 task_flags:0x400000 flags:0x00004000 [Fri Jun 27 15:21:34 2025] Call Trace: [Fri Jun 27 15:21:34 2025] [Fri Jun 27 15:21:34 2025] __schedule+0x7c7/0x1930 [Fri Jun 27 15:21:34 2025] ? __pfx___schedule+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __mod_timer+0x304/0xa80 [Fri Jun 27 15:21:34 2025] schedule+0x6a/0x180 [Fri Jun 27 15:21:34 2025] schedule_timeout+0xfb/0x230 [Fri Jun 27 15:21:34 2025] ? __pfx_schedule_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? __pfx_process_timeout+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? down_write+0xc4/0x140 [Fri Jun 27 15:21:34 2025] msleep_interruptible+0xbe/0x150 [Fri Jun 27 15:21:34 2025] read_dummy_rwsem_write+0x54/0x90 [Fri Jun 27 15:21:34 2025] full_proxy_read+0xff/0x1c0 [Fri Jun 27 15:21:34 2025] ? rw_verify_area+0x6d/0x410 [Fri Jun 27 15:21:34 2025] vfs_read+0x177/0xa50 [Fri Jun 27 15:21:34 2025] ? __pfx_vfs_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] ? fdget_pos+0x1cf/0x4c0 [Fri Jun 27 15:21:34 2025] ksys_read+0xfc/0x1d0 [Fri Jun 27 15:21:34 2025] ? 
__pfx_ksys_read+0x10/0x10 [Fri Jun 27 15:21:34 2025] do_syscall_64+0x66/0x2d0 [Fri Jun 27 15:21:34 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Fri Jun 27 15:21:34 2025] RIP: 0033:0x7f8f288efb40 [Fri Jun 27 15:21:34 2025] RSP: 002b:00007ffffb631038 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Fri Jun 27 15:21:34 2025] RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007f8f288efb40 [Fri Jun 27 15:21:34 2025] RDX: 0000000000010000 RSI: 000000002a4b5000 RDI: 0000000000000003 [Fri Jun 27 15:21:34 2025] RBP: 000000002a4b5000 R08: 0000000000000000 R09: 0000000000010fff [Fri Jun 27 15:21:34 2025] R10: 00007ffffb630460 R11: 0000000000000246 R12: 000000002a4b5000 [Fri Jun 27 15:21:34 2025] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [Fri Jun 27 15:21:34 2025] [1] https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com/ Link: https://lkml.kernel.org/r/20250627072924.36567-3-lance.yang@linux.dev Signed-off-by: Lance Yang Suggested-by: Masami Hiramatsu (Google) Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 1bc2b3244613..34e615c76ca5 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -21,17 +21,17 @@ * type. * * Type encoding: - * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) - * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) - * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) - * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rw-semaphore as READER (BLOCKER_TYPE_RWSEM_READER) + * 11 - Blocked on rw-semaphore as WRITER (BLOCKER_TYPE_RWSEM_WRITER) */ -#define BLOCKER_TYPE_MUTEX 0x00UL -#define BLOCKER_TYPE_SEM 0x01UL -#define BLOCKER_TYPE_RTMUTEX 0x02UL -#define BLOCKER_TYPE_RWSEM 0x03UL +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RWSEM_READER 0x02UL +#define BLOCKER_TYPE_RWSEM_WRITER 0x03UL -#define BLOCKER_TYPE_MASK 0x03UL +#define BLOCKER_TYPE_MASK 0x03UL #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER static inline void hung_task_set_blocker(void *lock, unsigned long type) -- cgit v1.2.3 From b3d5fd6f82dde8c906dc2a587003a44252ae5eae Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:56 +0800 Subject: lib/math/gcd: use static key to select implementation at runtime Patch series "Optimize GCD performance on RISC-V by selecting implementation at runtime", v3. The current implementation of gcd() selects between the binary GCD and the odd-even GCD algorithm at compile time, depending on whether CONFIG_CPU_NO_EFFICIENT_FFS is set. On platforms like RISC-V, however, this compile-time decision can be misleading: even when the compiler emits ctz instructions based on the assumption that they are efficient (as is the case when CONFIG_RISCV_ISA_ZBB is enabled), the actual hardware may lack support for the Zbb extension. 
In such cases, ffs() falls back to a software implementation at runtime, making the binary GCD algorithm significantly slower than the odd-even variant. To address this, we introduce a static key to allow runtime selection between the binary and odd-even GCD implementations. On RISC-V, the kernel now checks for Zbb support during boot. If Zbb is unavailable, the static key is disabled so that gcd() consistently uses the more efficient odd-even algorithm in that scenario. Additionally, to further reduce code size, we select CONFIG_CPU_NO_EFFICIENT_FFS automatically when CONFIG_RISCV_ISA_ZBB is not enabled, avoiding compilation of the unused binary GCD implementation entirely on systems where it would never be executed. This series ensures that the most efficient GCD algorithm is used in practice and avoids compiling unnecessary code based on hardware capabilities and kernel configuration. This patch (of 3): On platforms like RISC-V, the compiler may generate hardware FFS instructions even if the underlying CPU does not actually support them. Currently, the GCD implementation is chosen at compile time based on CONFIG_CPU_NO_EFFICIENT_FFS, which can result in suboptimal behavior on such systems. Introduce a static key, efficient_ffs_key, to enable runtime selection between the binary GCD (using ffs) and the odd-even GCD implementation. This allows the kernel to default to the faster binary GCD when FFS is efficient, while retaining the ability to fall back when needed. Link: https://lkml.kernel.org/r/20250606134758.1308400-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20250606134758.1308400-2-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Alexandre Ghiti Signed-off-by: Andrew Morton --- include/linux/gcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gcd.h b/include/linux/gcd.h index cb572677fd7f..616e81a7f7e3 100644 --- a/include/linux/gcd.h +++ b/include/linux/gcd.h @@ -3,6 +3,9 @@ #define _GCD_H #include +#include + +DECLARE_STATIC_KEY_TRUE(efficient_ffs_key); unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__; -- cgit v1.2.3 From 2fb4af5ea3c735a205d97de10f044f809b20af51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:49 +0200 Subject: NFS: track active delegations per-server The active delegation watermark was added to avoid overloading servers. Track the active delegation per-server instead of globally so that clients talking to multiple servers aren't limited by the global limit. 
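As a rough illustration of the per-server accounting (not taken from this patch; the admission-check shape and the watermark parameter are assumptions, only the nr_active_delegations field comes from the diff below):

/* hypothetical sketch: decline a new delegation past a per-server limit */
static bool nfs_server_may_delegate(struct nfs_server *server, long watermark)
{
	/* advisory check; a cmpxchg loop would close the small race window */
	if (atomic_long_read(&server->nr_active_delegations) >= watermark)
		return false;
	atomic_long_inc(&server->nr_active_delegations);
	return true;
}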
Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-5-hch@lst.de Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d2d36711a119..a9b44f12623f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -254,6 +254,7 @@ struct nfs_server { struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; + atomic_long_t nr_active_delegations; struct list_head ss_copies; struct list_head ss_src_copies; -- cgit v1.2.3 From f5b3108e6a14418b120a3c38ca589b8d6cf87627 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:50 +0200 Subject: NFS: use a hash table for delegation lookup nfs_delegation_find_inode currently has to walk the entire list of delegations per inode, which can become pretty large, and can become even larger when increasing the delegation watermark. Add a hash table to speed up the delegation lookup, sized as a fraction of the delegation watermark. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250718081509.2607553-6-hch@lst.de Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a9b44f12623f..d30c0245031c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -255,6 +255,8 @@ struct nfs_server { struct list_head layouts; struct list_head delegations; atomic_long_t nr_active_delegations; + unsigned int delegation_hash_mask; + struct hlist_head *delegation_hash_table; struct list_head ss_copies; struct list_head ss_src_copies; -- cgit v1.2.3 From ad38574a8e8223361e265973fbd87013ea058c5d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:33 +0100 Subject: f2fs: Pass a folio to ADDRS_PER_PAGE() All callers now have a folio so pass it in. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..25857877eaec 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) -- cgit v1.2.3 From a5f3be6e652a7beaaf6c482bc013b64129a5d239 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:34 +0100 Subject: f2fs: Pass a folio to IS_INODE() All callers now have a folio so pass it in. Also make it const to help the compiler. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 25857877eaec..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(&folio->page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) -- cgit v1.2.3 From a824388d911927b2a82bf7dcfd7cef6ee45c8b43 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Jul 2025 18:03:36 +0100 Subject: f2fs: Use a folio in f2fs_is_cp_guaranteed() Convert the passed page to a folio and use it throughout. Removes a use of fscrypt_is_bounce_page(), which we're trying to remove. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/fscrypt.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 56fad33043d5..8d9127a0fdb3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -518,12 +519,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); -- cgit v1.2.3 From ea4d331050b4cd43e6a900937db88b01ef75e1f2 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 16 Oct 2024 06:02:41 +0200 Subject: Input: touch-overlay - add touchscreen overlay handling Some touch devices provide mechanical overlays with different objects like buttons or clipped touchscreen surfaces. In order to support these objects, add a series of helper functions to the input subsystem to transform them into overlay objects via device tree nodes. These overlay objects consume the raw touch events and report the expected input events depending on the object properties. Note that the current implementation allows for multiple definitions of touchscreen areas (regions that report touch events), but only the first one will be used for the touchscreen device that the consumers typically provide. Should the need for multiple touchscreen areas arise, additional touchscreen devices would be required at the consumer side. There is no limitation in the number of touch areas defined as buttons. 
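A hedged sketch of the consumer side follows; the driver structure and setup flow are assumptions, only the helper names and signatures come from this patch (see the header below):

/* hypothetical probe-time wiring for an overlay-aware touch driver */
static int demo_ts_setup_overlay(struct demo_ts *ts)
{
	int error;

	INIT_LIST_HEAD(&ts->overlay_objects);
	error = touch_overlay_map(&ts->overlay_objects, ts->input);
	if (error)
		return error;

	if (touch_overlay_mapped_touchscreen(&ts->overlay_objects)) {
		u16 x_max, y_max;

		/* clip reported coordinates to the overlay window */
		touch_overlay_get_touchscreen_abs(&ts->overlay_objects,
						  &x_max, &y_max);
	}
	return 0;
}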
Reviewed-by: Jeff LaBundy Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20241016-feature-ts_virtobj_patch-v11-2-b292a1bbb0a1@wolfvision.net Signed-off-by: Dmitry Torokhov --- include/linux/input/touch-overlay.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/input/touch-overlay.h (limited to 'include/linux') diff --git a/include/linux/input/touch-overlay.h b/include/linux/input/touch-overlay.h new file mode 100644 index 000000000000..0253e554d3cd --- /dev/null +++ b/include/linux/input/touch-overlay.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Javier Carrasco + */ + +#ifndef _TOUCH_OVERLAY +#define _TOUCH_OVERLAY + +#include + +struct input_dev; + +int touch_overlay_map(struct list_head *list, struct input_dev *input); + +void touch_overlay_get_touchscreen_abs(struct list_head *list, u16 *x, u16 *y); + +bool touch_overlay_mapped_touchscreen(struct list_head *list); + +bool touch_overlay_process_contact(struct list_head *list, + struct input_dev *input, + struct input_mt_pos *pos, int slot); + +void touch_overlay_sync_frame(struct list_head *list, struct input_dev *input); + +#endif -- cgit v1.2.3 From 5523a466e905b6287b94654ddb364536f2f948cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jul 2025 11:06:03 +0200 Subject: i3c: fix module_i3c_i2c_driver() with I3C=n When CONFIG_I3C is disabled and the i3c_i2c_driver_register() happens to not be inlined, any driver calling it still references the i3c_driver instance, which then causes a link failure: x86_64-linux-ld: drivers/hwmon/lm75.o: in function `lm75_i3c_reg_read': lm75.c:(.text+0xc61): undefined reference to `i3cdev_to_dev' x86_64-linux-ld: lm75.c:(.text+0xd25): undefined reference to `i3c_device_do_priv_xfers' x86_64-linux-ld: lm75.c:(.text+0xdd8): undefined reference to `i3c_device_do_priv_xfers' This issue was part of the original i3c code, but only now caused problems when i3c support got added to lm75. Change the 'inline' annotations in the header to '__always_inline' to ensure that the dead-code-elimination pass in the compiler can optimize it out as intended. Fixes: 6071d10413ff ("hwmon: (lm75) add I3C support for P3T1755") Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Arnd Bergmann Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Guenter Roeck Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250725090609.2456262-1-arnd@kernel.org Signed-off-by: Alexandre Belloni --- include/linux/i3c/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index b674f64d0822..7f136de4b73e 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -245,7 +245,7 @@ void i3c_driver_unregister(struct i3c_driver *drv); * * Return: 0 if both registrations succeeds, a negative error code otherwise. */ -static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, +static __always_inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { int ret; @@ -270,7 +270,7 @@ static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, * Note that when CONFIG_I3C is not enabled, this function only unregisters the * @i2cdrv. 
 */
-static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv,
+static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv,
					     struct i2c_driver *i2cdrv)
 {
	if (IS_ENABLED(CONFIG_I3C))
-- cgit v1.2.3


From 9c0609d685b27a0bb392390680207baa820ed118 Mon Sep 17 00:00:00 2001
From: Wolfram Sang
Date: Thu, 24 Jul 2025 11:41:40 +0200
Subject: i3c: Standardize defines for specification parameters

Align existing defines to follow the consistent pattern
I3C_BUS_<parameter>_<limit>_<unit>. Prepare the codebase for adding new
parameters and help avoid duplication.

Signed-off-by: Wolfram Sang
Tested-by: Tommaso Merciai
Reviewed-by: Frank Li
Link: https://lore.kernel.org/r/20250724094146.6443-2-wsa+renesas@sang-engineering.com
Signed-off-by: Alexandre Belloni
---
 include/linux/i3c/master.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index c67922ece617..7dfcbe530515 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -249,10 +249,11 @@ struct i3c_device {
  */
 #define I3C_BUS_MAX_DEVS		11

-#define I3C_BUS_MAX_I3C_SCL_RATE	12900000
-#define I3C_BUS_TYP_I3C_SCL_RATE	12500000
-#define I3C_BUS_I2C_FM_PLUS_SCL_RATE	1000000
-#define I3C_BUS_I2C_FM_SCL_RATE		400000
+/* Taken from the I3C Spec V1.1.1, chapter 6.2. "Timing specification" */
+#define I3C_BUS_I2C_FM_PLUS_SCL_MAX_RATE	1000000
+#define I3C_BUS_I2C_FM_SCL_MAX_RATE		400000
+#define I3C_BUS_I3C_SCL_MAX_RATE		12900000
+#define I3C_BUS_I3C_SCL_TYP_RATE		12500000
 #define I3C_BUS_TLOW_OD_MIN_NS		200

 /**
-- cgit v1.2.3


From 8acf1f3bae1ea48949458b67d68a72a95c3244a4 Mon Sep 17 00:00:00 2001
From: Wolfram Sang
Date: Thu, 24 Jul 2025 11:41:41 +0200
Subject: i3c: Add more parameters for controllers to the header

Add standard timing value definitions from the specification.

Signed-off-by: Wolfram Sang
Tested-by: Tommaso Merciai
Reviewed-by: Frank Li
Link: https://lore.kernel.org/r/20250724094146.6443-3-wsa+renesas@sang-engineering.com
Signed-off-by: Alexandre Belloni
---
 include/linux/i3c/master.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 7dfcbe530515..043f5c7ff398 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -254,6 +254,10 @@ struct i3c_device {
 #define I3C_BUS_I2C_FM_SCL_MAX_RATE		400000
 #define I3C_BUS_I3C_SCL_MAX_RATE		12900000
 #define I3C_BUS_I3C_SCL_TYP_RATE		12500000
+#define I3C_BUS_TAVAL_MIN_NS			1000
+#define I3C_BUS_TBUF_MIXED_FM_MIN_NS		1300
+#define I3C_BUS_THIGH_MIXED_MAX_NS		41
+#define I3C_BUS_TIDLE_MIN_NS			200000
 #define I3C_BUS_TLOW_OD_MIN_NS		200

 /**
-- cgit v1.2.3


From 0060beec0bfa647c4b510df188b1c4673a197839 Mon Sep 17 00:00:00 2001
From: Damien Le Moal
Date: Mon, 28 Jul 2025 13:04:29 +0900
Subject: ata: libata-sata: Add link_power_management_supported sysfs attribute

A port link power management (LPM) policy can be controlled using the
link_power_management_policy sysfs host attribute. However, this
attribute also exists for hosts that do not support LPM, and in such
cases attempting to change the LPM policy for the host (port) fails with
-EOPNOTSUPP.

Introduce the new sysfs link_power_management_supported host attribute to
indicate to the user whether the port and the devices connected to the
port support LPM, which implies that the link_power_management_policy
attribute can be used.
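As a quick user-space illustration (the sysfs path, host name, and the 0/1 value format are assumptions, not taken from this patch):

/* check support before attempting to change the LPM policy */
#include <stdio.h>

static int lpm_supported(const char *host)
{
	char path[128], c = '0';
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/class/scsi_host/%s/link_power_management_supported",
		 host);
	f = fopen(path, "r");
	if (!f)
		return 0;
	if (fscanf(f, " %c", &c) != 1)
		c = '0';
	fclose(f);
	return c == '1';
}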
Since checking that a port and its devices support LPM is common between
the new ata_scsi_lpm_supported_show() function and the existing
ata_scsi_lpm_store() function, the new helper ata_scsi_lpm_supported() is
introduced.

Fixes: 413e800cadbf ("ata: libata-sata: Disallow changing LPM state if not supported")
Reported-by: Borah, Chaitanya Kumar
Reported-by: kernel test robot
Closes: https://lore.kernel.org/oe-lkp/202507251014.a5becc3b-lkp@intel.com
Signed-off-by: Damien Le Moal
Reviewed-by: Martin K. Petersen
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 912ace523880..0620dd67369f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -545,6 +545,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes)
 extern struct device_attribute dev_attr_unload_heads;
 #ifdef CONFIG_SATA_HOST
+extern struct device_attribute dev_attr_link_power_management_supported;
 extern struct device_attribute dev_attr_link_power_management_policy;
 extern struct device_attribute dev_attr_ncq_prio_supported;
 extern struct device_attribute dev_attr_ncq_prio_enable;
-- cgit v1.2.3


From 199d9ffb31650f948dd342ade1c1b920e157630f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?=
Date: Fri, 11 Jul 2025 15:31:36 +0200
Subject: module: move 'struct module_use' to internal.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The struct was moved to the public header file in commit c8e21ced08b3
("module: fix kdb's illicit use of struct module_use."). Back then the
structure was used outside of the module core. Nowadays this is not true
anymore, so the structure can be made internal.

Signed-off-by: Thomas Weißschuh
Reviewed-by: Daniel Gomez
Reviewed-by: Petr Pavlu
Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-1-39443decb1f8@linutronix.de
Signed-off-by: Daniel Gomez
---
 include/linux/module.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index a7cac01d95e7..97c38e1cd377 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -313,13 +313,6 @@ void *__symbol_get_gpl(const char *symbol);
	__used __section(".no_trim_symbol") = __stringify(x);	\
	(typeof(&x))(__symbol_get(__stringify(x))); })

-/* modules using other modules: kdb wants to see this. */
-struct module_use {
-	struct list_head source_list;
-	struct list_head target_list;
-	struct module *source, *target;
-};
-
 enum module_state {
	MODULE_STATE_LIVE,	/* Normal state. */
	MODULE_STATE_COMING,	/* Full formed, running module_init. */
-- cgit v1.2.3


From 818783c804bc051f7faf0ac226b5597f8259c6f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?=
Date: Fri, 11 Jul 2025 15:31:37 +0200
Subject: module: make structure definitions always visible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To write code that works with both CONFIG_MODULES=y and CONFIG_MODULES=n
it is convenient to use "if (IS_ENABLED(CONFIG_MODULES))" over raw
#ifdef. The code will still be fully type-checked, but the unreachable
parts are discarded by the compiler. This prevents accidental breakage
when a certain kconfig combination was not specifically tested by the
developer.

This pattern is already supported to some extent by module.h defining
empty stub functions if CONFIG_MODULES=n.
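A minimal sketch of the pattern in question (the two helpers are hypothetical):

/* both branches are parsed and type-checked in every configuration;
 * the compiler discards the dead one, so neither branch can bit-rot */
if (IS_ENABLED(CONFIG_MODULES))
	err = demo_register_module_hooks();
else
	err = demo_register_builtin_hooks();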
However, some users of module.h work on the structures defined by
module.h. Therefore these structure definitions need to be visible, too.

Many structure members are still gated by specific configuration
settings. The assumption for those is that the code using them will be
gated behind the same configuration setting anyway.

Signed-off-by: Thomas Weißschuh
Reviewed-by: Daniel Gomez
Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-2-39443decb1f8@linutronix.de
Signed-off-by: Daniel Gomez
---
 include/linux/module.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 97c38e1cd377..5fe812de2d84 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -303,16 +303,6 @@ static typeof(name) __mod_device_table__##type##__##name \

 struct notifier_block;

-#ifdef CONFIG_MODULES
-
-/* Get/put a kernel symbol (calls must be symmetric) */
-void *__symbol_get(const char *symbol);
-void *__symbol_get_gpl(const char *symbol);
-#define symbol_get(x) ({ \
-	static const char __notrim[] \
-	__used __section(".no_trim_symbol") = __stringify(x); \
-	(typeof(&x))(__symbol_get(__stringify(x))); })
-
 enum module_state {
	MODULE_STATE_LIVE,	/* Normal state. */
	MODULE_STATE_COMING,	/* Full formed, running module_init. */
@@ -597,6 +587,16 @@ struct module {
 #define MODULE_ARCH_INIT {}
 #endif

+#ifdef CONFIG_MODULES
+
+/* Get/put a kernel symbol (calls must be symmetric) */
+void *__symbol_get(const char *symbol);
+void *__symbol_get_gpl(const char *symbol);
+#define symbol_get(x) ({ \
+	static const char __notrim[] \
+	__used __section(".no_trim_symbol") = __stringify(x); \
+	(typeof(&x))(__symbol_get(__stringify(x))); })
+
 #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE
 static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym)
 {
-- cgit v1.2.3


From bdc877ba6b7ff1b6d2ebeff11e63da4a50a54854 Mon Sep 17 00:00:00 2001
From: Petr Pavlu
Date: Mon, 30 Jun 2025 16:32:34 +0200
Subject: module: Restore the moduleparam prefix length check

The moduleparam code allows modules to provide their own definition of
MODULE_PARAM_PREFIX, instead of using the default KBUILD_MODNAME ".".

Commit 730b69d22525 ("module: check kernel param length at compile time,
not runtime") added a check to ensure the prefix doesn't exceed
MODULE_NAME_LEN, as this is what param_sysfs_builtin() expects. Later,
commit 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for
sysfs perms.") removed this check, but there is no indication this was
intentional. Since the check is still useful for param_sysfs_builtin()
to function properly, reintroduce it in __module_param_call(), but in a
modernized form using static_assert().

While here, clean up the __module_param_call() comments. In particular,
remove the comment "Default value instead of permissions?", which comes
from commit 9774a1f54f17 ("[PATCH] Compile-time check re world-writeable
module params"). This comment was related to the test variable
__param_perm_check_##name, which was removed in the previously mentioned
commit 58f86cc89c33.
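A sketch of what the restored check now catches at build time (the prefix is deliberately contrived; on 64-bit the limit is 64 - sizeof(unsigned long) = 56 bytes):

/* a prefix exceeding MAX_PARAM_PREFIX_LEN now trips the static_assert()
 * in __module_param_call() instead of confusing param_sysfs_builtin()
 * at runtime */
#define MODULE_PARAM_PREFIX "this_module_parameter_prefix_is_much_too_long_to_pass_the_check."
static int debug;
module_param(debug, int, 0644);		/* build error here */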
Fixes: 58f86cc89c33 ("VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms.") Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-4-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/moduleparam.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..110e9d09de24 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -282,10 +282,9 @@ struct kparam_array #define __moduleparam_const const #endif -/* This is the fundamental function for registering boot/module - parameters. */ +/* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - /* Default value instead of permissions? */ \ + static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ -- cgit v1.2.3 From 40a826bd6c82ae45cfd3a19cd2a60a10f56b74c0 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 30 Jun 2025 16:32:36 +0200 Subject: module: Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN The maximum module name length (MODULE_NAME_LEN) is somewhat confusingly defined in terms of the maximum parameter prefix length (MAX_PARAM_PREFIX_LEN), when in fact the dependency is in the opposite direction. This split originates from commit 730b69d22525 ("module: check kernel param length at compile time, not runtime"). The code needed to use MODULE_NAME_LEN in moduleparam.h, but because module.h requires moduleparam.h, this created a circular dependency. It was resolved by introducing MAX_PARAM_PREFIX_LEN in moduleparam.h and defining MODULE_NAME_LEN in module.h in terms of MAX_PARAM_PREFIX_LEN. Rename MAX_PARAM_PREFIX_LEN to __MODULE_NAME_LEN for clarity. This matches the similar approach of defining MODULE_INFO in module.h and __MODULE_INFO in moduleparam.h. Signed-off-by: Petr Pavlu Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250630143535.267745-6-petr.pavlu@suse.com Signed-off-by: Daniel Gomez --- include/linux/module.h | 2 +- include/linux/moduleparam.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 5fe812de2d84..313ecb8e5181 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -33,7 +33,7 @@ #include #include -#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN +#define MODULE_NAME_LEN __MODULE_NAME_LEN struct modversion_info { unsigned long crc; diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 110e9d09de24..a04a2bc4f51e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -6,6 +6,13 @@ #include #include +/* + * The maximum module name length, including the NUL byte. + * Chosen so that structs with an unsigned long line up, specifically + * modversion_info. + */ +#define __MODULE_NAME_LEN (64 - sizeof(unsigned long)) + /* You can override this manually, but generally this should match the module name. */ #ifdef MODULE @@ -17,9 +24,6 @@ #define __MODULE_INFO_PREFIX KBUILD_MODNAME "." #endif -/* Chosen so that structs with an unsigned long line up. 
*/ -#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) - #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ __used __section(".modinfo") __aligned(1) \ @@ -284,7 +288,7 @@ struct kparam_array /* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ + static_assert(sizeof(""prefix) - 1 <= __MODULE_NAME_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ -- cgit v1.2.3 From e2ba58ccc9099514380c3300cbc0750b5055fc1c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 30 Jul 2025 21:49:53 -0700 Subject: block: Fix default IO priority if there is no IO context Upstream commit 53889bcaf536 ("block: make __get_task_ioprio() easier to read") changes the IO priority returned to the caller if no IO context is defined for the task. Prior to this commit, the returned IO priority was determined by task_nice_ioclass() and task_nice_ioprio(). Now it is always IOPRIO_DEFAULT, which translates to IOPRIO_CLASS_NONE with priority 0. However, task_nice_ioclass() returns IOPRIO_CLASS_IDLE, IOPRIO_CLASS_RT, or IOPRIO_CLASS_BE depending on the task scheduling policy, and task_nice_ioprio() returns a value determined by task_nice(). This causes regressions in test code checking the IO priority and class of IO operations on tasks with no IO context. Fix the problem by returning the IO priority calculated from task_nice_ioclass() and task_nice_ioprio() if no IO context is defined to match earlier behavior. Fixes: 53889bcaf536 ("block: make __get_task_ioprio() easier to read") Cc: Jens Axboe Cc: Bart Van Assche Signed-off-by: Guenter Roeck Reviewed-by: Yu Kuai Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250731044953.1852690-1-linux@roeck-us.net Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index b25377b6ea98..5210e8371238 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,7 +60,8 @@ static inline int __get_task_ioprio(struct task_struct *p) int prio; if (!ioc) - return IOPRIO_DEFAULT; + return IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); if (p != current) lockdep_assert_held(&p->alloc_lock); -- cgit v1.2.3 From 788fa4b47cdcd9b3d8c2d02ac0b3cd2540305f18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:24 -0400 Subject: tracing: Add guard(ring_buffer_nest) Some calls to the tracing ring buffer can happen when the ring buffer is already being written to by the same context (for example, a trace_printk() in between a ring_buffer_lock_reserve() and a ring_buffer_unlock_commit()). In order to not trigger the recursion detection, these functions use ring_buffer_nest_start() and ring_buffer_nest_end(). Create a guard() for these functions so that their use cases can be simplified and not need to use goto for the release. 
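For illustration, a sketch of a nested write using the new guard (the function and names are hypothetical; the guard is the one defined in the diff below):

/* ring_buffer_nest_end() now runs on every exit path, no goto needed */
static void demo_nested_write(struct trace_buffer *buffer, unsigned long len)
{
	struct ring_buffer_event *event;

	guard(ring_buffer_nest)(buffer);	/* nest_start() happens here */

	event = ring_buffer_lock_reserve(buffer, len);
	if (!event)
		return;				/* nest_end() runs automatically */
	/* ... fill in the event data ... */
	ring_buffer_unlock_commit(buffer);
}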
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.710501021@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index cd7f0ae26615..8253cb69540c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -144,6 +144,9 @@ int ring_buffer_write(struct trace_buffer *buffer, void ring_buffer_nest_start(struct trace_buffer *buffer); void ring_buffer_nest_end(struct trace_buffer *buffer); +DEFINE_GUARD(ring_buffer_nest, struct trace_buffer *, + ring_buffer_nest_start(_T), ring_buffer_nest_end(_T)) + struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); -- cgit v1.2.3 From d45cf1e7d7180256e17c9ce88e32e8061a7887fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jul 2025 13:17:38 +0000 Subject: ipv6: reject malicious packets in ipv6_gso_segment() syzbot was able to craft a packet with very long IPv6 extension headers leading to an overflow of skb->transport_header. This 16bit field has a limited range. Add skb_reset_transport_header_careful() helper and use it from ipv6_gso_segment() WARNING: CPU: 0 PID: 5871 at ./include/linux/skbuff.h:3032 skb_reset_transport_header include/linux/skbuff.h:3032 [inline] WARNING: CPU: 0 PID: 5871 at ./include/linux/skbuff.h:3032 ipv6_gso_segment+0x15e2/0x21e0 net/ipv6/ip6_offload.c:151 Modules linked in: CPU: 0 UID: 0 PID: 5871 Comm: syz-executor211 Not tainted 6.16.0-rc6-syzkaller-g7abc678e3084 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:skb_reset_transport_header include/linux/skbuff.h:3032 [inline] RIP: 0010:ipv6_gso_segment+0x15e2/0x21e0 net/ipv6/ip6_offload.c:151 Call Trace: skb_mac_gso_segment+0x31c/0x640 net/core/gso.c:53 nsh_gso_segment+0x54a/0xe10 net/nsh/nsh.c:110 skb_mac_gso_segment+0x31c/0x640 net/core/gso.c:53 __skb_gso_segment+0x342/0x510 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x857/0x11b0 net/core/dev.c:3950 validate_xmit_skb_list+0x84/0x120 net/core/dev.c:4000 sch_direct_xmit+0xd3/0x4b0 net/sched/sch_generic.c:329 __dev_xmit_skb net/core/dev.c:4102 [inline] __dev_queue_xmit+0x17b6/0x3a70 net/core/dev.c:4679 Fixes: d1da932ed4ec ("ipv6: Separate ipv6 offload support") Reported-by: syzbot+af43e647fd835acc02df@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/688a1a05.050a0220.5d226.0008.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Dawid Osuchowski Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250730131738.3385939-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b8b06e71b73e..14b923ddb6df 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,6 +3033,29 @@ static inline void skb_reset_transport_header(struct sk_buff *skb) skb->transport_header = offset; } +/** + * skb_reset_transport_header_careful - conditionally reset transport header + * @skb: buffer to alter + * + * Hardened version of skb_reset_transport_header(). + * + * Returns: true if the operation was a success. 
+ */
+static inline bool __must_check
+skb_reset_transport_header_careful(struct sk_buff *skb)
+{
+	long offset = skb->data - skb->head;
+
+	if (unlikely(offset != (typeof(skb->transport_header))offset))
+		return false;
+
+	if (unlikely(offset == (typeof(skb->transport_header))~0U))
+		return false;
+
+	skb->transport_header = offset;
+	return true;
+}
+
 static inline void skb_set_transport_header(struct sk_buff *skb,
					    const int offset)
 {
-- cgit v1.2.3


From 86de56487e5f0017ffd5930b0dbd9dda43048849 Mon Sep 17 00:00:00 2001
From: Amery Hung
Date: Wed, 30 Jul 2025 11:58:52 -0700
Subject: bpf: Allow syscall bpf programs to call non-recur helpers

Allow syscall programs to call non-recur helpers too, since syscall bpf
programs run in process context through the bpf syscall's
BPF_PROG_TEST_RUN command and cannot run recursively.

bpf_task_storage_{get,set} have "_recur" versions that call trylock
instead of taking the lock directly to avoid deadlock when called by bpf
programs that run recursively. Currently, only bpf_lsm, bpf_iter, and
struct_ops without private stack are allowed to call the non-recur
helpers, since they cannot be recursively called in another bpf program.

Signed-off-by: Amery Hung
Reviewed-by: Emil Tsalapatis
Link: https://lore.kernel.org/r/20250730185903.3574598-2-ameryhung@gmail.com
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf_verifier.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 94defa405c85..c823f8efe3ed 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -962,6 +962,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
	case BPF_PROG_TYPE_STRUCT_OPS:
		return prog->aux->jits_use_priv_stack;
	case BPF_PROG_TYPE_LSM:
+	case BPF_PROG_TYPE_SYSCALL:
		return false;
	default:
		return true;
-- cgit v1.2.3


From 6c6d8f8ba7789c221a2e4c43a0ed982c7a41f428 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert"
Date: Wed, 16 Jul 2025 14:32:45 +0100
Subject: lib/xxhash: remove unused functions

xxh32_digest() and xxh32_update() were added in 2017 in the original
xxhash commit, but have remained unused. Remove them.

Link: https://lkml.kernel.org/r/20250716133245.243363-1-linux@treblig.org
Signed-off-by: Dr. David Alan Gilbert
Reviewed-by: Christoph Hellwig
Cc: Dave Gilbert
Cc: Nick Terrell
Signed-off-by: Andrew Morton
---
 include/linux/xxhash.h | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h
index df42511438d0..27f57eca8cb1 100644
--- a/include/linux/xxhash.h
+++ b/include/linux/xxhash.h
@@ -177,32 +177,6 @@ struct xxh64_state {
  */
 void xxh32_reset(struct xxh32_state *state, uint32_t seed);

-/**
- * xxh32_update() - hash the data given and update the xxh32 state
- *
- * @state: The xxh32 state to update.
- * @input: The data to hash.
- * @length: The length of the data to hash.
- *
- * After calling xxh32_reset() call xxh32_update() as many times as necessary.
- *
- * Return: Zero on success, otherwise an error code.
- */
-int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
-
-/**
- * xxh32_digest() - produce the current xxh32 hash
- *
- * @state: Produce the current xxh32 hash of this state.
- *
- * A hash value can be produced at any time. It is still possible to continue
- * inserting input into the hash state after a call to xxh32_digest(), and
- * generate new hashes later on, by calling xxh32_digest() again.
- *
- * Return: The xxh32 hash stored in the state.
- */
-uint32_t xxh32_digest(const struct xxh32_state *state);
-
 /**
  * xxh64_reset() - reset the xxh64 state to start a new hashing operation
  *
-- cgit v1.2.3


From 07d24902977e4704fab8472981e73a0ad6dfa1fd Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Tue, 10 Jun 2025 08:53:27 +0000
Subject: kexec: enable CMA based contiguous allocation

When booting a new kernel with kexec_file, the kernel picks a target
location that the kernel should live at, then allocates random pages,
checks whether any of those pages magically happens to coincide with a
target address range and if so, uses them for that range. For every page
allocated this way, it then creates a page list that the relocation code -
code that executes while all CPUs are off and we are just about to jump
into the new kernel - copies to their final memory location. We cannot
put them there before, because chances are pretty good that at least some
page in the target range is already in use by the currently running Linux
environment. Copying is happening from a single CPU at RAM rate, which
takes around 4-50 ms per 100 MiB.

All of this is inefficient and error prone.

To successfully kexec, we need to quiesce all devices of the outgoing
kernel so they don't scribble over the new kernel's memory. We have seen
cases where that does not happen properly (*cough* GIC *cough*) and hence
the new kernel was corrupted. This started a month-long journey to root
cause failing kexecs to eventually see memory corruption, because the new
kernel was corrupted severely enough that it could not emit output to
tell us about the fact that it was corrupted. By allocating memory for
the next kernel from a memory range that is guaranteed to be free of
scribbling, we can boot the next kernel up to a point where it is at
least able to detect corruption and maybe even stop it before it becomes
severe. This increases the chance for successful kexecs.

Since kexec got introduced, Linux has gained the CMA framework which can
perform physically contiguous memory mappings, while keeping that memory
available for movable memory when it is not needed for contiguous
allocations. The default CMA allocator is for DMA allocations.

This patch adds logic to the kexec file loader to attempt to place the
target payload at a location allocated from CMA. If successful, it uses
that memory range directly instead of creating copy instructions during
the hot phase. To ensure that there is a safety net in case anything
goes wrong with the CMA allocation, it also adds a flag for user space to
force disable CMA allocations.

Using CMA allocations has two advantages:

  1) Faster by 4-50 ms per 100 MiB. There is no more need to copy in the
     hot phase.

  2) More robust. Even if by accident some page is still in use for DMA,
     the new kernel image will be safe from that access because it
     resides in a memory region that is considered allocated in the old
     kernel and has a chance to reinitialize that component.
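A sketch of the effect on the hot phase (the segment_cma and nr_segments fields appear in the diff below; the loop itself is an assumed simplification):

/* CMA-backed segments already sit at their final location, so the
 * relocation pass can simply skip them */
for (i = 0; i < image->nr_segments; i++) {
	if (image->segment_cma[i])
		continue;	/* no copy instruction needed */
	/* ...queue the usual copy of segment[i] for the hot phase... */
}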
Link: https://lkml.kernel.org/r/20250610085327.51817-1-graf@amazon.com Signed-off-by: Alexander Graf Acked-by: Baoquan He Reviewed-by: Pasha Tatashin Cc: Zhongkun He Signed-off-by: Andrew Morton --- include/linux/kexec.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 03f85ad03025..1b10a5d84b68 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -79,6 +79,12 @@ extern note_buf_t __percpu *crash_notes; typedef unsigned long kimage_entry_t; +/* + * This is a copy of the UAPI struct kexec_segment and must be identical + * to it because it gets copied straight from user space into kernel + * memory. Do not modify this structure unless you change the way segments + * get ingested from user space. + */ struct kexec_segment { /* * This pointer can point to user memory if kexec_load() system @@ -172,6 +178,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_align: Minimum alignment needed. * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. + * @cma: CMA page if the buffer is backed by CMA. * @top_down: Allocate from top of memory. * @random: Place the buffer at a random position. */ @@ -184,6 +191,7 @@ struct kexec_buf { unsigned long buf_align; unsigned long buf_min; unsigned long buf_max; + struct page *cma; bool top_down; #ifdef CONFIG_CRASH_DUMP bool random; @@ -340,6 +348,7 @@ struct kimage { unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; + struct page *segment_cma[KEXEC_SEGMENT_MAX]; struct list_head control_pages; struct list_head dest_pages; @@ -361,6 +370,7 @@ struct kimage { */ unsigned int hotplug_support:1; #endif + unsigned int no_cma:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; -- cgit v1.2.3 From d171b10b2d7b067c16d79e1d069a23a34f088d23 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 22 Jul 2025 11:22:30 -0700 Subject: mm/page-flags: remove folio_start_writeback_keepwrite() Commit cd57b77197a4 ("ext4: Convert ext4_bio_write_page() to use a folio) removed set_page_writeback_keepwrite() which was the last/only caller of folio_start_writeback_keepwrite(). Link: https://lkml.kernel.org/r/20250722182230.2114587-1-joannelkoong@gmail.com Signed-off-by: Joanne Koong Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8e4d6eda8a8d..8d3fa3a91ce4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -837,8 +837,6 @@ void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) -#define folio_start_writeback_keepwrite(folio) \ - __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { -- cgit v1.2.3 From f225b34f1e6c81c50e48f6207ddb6d290be1b932 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 09:29:41 +0100 Subject: mm/mseal: always define VM_SEALED Patch series "mseal cleanups", v4. Perform a number of cleanups to the mseal logic. Firstly, VM_SEALED is treated differently from every other VMA flag, it really doesn't make sense to do this, so we start by making this consistent with everything else. Next we place the madvise logic where it belongs - in mm/madvise.c. 
It really makes no sense to abstract this elsewhere. In doing so, we go to great lengths to explain very clearly the previously very confusing logic as to what sealed mappings are impacted here. In doing so, we retain existing logic regarding treatment of madvise() discard operations for a sealed, read-only MAP_PRIVATE file-backed mapping. This is something we likely need to revisit. We then abstract out and explain the 'are there are any gaps in this range in the mm?' check being performed as a prerequisite to mseal being performed. Finally, we simplify the actual mseal logic which is really quite straightforward. No functional change is intended. This patch (of 4): There is no reason to treat VM_SEALED in a special way, in each other case in which a VMA flag is unavailable due to configuration, we simply assign that flag to VM_NONE, so make VM_SEALED consistent with all other VMA flags in this respect. Additionally, use the next available bit for VM_SEALED, 42, rather than arbitrarily putting it at 63 and update the declaration to match all other VMA flags. No functional change intended. Link: https://lkml.kernel.org/r/cover.1753431105.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/aeb398a77029b6e7377cd944328bc9bbc3c90537.1753431105.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jeff Xu Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e3a4c5b78ff..ceaa780a703a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -414,8 +414,10 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) +#define VM_SEALED_BIT 42 +#define VM_SEALED BIT(VM_SEALED_BIT) +#else +#define VM_SEALED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ -- cgit v1.2.3 From 3dfde97800e06882960cc926d2c428f2128b7c70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Jul 2025 10:52:59 +0530 Subject: mm: add get_and_clear_ptes() and clear_ptes() Patch series "Optimizations for khugepaged", v4. If the underlying folio mapped by the ptes is large, we can process those ptes in a batch using folio_pte_batch(). For arm64 specifically, this results in a 16x reduction in the number of ptep_get() calls, since on a contig block, ptep_get() on arm64 will iterate through all 16 entries to collect a/d bits. Next, ptep_clear() will cause a TLBI for every contig block in the range via contpte_try_unfold(). Instead, use clear_ptes() to only do the TLBI at the first and last contig block of the range. For split folios, there will be no pte batching; the batch size returned by folio_pte_batch() will be 1. For pagetable split folios, the ptes will still point to the same large folio; for arm64, this results in the optimization described above, and for other arches, a minor improvement is expected due to a reduction in the number of function calls and batching atomic operations. This patch (of 3): Let's add variants to be used where "full" does not apply -- which will be the majority of cases in the future. "full" really only applies if we are about to tear down a full MM. Use get_and_clear_ptes() in existing code, clear_ptes() users will be added next. 
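A sketch of the intended call pattern (mm, addr, ptep, nr, and folio are assumed to come from the surrounding batching context; the helper is the one added below):

/* hypothetical batched teardown of 'nr' PTEs mapping one large folio,
 * outside of full-mm teardown, so the non-"full" helper applies */
pte_t pte = get_and_clear_ptes(mm, addr, ptep, nr);

if (pte_dirty(pte))
	folio_mark_dirty(folio);
if (pte_young(pte))
	folio_set_referenced(folio);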
Link: https://lkml.kernel.org/r/20250724052301.23844-2-dev.jain@arm.com Signed-off-by: David Hildenbrand Signed-off-by: Dev Jain Reviewed-by: Baolin Wang Reviewed-by: Barry Song Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Liam Howlett Cc: Mariano Pache Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e3b99920be05..4c035637eeb7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, } #endif +/** + * get_and_clear_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of get_and_clear_full_ptes() if it is known that we don't + * need to clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + return get_and_clear_full_ptes(mm, addr, ptep, nr, 0); +} + #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same @@ -768,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, } #endif +/** + * clear_ptes - Clear present PTEs that map consecutive pages of the same folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * + * Use this instead of clear_full_ptes() if it is known that we don't need to + * clear the full mm, which is mostly the case. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + clear_full_ptes(mm, addr, ptep, nr, 0); +} + /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread -- cgit v1.2.3 From 9a4f90e246615d1f42a9b907deb9b4c0a418d996 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Jul 2025 15:29:01 +0100 Subject: mm: remove mm/io-mapping.c This is dead code, which was used from commit b739f125e4eb ("i915: use io_mapping_map_user") but reverted a month later by commit 0e4fe0c9f2f9 ("Revert "i915: use io_mapping_map_user"") back in 2021. Since then nobody has used it, so remove it. 
[akpm@linux-foundation.org: update Documentation/core-api/mm-api.rst, per Vlastimil]
Link: https://lkml.kernel.org/r/20250725142901.81502-1-lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes
Acked-by: David Hildenbrand
Acked-by: Vlastimil Babka
Cc: Liam Howlett
Cc: Lorenzo Stoakes
Cc: Michal Hocko
Cc: Mike Rapoport
Cc: Suren Baghdasaryan
Signed-off-by: Andrew Morton
---
 include/linux/io-mapping.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 7376c1df9c90..c16353cc6e3c 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -225,7 +225,4 @@ io_mapping_free(struct io_mapping *iomap)
	kfree(iomap);
 }

-int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma,
-		unsigned long addr, unsigned long pfn, unsigned long size);
-
 #endif /* _LINUX_IO_MAPPING_H */
-- cgit v1.2.3


From a222439e1e273fa0f4e37ce17aeb109f3e91824f Mon Sep 17 00:00:00 2001
From: Jann Horn
Date: Fri, 25 Jul 2025 14:16:24 +0200
Subject: mm/rmap: add anon_vma lifetime debug check

If an anon folio is mapped into userspace, its anon_vma must be alive,
otherwise rmap walks can hit UAF.

There were syzkaller reports a few months ago[1][2] of UAFs in rmap walks
that seem to indicate that there can be pages with elevated mapcount
whose anon_vma has already been freed, but I think we never figured out
what the cause is; and syzkaller only hit these UAFs when memory pressure
randomly caused reclaim to rmap-walk the affected pages, so it of course
didn't manage to create a reproducer.

Add a VM_WARN_ON_FOLIO() when we add/remove mappings of anonymous folios
to hopefully catch such issues more reliably.

[1] https://lore.kernel.org/r/67abaeaf.050a0220.110943.0041.GAE@google.com
[2] https://lore.kernel.org/r/67a76f33.050a0220.3d72c.0028.GAE@google.com

Link: https://lkml.kernel.org/r/20250725-anonvma-uaf-debug-v2-1-bc3c7e5ba5b1@google.com
Signed-off-by: Jann Horn
Acked-by: David Hildenbrand
Reviewed-by: Lorenzo Stoakes
Acked-by: Vlastimil Babka
Acked-by: Harry Yoo
Cc: David Hildenbrand
Cc: Jann Horn
Cc: Liam Howlett
Cc: Rik van Riel
Signed-off-by: Andrew Morton
---
 include/linux/rmap.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 20803fcb49a7..6cd020eea37a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -449,6 +449,28 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio,
	default:
		VM_WARN_ON_ONCE(true);
	}
+
+	/*
+	 * Anon folios must have an associated live anon_vma as long as they're
+	 * mapped into userspace.
+	 * Note that the atomic_read() mainly does two things:
+	 *
+	 * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to
+	 *    check that the associated anon_vma has not yet been freed (subject
+	 *    to KASAN's usual limitations). This check will pass if the
+	 *    anon_vma's refcount has already dropped to 0 but an RCU grace
+	 *    period hasn't passed since then.
+	 * 2. If the anon_vma has not yet been freed, it checks that the
+	 *    anon_vma still has a nonzero refcount (as opposed to being in the
+	 *    middle of an RCU delay for getting freed).
+	 */
+	if (folio_test_anon(folio) && !folio_test_ksm(folio)) {
+		unsigned long mapping = (unsigned long)folio->mapping;
+		struct anon_vma *anon_vma;
+
+		anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON);
+		VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio);
+	}
 }

 /*
-- cgit v1.2.3


From 9bbffee67ffd16360179327b57f3b1245579ef08 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan
Date: Mon, 28 Jul 2025 10:53:55 -0700
Subject: mm: fix a UAF when vma->mm is freed after vma->vm_refcnt got dropped

By inducing delays in the right places, Jann Horn created a reproducer
for a hard-to-hit UAF issue that became possible after VMAs were allowed
to be recycled by adding SLAB_TYPESAFE_BY_RCU to their cache.

Race description is borrowed from Jann's discovery report:
lock_vma_under_rcu() looks up a VMA locklessly with mas_walk() under
rcu_read_lock(). At that point, the VMA may be concurrently freed, and
it can be recycled by another process. vma_start_read() then increments
the vma->vm_refcnt (if it is in an acceptable range), and if this
succeeds, vma_start_read() can return a recycled VMA.

In this scenario where the VMA has been recycled, lock_vma_under_rcu()
will then detect the mismatching ->vm_mm pointer and drop the VMA through
vma_end_read(), which calls vma_refcount_put(). vma_refcount_put() drops
the refcount and then calls rcuwait_wake_up() using a copy of vma->vm_mm.
This is wrong: It implicitly assumes that the caller is keeping the VMA's
mm alive, but in this scenario the caller has no relation to the VMA's
mm, so the rcuwait_wake_up() can cause UAF.

The diagram depicting the race:

  T1                                          T2       T3
  ==                                          ==       ==
  lock_vma_under_rcu
    mas_walk
                                              mmap
    vma_start_read
      __refcount_inc_not_zero_limited_acquire
                                                       munmap
                                                         __vma_enter_locked
                                                           refcount_add_not_zero
    vma_end_read
      vma_refcount_put
        __refcount_dec_and_test
                                                       rcuwait_wait_event
        rcuwait_wake_up [UAF]

Note that rcuwait_wait_event() in T3 does not block because the refcount
was already dropped by T1. At this point T3 can exit and free the mm,
causing the UAF in T1. To avoid this we move vma->vm_mm verification
into vma_start_read() and grab vma->vm_mm to stabilize it before the
vma_refcount_put() operation.

[surenb@google.com: v3]
Link: https://lkml.kernel.org/r/20250729145709.2731370-1-surenb@google.com
Link: https://lkml.kernel.org/r/20250728175355.2282375-1-surenb@google.com
Fixes: 3104138517fc ("mm: make vma cache SLAB_TYPESAFE_BY_RCU")
Signed-off-by: Suren Baghdasaryan
Reported-by: Jann Horn
Closes: https://lore.kernel.org/all/CAG48ez0-deFbVH=E3jbkWx=X3uVbd8nWeo6kbJPQ0KoUD+m2tA@mail.gmail.com/
Reviewed-by: Vlastimil Babka
Acked-by: Lorenzo Stoakes
Cc: Jann Horn
Cc: Liam Howlett
Cc:
Signed-off-by: Andrew Morton
---
 include/linux/mmap_lock.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 1f4f44951abe..11a078de9150 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w);
 #include
 #include
 #include
+#include

 #define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
@@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
 * reused and attached to a different mm before we lock it.
 * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
 * detached.
+ *
+ * WARNING!
The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check -- cgit v1.2.3 From fcd90ad31e29d0b403f3a074a64cd7f0876175dd Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:23 +0300 Subject: execmem: drop unused execmem_update_copy() Patch series "x86: enable EXECMEM_ROX_CACHE for ftrace and kprobes", v3. These patches enable use of EXECMEM_ROX_CACHE for ftrace and kprobes allocations on x86. They also include some ground work in execmem. Since the execmem model for caching large ROX pages changed from the initial assumption that the memory that is allocated from ROX cache is always ROX to the current state where memory can be temporarily made RW and then restored to ROX, we can stop using text poking to update it. This also saves the hassle of trying lock text_mutex in execmem_cache_free() when kprobes already hold that mutex. This patch (of 8): The execmem_update_copy() that used text poking was required when memory allocated from ROX cache was always read-only. Since now its permissions can be switched to read-write there is no need in a function that updates memory with text poking. Remove it. Link: https://lkml.kernel.org/r/20250713071730.4117334-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250713071730.4117334-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 3be35680a54f..734fbe83d98e 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -185,19 +185,6 @@ DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); struct vm_struct *execmem_vmap(size_t size); #endif -/** - * execmem_update_copy - copy an update to executable memory - * @dst: destination address to update - * @src: source address containing the data - * @size: how many bytes of memory shold be copied - * - * Copy @size bytes from @src to @dst using text poking if the memory at - * @dst is read-only. 
- * - * Return: a pointer to @dst or NULL on error - */ -void *execmem_update_copy(void *dst, const void *src, size_t size); - /** * execmem_is_rox - check if execmem is read-only * @type - the execmem type to check -- cgit v1.2.3 From 838955f64ae7582f009a3538889bb9244f37ab26 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:24 +0300 Subject: execmem: introduce execmem_alloc_rw() Some callers of execmem_alloc() require the memory to be temporarily writable even when it is allocated from ROX cache. These callers use execmem_make_temp_rw() right after the call to execmem_alloc(). Wrap this sequence in a new execmem_alloc_rw() API. Link: https://lkml.kernel.org/r/20250713071730.4117334-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Cc: Masami Hiramatsu (Google) Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 734fbe83d98e..8b61b05da7d5 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -67,21 +67,6 @@ enum execmem_range_flags { */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); -/** - * execmem_make_temp_rw - temporarily remap region with read-write - * permissions - * @ptr: address of the region to remap - * @size: size of the region to remap - * - * Remaps a part of the cached large page in the ROX cache in the range - * [@ptr, @ptr + @size) as writable and not executable. The caller must - * have exclusive ownership of this range and ensure nothing will try to - * execute code in this range. - * - * Return: 0 on success or negative error code on failure. - */ -int execmem_make_temp_rw(void *ptr, size_t size); - /** * execmem_restore_rox - restore read-only-execute permissions * @ptr: address of the region to remap @@ -95,7 +80,6 @@ int execmem_make_temp_rw(void *ptr, size_t size); */ int execmem_restore_rox(void *ptr, size_t size); #else -static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif @@ -165,6 +149,28 @@ struct execmem_info *execmem_arch_setup(void); */ void *execmem_alloc(enum execmem_type type, size_t size); +/** + * execmem_alloc_rw - allocate writable executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * Forces writable permissions on the allocated memory and the caller is + * responsible to manage the permissions afterwards. + * + * For architectures that use ROX cache the permissions will be set to R+W. + * For architectures that don't use ROX cache the default permissions for @type + * will be used as they must be writable.
+ * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc_rw(enum execmem_type type, size_t size); + /** * execmem_free - free executable memory * @ptr: pointer to the memory that should be freed -- cgit v1.2.3 From ab674b6871b049aab2e86d1d7375526368ed175a Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Jul 2025 10:17:28 +0300 Subject: execmem: drop writable parameter from execmem_fill_trapping_insns() After the update of execmem_cache_free() that made memory writable before updating it, there is no need to update read-only memory, so the writable parameter to execmem_fill_trapping_insns() is not needed. Drop it. Link: https://lkml.kernel.org/r/20250713071730.4117334-7-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Peter Zijlstra (Intel) Cc: Daniel Gomez Cc: Masami Hiramatsu (Google) Cc: Petr Pavlu Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/execmem.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 8b61b05da7d5..7de229134e30 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -60,12 +60,11 @@ enum execmem_range_flags { * will trap * @ptr: pointer to memory to fill * @size: size of the range to fill - * @writable: is the memory poited by @ptr is writable or ROX * * A hook for architecures to fill execmem ranges with invalid instructions. * Architectures that use EXECMEM_ROX_CACHE must implement this. */ -void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); +void execmem_fill_trapping_insns(void *ptr, size_t size); /** * execmem_restore_rox - restore read-only-execute permissions * @ptr: address of the region to remap -- cgit v1.2.3 From bb5b0b4317c9516bdc5e9a4235e3b5f1a73b7e48 Mon Sep 17 00:00:00 2001 From: Joshua Kinard Date: Mon, 21 Jul 2025 13:00:51 -0400 Subject: rtc: ds1685: Update Joshua Kinard's email address. I am switching my address to a personal domain, so I need to update the driver's files and the entry in MAINTAINERS. Signed-off-by: Joshua Kinard Link: https://lore.kernel.org/r/20250721170051.32407-1-kumba@gentoo.org Signed-off-by: Alexandre Belloni --- include/linux/rtc/ds1685.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 5a41c3bbcbe3..01da4582db6d 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -8,7 +8,7 @@ * include larger, battery-backed NV-SRAM, burst-mode access, and an RTC * write counter. * - * Copyright (C) 2011-2014 Joshua Kinard . + * Copyright (C) 2011-2014 Joshua Kinard . * Copyright (C) 2009 Matthias Fuchs . * * References: -- cgit v1.2.3 From 7c527c15cdda2e0a26a05ac15a44d3e14738fc55 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:12 +0900 Subject: firewire: core: use reference counting to invoke address handlers safely The lifetime of an address handler has been managed by a linked list and RCU. This approach was introduced in commit 35202f7d8420 ("firewire: remove global lock around address handlers, convert to RCU"). The invocations of address handlers are performed within RCU read-side critical sections. In commit 57e6d9f85fff ("firewire: ohci: use workqueue to handle events of AR request/response contexts"), the invocations are in a workqueue context. The approach still imposes the limitation that sleeping is not allowed within RCU read-side critical sections.
This commit adds reference counting to decouple handler invocation from handler discovery. The linked list and RCU are used to discover the handlers, while the reference counting is used to invoke them safely. Link: https://lore.kernel.org/r/20250803122015.236493-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index cceb70415ed2..d38c6e538e5c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -341,7 +341,11 @@ struct fw_address_handler { u64 length; fw_address_callback_t address_callback; void *callback_data; + + // Only for core functions. struct list_head link; + struct kref kref; + struct completion done; }; struct fw_address_region { -- cgit v1.2.3 From 86624ba3b522b6512def25534341da93356c8da4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Jul 2025 13:08:25 -0300 Subject: vfio/pci: Do vf_token checks for VFIO_DEVICE_BIND_IOMMUFD This was missed during the initial implementation. The VFIO PCI driver encodes the vf_token inside the device name when opening the device from the group FD, something like: "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3" This is used to control access to a VF unless there is co-ordination with the owner of the PF. Since we no longer have a device name in the cdev path, pass the token directly through VFIO_DEVICE_BIND_IOMMUFD using an optional field indicated by VFIO_DEVICE_BIND_FLAG_TOKEN. Fixes: 5fcc26969a16 ("vfio: Add VFIO_DEVICE_BIND_IOMMUFD") Tested-by: Shameer Kolothum Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v3-bdd8716e85fe+3978a-vfio_token_jgg@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ include/linux/vfio_pci_core.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 707b00772ce1..eb563f538dee 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -105,6 +105,9 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @match_token_uuid: Optional device token match/validation. Return 0 + * if the uuid is valid for the device, -errno otherwise. uuid is NULL + * if none was provided. * @dma_unmap: Called when userspace unmaps IOVA from the container + * this device is attached to.
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl @@ -132,6 +135,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*match_token_uuid)(struct vfio_device *vdev, const uuid_t *uuid); void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b3..f541044e42a2 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -122,6 +122,8 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); -- cgit v1.2.3 From d87a513d093726d121dd5c816e26803111a259d0 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 6 Aug 2025 09:25:38 -0700 Subject: bpf: Allow struct_ops to get map id by kdata Add bpf_struct_ops_id() to enable struct_ops implementors to use struct_ops map id as the unique id of a struct_ops in their subsystem. A subsystem that wishes to create a mapping between id and struct_ops instance pointer can update the mapping accordingly during bpf_struct_ops::reg(), unreg(), and update(). Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250806162540.681679-2-ameryhung@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc700925b802..e7ee089e8a31 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1985,6 +1985,7 @@ static inline void bpf_module_put(const void *data, struct module *owner) module_put(owner); } int bpf_struct_ops_link_create(union bpf_attr *attr); +u32 bpf_struct_ops_id(const void *kdata); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ -- cgit v1.2.3 From 397a46c9aa3343e8efe6847bdaa124945bab1de4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 25 Jul 2025 09:46:50 +0200 Subject: gpio: remove legacy GPIO line value setter callbacks With no more users of the legacy GPIO line value setters - .set() and .set_multiple() - we can now remove them from the kernel. 
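For illustration (not part of the patch), a minimal sketch of the driver side once the legacy void setters are gone; the controller, its register and the bus-helper names are hypothetical, and at this point in the series the integer-returning callback is still named set_rv() (it is renamed back to .set in the following commit):

    #include <linux/bits.h>
    #include <linux/gpio/driver.h>
    #include <linux/io.h>

    /* Hypothetical MMIO GPIO controller; locking omitted for brevity. */
    struct foo_gpio {
            struct gpio_chip gc;
            void __iomem *out_reg;      /* assumed output data register */
    };

    static int foo_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
    {
            struct foo_gpio *foo = gpiochip_get_data(gc);
            u32 reg = readl(foo->out_reg);

            if (value)
                    reg |= BIT(offset);
            else
                    reg &= ~BIT(offset);
            writel(reg, foo->out_reg);

            return 0;   /* unlike the legacy .set(), errors can now be reported */
    }

The driver then wires it up with "foo->gc.set_rv = foo_gpio_set;" at probe time.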
Link: https://lore.kernel.org/r/20250725074651.14002-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4b984e8f8fcd..90567dde7d8e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,8 +347,6 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: **DEPRECATED** - please use set_rv() instead - * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead * @set_rv: assigns output value for signal "offset", returns 0 on success or * negative error value * @set_multiple_rv: assigns output values for multiple signals defined by @@ -445,11 +443,6 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - void (*set)(struct gpio_chip *gc, - unsigned int offset, int value); - void (*set_multiple)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); int (*set_rv)(struct gpio_chip *gc, unsigned int offset, int value); -- cgit v1.2.3 From d9d87d90cc0b10cd56ae353f50b11417e7d21712 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 17 Jul 2025 15:21:26 +0200 Subject: treewide: rename GPIO set callbacks back to their original names The conversion of all GPIO drivers to using the .set_rv() and .set_multiple_rv() callbacks from struct gpio_chip (which - unlike their predecessors - return an integer and allow the controller drivers to indicate failures to users) is now complete and the legacy ones have been removed. Rename the new callbacks back to their original names in one sweeping change. Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 19 +++++++++---------- include/linux/gpio/generic.h | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 90567dde7d8e..667f8fd58a79 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,10 +347,10 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set_rv: assigns output value for signal "offset", returns 0 on success or - * negative error value - * @set_multiple_rv: assigns output values for multiple signals defined by - * "mask", returns 0 on success or negative error value + * @set: assigns output value for signal "offset", returns 0 on success or + * negative error value + * @set_multiple: assigns output values for multiple signals defined by + * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. Must return 0 on success and * a negative error number on failure. 
@@ -443,12 +443,11 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - int (*set_rv)(struct gpio_chip *gc, - unsigned int offset, - int value); - int (*set_multiple_rv)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); + int (*set)(struct gpio_chip *gc, + unsigned int offset, int value); + int (*set_multiple)(struct gpio_chip *gc, + unsigned long *mask, + unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index b511acd58ab0..f3a8db4598bb 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -88,10 +88,10 @@ static inline int gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, int value) { - if (WARN_ON(!chip->gc.set_rv)) + if (WARN_ON(!chip->gc.set)) return -EOPNOTSUPP; - return chip->gc.set_rv(&chip->gc, offset, value); + return chip->gc.set(&chip->gc, offset, value); } #define gpio_generic_chip_lock(gen_gc) \ -- cgit v1.2.3 From 42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:12 +0800 Subject: lib/sbitmap: convert shallow_depth from one word to the whole sbitmap Currently, elevators record an internal 'async_depth' to throttle asynchronous requests, and they calculate shallow_depth based on sb->shift, on the assumption that sb->shift is the number of available tags in one word. However, sb->shift is not the number of available tags in the last word, see __map_depth: if (index == sb->map_nr - 1) return sb->depth - (index << sb->shift); As a consequence, if the last word is used, more tags can be obtained than expected. For example, assume nr_requests=256 with four words; in the worst case, if the user sets nr_requests=32, then the first word is also the last word, yet async_depth is still calculated from the full bits-per-word value of 64, which is wrong. On the other hand, due to cgroup qos, bfq can allow only one request to be allocated, but setting shallow_depth=1 would still allow one request per word to be allocated. Fix these problems by applying shallow_depth to the whole sbitmap instead of to each word, and change kyber, mq-deadline and bfq to follow this. A new helper __map_depth_with_shallow() is introduced to calculate the available bits in each word. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20250807032413.1469456-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 189140bf11fc..4adf4b364fcd 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -213,12 +213,12 @@ int sbitmap_get(struct sbitmap *sb); * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority + * with a @shallow_depth of (sb->depth >> 1).
Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * @@ -478,7 +478,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the queue. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after -- cgit v1.2.3 From 45fa9f97e65231a9fd4f9429489cb74c10ccd0fd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:13 +0800 Subject: lib/sbitmap: make sbitmap_get_shallow() internal Because it's only used in sbitmap.c Signed-off-by: Yu Kuai Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20250807032413.1469456-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4adf4b364fcd..ffb9907c7070 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -209,23 +209,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); */ int sbitmap_get(struct sbitmap *sb); -/** - * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, - * limiting the depth used from each word. - * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from the bitmap. - * - * This rather specific operation allows for having multiple users with - * different allocation limits. E.g., there can be a high-priority class that - * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority - * class can only allocate half of the total bits in the bitmap, preventing it - * from starving out the high-priority class. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); - /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. -- cgit v1.2.3 From 2c223f7239f376a90d71903ec474ba887cf21d94 Mon Sep 17 00:00:00 2001 From: Oreoluwa Babatunde Date: Wed, 6 Aug 2025 10:24:21 -0700 Subject: of: reserved_mem: Restructure call site for dma_contiguous_early_fixup() Restructure the call site for dma_contiguous_early_fixup() to where the reserved_mem nodes are being parsed from the DT so that dma_mmu_remap[] is populated before dma_contiguous_remap() is called. 
Fixes: 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved memory regions are processed") Signed-off-by: Oreoluwa Babatunde Tested-by: William Zhang Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250806172421.2748302-1-oreoluwa.babatunde@oss.qualcomm.com --- include/linux/dma-map-ops.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f48e5fb88bd5..332b80c42b6f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -153,6 +153,9 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } +static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT -- cgit v1.2.3 From 17e8b7e08fa8bf7a936f70444a42a88750410251 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Jul 2025 09:48:54 +0200 Subject: fs: mark file_remove_privs_flags static file_remove_privs_flags is only used inside of inode.c, mark it static. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250724074854.3316911-1-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..796319914b0a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3393,7 +3393,6 @@ static inline struct inode *new_inode_pseudo(struct super_block *sb) extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); -extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); -- cgit v1.2.3 From 4e021920812d164bb02c30cc40e08a3681b1c755 Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Wed, 30 Jul 2025 20:18:53 +0000 Subject: fs: document 'name' parameter for name_contains_dotdot() The kernel-doc for name_contains_dotdot() was missing the @name parameter description, leading to a warning during make htmldocs. Add the missing documentation to resolve this warning. Signed-off-by: Kriish Sharma Link: https://lore.kernel.org/20250730201853.8436-1-kriish.sharma2006@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 796319914b0a..780e9c774c54 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3281,7 +3281,7 @@ static inline bool is_dot_dotdot(const char *name, size_t len) /** * name_contains_dotdot - check if a file name contains ".." path components - * + * @name: File path string to check * Search for ".." surrounded by either '/' or start/end of string. */ static inline bool name_contains_dotdot(const char *name) -- cgit v1.2.3 From 56ecfd9175b999dfc303ac6a0f9ea4bd1bee49d7 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 23 Jul 2025 14:21:54 +0100 Subject: fs: Remove mount_nodev mount_nodev has had no in-tree users since cc0876f817d6 ("vfs: Convert devpts to use the new mount API"). Remove it. 
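For context, a minimal sketch (hypothetical "foofs" names) of the pattern that replaces mount_nodev() under the new mount API, where get_tree_nodev() takes over the fill_super role:

    #include <linux/fs.h>
    #include <linux/fs_context.h>

    static int foofs_fill_super(struct super_block *sb, struct fs_context *fc)
    {
            /* set up sb->s_op, block size, root dentry, etc. */
            return 0;
    }

    static int foofs_get_tree(struct fs_context *fc)
    {
            /* allocates a superblock not backed by a block device */
            return get_tree_nodev(fc, foofs_fill_super);
    }

    static const struct fs_context_operations foofs_context_ops = {
            .get_tree       = foofs_get_tree,
    };

    static int foofs_init_fs_context(struct fs_context *fc)
    {
            fc->ops = &foofs_context_ops;
            return 0;
    }

    static struct file_system_type foofs_type = {
            .name                   = "foofs",
            .init_fs_context        = foofs_init_fs_context,
            .kill_sb                = kill_anon_super,
    };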
Signed-off-by: Pedro Falcato Link: https://lore.kernel.org/20250723132156.225410-2-pfalcato@suse.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..204328ed7ebb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2716,9 +2716,6 @@ static inline bool is_mgtime(const struct inode *inode) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -extern struct dentry *mount_nodev(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); -- cgit v1.2.3 From f7d161c2804f3ad36bdc3222cb93c8fee67be98c Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 23 Jul 2025 14:21:55 +0100 Subject: fs: Remove mount_bdev mount_bdev has no in-tree users ever since f2fs adopted the new mount API. Remove it. Signed-off-by: Pedro Falcato Link: https://lore.kernel.org/20250723132156.225410-3-pfalcato@suse.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 204328ed7ebb..98afcf455b28 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2713,9 +2713,6 @@ static inline bool is_mgtime(const struct inode *inode) return inode->i_opflags & IOP_MGTIME; } -extern struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); -- cgit v1.2.3 From ad7fe23b4b0dc0c26187df92a5649948ef7049fa Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 6 Aug 2025 16:07:05 +1000 Subject: fscontext: add custom-prefix log helpers Sometimes, errors associated with an fscontext come from the VFS or otherwise outside of the filesystem driver itself. However, the default logging of errorfc will always prefix the message with the filesystem name. So, add some *fcp() wrappers that allow for custom prefixes to be used when emitting information to the fscontext log. Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250806-errorfc-mount-too-revealing-v2-1-534b9b4d45bb@cyphar.com Signed-off-by: Christian Brauner --- include/linux/fs_context.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 7773eb870039..671f031be173 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -186,10 +186,12 @@ struct fc_log { extern __attribute__((format(printf, 4, 5))) void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...); -#define __logfc(fc, l, fmt, ...) logfc((fc)->log.log, NULL, \ - l, fmt, ## __VA_ARGS__) -#define __plog(p, l, fmt, ...) logfc((p)->log, (p)->prefix, \ - l, fmt, ## __VA_ARGS__) +#define __logfc(fc, l, fmt, ...) \ + logfc((fc)->log.log, NULL, (l), (fmt), ## __VA_ARGS__) +#define __plogp(p, prefix, l, fmt, ...) 
\ + logfc((p)->log, (prefix), (l), (fmt), ## __VA_ARGS__) +#define __plog(p, l, fmt, ...) __plogp(p, (p)->prefix, l, fmt, ## __VA_ARGS__) + /** * infof - Store supplementary informational message * @fc: The context in which to log the informational message @@ -201,6 +203,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__) #define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__) #define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) +#define infofcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'i', fmt, ## __VA_ARGS__) /** * warnf - Store supplementary warning message @@ -213,6 +217,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define warnf(fc, fmt, ...) __logfc(fc, 'w', fmt, ## __VA_ARGS__) #define warn_plog(p, fmt, ...) __plog(p, 'w', fmt, ## __VA_ARGS__) #define warnfc(fc, fmt, ...) __plog((&(fc)->log), 'w', fmt, ## __VA_ARGS__) +#define warnfcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'w', fmt, ## __VA_ARGS__) /** * errorf - Store supplementary error message @@ -225,6 +231,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define errorf(fc, fmt, ...) __logfc(fc, 'e', fmt, ## __VA_ARGS__) #define error_plog(p, fmt, ...) __plog(p, 'e', fmt, ## __VA_ARGS__) #define errorfc(fc, fmt, ...) __plog((&(fc)->log), 'e', fmt, ## __VA_ARGS__) +#define errorfcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'e', fmt, ## __VA_ARGS__) /** * invalf - Store supplementary invalid argument error message @@ -237,5 +245,7 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define invalf(fc, fmt, ...) (errorf(fc, fmt, ## __VA_ARGS__), -EINVAL) #define inval_plog(p, fmt, ...) (error_plog(p, fmt, ## __VA_ARGS__), -EINVAL) #define invalfc(fc, fmt, ...) (errorfc(fc, fmt, ## __VA_ARGS__), -EINVAL) +#define invalfcp(fc, prefix, fmt, ...) \ + (errorfcp(fc, prefix, fmt, ## __VA_ARGS__), -EINVAL) #endif /* _LINUX_FS_CONTEXT_H */ -- cgit v1.2.3 From bb2441402392ef1f49563be68e8f0dcb127ac965 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Tue, 5 Aug 2025 22:40:56 +0300 Subject: regulator: add s2dos05 regulator support S2DOS05 has 1 buck and 4 LDO regulators, used for powering panel/touchscreen. 
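To make the register constants in the header added below concrete, a sketch of the usual linear-range selector-to-voltage conversion (an illustrative helper, not part of the patch; it assumes the regulator core's min_uV + sel * step_uV convention applies to the BUCK VSEL field):

    #include <linux/regulator/s2dos05.h>

    /* BUCK output in microvolts for a given VSEL register value */
    static int s2dos05_buck_sel_to_uV(unsigned int sel)
    {
            sel &= S2DOS05_BUCK_VSEL_MASK;
            return S2DOS05_BUCK_MIN1 + sel * S2DOS05_BUCK_STEP1;
    }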
Signed-off-by: Dzmitry Sankouski Link: https://patch.msgid.link/20250805-starqltechn_integration_upstream-v8-1-09d8a321fafe@gmail.com Signed-off-by: Mark Brown --- include/linux/regulator/s2dos05.h | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 include/linux/regulator/s2dos05.h (limited to 'include/linux') diff --git a/include/linux/regulator/s2dos05.h b/include/linux/regulator/s2dos05.h new file mode 100644 index 000000000000..2e89fcbce769 --- /dev/null +++ b/include/linux/regulator/s2dos05.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// s2dos05.h +// +// Copyright (c) 2016 Samsung Electronics Co., Ltd +// http://www.samsung.com +// Copyright (C) 2024 Dzmitry Sankouski + +#ifndef __LINUX_S2DOS05_H +#define __LINUX_S2DOS05_H + +// S2DOS05 registers +// Slave Addr : 0xC0 +enum S2DOS05_reg { + S2DOS05_REG_DEV_ID, + S2DOS05_REG_TOPSYS_STAT, + S2DOS05_REG_STAT, + S2DOS05_REG_EN, + S2DOS05_REG_LDO1_CFG, + S2DOS05_REG_LDO2_CFG, + S2DOS05_REG_LDO3_CFG, + S2DOS05_REG_LDO4_CFG, + S2DOS05_REG_BUCK_CFG, + S2DOS05_REG_BUCK_VOUT, + S2DOS05_REG_IRQ_MASK = 0x0D, + S2DOS05_REG_SSD_TSD = 0x0E, + S2DOS05_REG_OCL = 0x10, + S2DOS05_REG_IRQ = 0x11 +}; + +// S2DOS05 regulator ids +enum S2DOS05_regulators { + S2DOS05_LDO1, + S2DOS05_LDO2, + S2DOS05_LDO3, + S2DOS05_LDO4, + S2DOS05_BUCK1, + S2DOS05_REG_MAX, +}; + +#define S2DOS05_IRQ_PWRMT_MASK BIT(5) +#define S2DOS05_IRQ_TSD_MASK BIT(4) +#define S2DOS05_IRQ_SSD_MASK BIT(3) +#define S2DOS05_IRQ_SCP_MASK BIT(2) +#define S2DOS05_IRQ_UVLO_MASK BIT(1) +#define S2DOS05_IRQ_OCD_MASK BIT(0) + +#define S2DOS05_BUCK_MIN1 506250 +#define S2DOS05_LDO_MIN1 1500000 +#define S2DOS05_LDO_MIN2 2700000 +#define S2DOS05_BUCK_STEP1 6250 +#define S2DOS05_LDO_STEP1 25000 +#define S2DOS05_LDO_VSEL_MASK 0x7F +#define S2DOS05_LDO_FD_MASK 0x80 +#define S2DOS05_BUCK_VSEL_MASK 0xFF +#define S2DOS05_BUCK_FD_MASK 0x08 + +#define S2DOS05_ENABLE_MASK_L1 BIT(0) +#define S2DOS05_ENABLE_MASK_L2 BIT(1) +#define S2DOS05_ENABLE_MASK_L3 BIT(2) +#define S2DOS05_ENABLE_MASK_L4 BIT(3) +#define S2DOS05_ENABLE_MASK_B1 BIT(4) + +#define S2DOS05_RAMP_DELAY 12000 + +#define S2DOS05_ENABLE_TIME_LDO 50 +#define S2DOS05_ENABLE_TIME_BUCK 350 + +#define S2DOS05_LDO_N_VOLTAGES (S2DOS05_LDO_VSEL_MASK + 1) +#define S2DOS05_BUCK_N_VOLTAGES (S2DOS05_BUCK_VSEL_MASK + 1) + +#define S2DOS05_REGULATOR_MAX (S2DOS05_REG_MAX) + +#endif // __LINUX_S2DOS05_H -- cgit v1.2.3 From 181fe022ecf8a8e85def0e94852c631c59a8b3f6 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:44 +0200 Subject: gpiolib: add support to register sparse pin range Add support for registering a GPIO<->pin mapping using a list of non-consecutive pins. The core already supports a sparse pin range (the pins member of struct pinctrl_gpio_range), but it was not possible to register one. If pins is not NULL the core uses it, otherwise it assumes that a consecutive pin range was registered and uses pin_base. The function gpiochip_add_pin_range(), which allocates and fills the struct pinctrl_gpio_range, was renamed to gpiochip_add_pin_range_with_pins() and a pins parameter was added. Two new functions were added, gpiochip_add_pin_range() and gpiochip_add_sparse_pin_range(), to register a consecutive or sparse pin range. Both use gpiochip_add_pin_range_with_pins().
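A minimal usage sketch (hypothetical controller name and pin numbers): GPIO offsets 0..3 map to scattered pins, so the consecutive pin_base scheme cannot describe them:

    #include <linux/gpio/driver.h>
    #include <linux/kernel.h>

    static const unsigned int foo_pins[] = { 2, 5, 11, 12 };

    static int foo_add_pin_ranges(struct gpio_chip *gc)
    {
            /* GPIO offsets 0..3 of "foo-pinctrl" map to pins 2, 5, 11 and 12 */
            return gpiochip_add_sparse_pin_range(gc, "foo-pinctrl", 0,
                                                 foo_pins, ARRAY_SIZE(foo_pins));
    }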
Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-1-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 51 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 667f8fd58a79..9fcd4a988081 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -772,16 +772,50 @@ struct gpio_pin_range { #ifdef CONFIG_PINCTRL -int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, - unsigned int gpio_offset, unsigned int pin_offset, - unsigned int npins); +int gpiochip_add_pin_range_with_pins(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int const *pins, + unsigned int npins); int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group); void gpiochip_remove_pin_ranges(struct gpio_chip *gc); +static inline int +gpiochip_add_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int npins) +{ + return gpiochip_add_pin_range_with_pins(gc, pinctl_name, gpio_offset, + pin_offset, NULL, npins); +} + +static inline int +gpiochip_add_sparse_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int const *pins, + unsigned int npins) +{ + return gpiochip_add_pin_range_with_pins(gc, pinctl_name, gpio_offset, 0, + pins, npins); +} #else /* ! CONFIG_PINCTRL */ +static inline int +gpiochip_add_pin_range_with_pins(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int const *pins, + unsigned int npins) +{ + return 0; +} + static inline int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, @@ -789,6 +823,17 @@ gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, { return 0; } + +static inline int +gpiochip_add_sparse_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int const *pins, + unsigned int npins) +{ + return 0; +} + static inline int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, -- cgit v1.2.3 From 6e986f8852f56cf9214ea2ec02b4b432e201d02c Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:49 +0200 Subject: gpio: aggregator: export symbols of the GPIO forwarder library Export all symbols and create a header file for the GPIO forwarder library. It will be used in the next changes.
Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-6-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/forwarder.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/gpio/forwarder.h (limited to 'include/linux') diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h new file mode 100644 index 000000000000..e21a1b7b1905 --- /dev/null +++ b/include/linux/gpio/forwarder.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GPIO_FORWARDER_H +#define __LINUX_GPIO_FORWARDER_H + +struct gpio_desc; +struct gpio_chip; +struct gpiochip_fwd; + +struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, + unsigned int ngpios); +int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, + struct gpio_desc *desc, unsigned int offset); +int gpiochip_fwd_register(struct gpiochip_fwd *fwd); + +struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); + +int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, + unsigned int offset); +int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, + unsigned int offset); +int gpiochip_fwd_gpio_direction_output(struct gpiochip_fwd *fwd, + unsigned int offset, + int value); +int gpiochip_fwd_gpio_get(struct gpiochip_fwd *fwd, unsigned int offset); +int gpiochip_fwd_gpio_get_multiple(struct gpiochip_fwd *fwd, + unsigned long *mask, + unsigned long *bits); +int gpiochip_fwd_gpio_set(struct gpiochip_fwd *fwd, unsigned int offset, + int value); +int gpiochip_fwd_gpio_set_multiple(struct gpiochip_fwd *fwd, + unsigned long *mask, + unsigned long *bits); +int gpiochip_fwd_gpio_set_config(struct gpiochip_fwd *fwd, unsigned int offset, + unsigned long config); +int gpiochip_fwd_gpio_to_irq(struct gpiochip_fwd *fwd, unsigned int offset); + +#endif -- cgit v1.2.3 From b31c68fd851e74526ad963362ea205eb97b9a710 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:50 +0200 Subject: gpio: aggregator: handle runtime registration of gpio_desc in gpiochip_fwd Add a request() callback to check that the GPIO descriptor was actually registered in the gpiochip_fwd before using it. This is done to handle the case where a GPIO descriptor is added to the forwarder at runtime. If at least one GPIO descriptor was not added before the forwarder registration, we assume the forwarder can sleep, since a GPIO added at runtime may sleep.
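A probe-time sketch of the exported API (hypothetical names; it assumes devm_gpiochip_fwd_alloc() returns an ERR_PTR on failure, and note that gpiochip_fwd_register() gains a data argument in a later patch in this series):

    #include <linux/err.h>
    #include <linux/gpio/consumer.h>
    #include <linux/gpio/forwarder.h>

    static int foo_probe_forwarder(struct device *dev, struct gpio_desc *desc)
    {
            struct gpiochip_fwd *fwd;
            int ret;

            fwd = devm_gpiochip_fwd_alloc(dev, 2);
            if (IS_ERR(fwd))
                    return PTR_ERR(fwd);

            /* line 0 is known at probe time ... */
            ret = gpiochip_fwd_desc_add(fwd, desc, 0);
            if (ret)
                    return ret;

            /* ... line 1 may be added later; request() rejects it until then */
            return gpiochip_fwd_register(fwd);
    }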
Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-7-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/forwarder.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h index e21a1b7b1905..45e0190308f0 100644 --- a/include/linux/gpio/forwarder.h +++ b/include/linux/gpio/forwarder.h @@ -10,10 +10,12 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, unsigned int ngpios); int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, unsigned int offset); +void gpiochip_fwd_desc_free(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_register(struct gpiochip_fwd *fwd); struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); +int gpiochip_fwd_gpio_request(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, -- cgit v1.2.3 From 60e92c1009c7c6abd4a9d0caf33a8cba5d09f67c Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:51 +0200 Subject: gpio: aggregator: add possibility to attach data to the forwarder Add a data pointer to store private data in the forwarder. Reviewed-by: Andy Shevchenko Reviewed-by: Geert Uytterhoeven Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-8-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/forwarder.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h index 45e0190308f0..ee5d8355f735 100644 --- a/include/linux/gpio/forwarder.h +++ b/include/linux/gpio/forwarder.h @@ -11,10 +11,12 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, unsigned int offset); void gpiochip_fwd_desc_free(struct gpiochip_fwd *fwd, unsigned int offset); -int gpiochip_fwd_register(struct gpiochip_fwd *fwd); +int gpiochip_fwd_register(struct gpiochip_fwd *fwd, void *data); struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); +void *gpiochip_fwd_get_data(struct gpiochip_fwd *fwd); + int gpiochip_fwd_gpio_request(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, -- cgit v1.2.3 From 53ec9169db1345f04174febb90f88a871fc28d9e Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:52 +0200 Subject: lib/string_choices: Add str_input_output() helper Add a str_input_output() helper to return the 'input' or 'output' string literal. Also add the inverse variant str_output_input().
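Typical usage is in debug or status prints, e.g. (a trivial sketch with a hypothetical caller):

    #include <linux/printk.h>
    #include <linux/string_choices.h>

    static void foo_report_direction(bool is_input)
    {
            /* prints "line configured as input" or "line configured as output" */
            pr_debug("line configured as %s\n", str_input_output(is_input));
    }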
Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-9-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index f3ba4f52ff26..a27c87c954ae 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -41,6 +41,12 @@ static inline const char *str_high_low(bool v) } #define str_low_high(v) str_high_low(!(v)) +static inline const char *str_input_output(bool v) +{ + return v ? "input" : "output"; +} +#define str_output_input(v) str_input_output(!(v)) + static inline const char *str_on_off(bool v) { return v ? "on" : "off"; -- cgit v1.2.3 From 6d3c3ca4c77e93660cce5819bf707f75df03e0c8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 8 Aug 2025 15:28:47 +0200 Subject: module: Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES Christoph suggested that the explicit _GPL_ can be dropped from the module namespace export macro, as it's intended for in-tree modules only. It would be possible to restrict it technically, but it was pointed out [2] that some cases of using an out-of-tree build of an in-tree module with the same name are legitimate. But in that case those also have to be GPL anyway, so it's unnecessary to spell it out in the macro name. Link: https://lore.kernel.org/all/aFleJN_fE-RbSoFD@infradead.org/ [1] Link: https://lore.kernel.org/all/CAK7LNATRkZHwJGpojCnvdiaoDnP%2BaeUXgdey5sb_8muzdWTMkA@mail.gmail.com/ [2] Suggested-by: Christoph Hellwig Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Acked-by: Nicolas Schier Reviewed-by: Daniel Gomez Reviewed-by: Christian Brauner Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/20250808-export_modules-v4-1-426945bcc5e1@suse.cz Signed-off-by: Christian Brauner --- include/linux/export.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index f35d03b4113b..a686fd0ba406 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -91,6 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) -#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) +#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) #endif /* _LINUX_EXPORT_H */ -- cgit v1.2.3 From 0daf35da397b083ea0ea5407196bb6bd210530ec Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 7 Aug 2025 13:13:10 +0530 Subject: soc: qcom: mdt_loader: Remove pas id parameter The pas_id parameter is not used in qcom_mdt_load_no_init() and it should not be used, as this is a non-PAS-specific function that has no relation to the PAS-specific mechanism.
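A caller-side sketch of the updated signature (hypothetical firmware name and memory region, matching the declaration in the diff below):

    #include <linux/firmware.h>
    #include <linux/soc/qcom/mdt_loader.h>

    static int foo_load_firmware(struct device *dev, const struct firmware *fw,
                                 void *mem_region, phys_addr_t mem_phys,
                                 size_t mem_size)
    {
            phys_addr_t reloc_base;

            /* note: no pas_id argument any more */
            return qcom_mdt_load_no_init(dev, fw, "foo.mdt", mem_region,
                                         mem_phys, mem_size, &reloc_base);
    }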
Reviewed-by: Dikshita Agarwal Acked-by: Jeff Johnson # drivers/net/wireless/ath/ath12k/ahb.c Signed-off-by: Mukesh Ojha Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250807074311.2381713-2-mukesh.ojha@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/mdt_loader.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index 9e8e60421192..8ea8230579a2 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -24,7 +24,7 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw, phys_addr_t *reloc_base); int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, void *mem_region, + const char *fw_name, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base); void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len, @@ -54,9 +54,8 @@ static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, static inline int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, - void *mem_region, phys_addr_t mem_phys, - size_t mem_size, + const char *fw_name, void *mem_region, + phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base) { return -ENODEV; -- cgit v1.2.3 From 5634c8cb298a7146b4e38873473e280b50e27a2c Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Fri, 18 Jul 2025 16:20:51 +0530 Subject: iosys-map: Fix undefined behavior in iosys_map_clear() The current iosys_map_clear() implementation reads the potentially uninitialized 'is_iomem' boolean field to decide which union member to clear. This causes undefined behavior when called on uninitialized structures, as 'is_iomem' may contain garbage values like 0xFF. UBSAN detects this as: UBSAN: invalid-load in include/linux/iosys-map.h:267 load of value 255 is not a valid value for type '_Bool' Fix by unconditionally clearing the entire structure with memset(), eliminating the need to read uninitialized data and ensuring all fields are set to known good values. Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14639 Fixes: 01fd30da0474 ("dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr") Signed-off-by: Nitin Gote Reviewed-by: Andi Shyti Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250718105051.2709487-1-nitin.r.gote@intel.com --- include/linux/iosys-map.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4696abfd311c..3e85afe794c0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -264,12 +264,7 @@ static inline bool iosys_map_is_set(const struct iosys_map *map) */ static inline void iosys_map_clear(struct iosys_map *map) { - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } + memset(map, 0, sizeof(*map)); } /** -- cgit v1.2.3 From ce8370e2e62a903e18be7dd0e0be2eee079501e1 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 6 Aug 2025 17:04:07 -0400 Subject: audit: record fanotify event regardless of presence of rules When no audit rules are in place, fanotify event results are unconditionally dropped due to an explicit check for the existence of any audit rules. 
Given this is a report from another security sub-system, allow it to be recorded regardless of the existence of any audit rules. To test, install and run the fapolicyd daemon with default config. Then as an unprivileged user, create and run a very simple binary that should be denied. Then check for an event with ausearch -m FANOTIFY -ts recent Link: https://issues.redhat.com/browse/RHEL-9065 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index a394614ccd0b..e3f06eba9c6e 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -527,7 +527,7 @@ static inline void audit_log_kern_module(const char *name) static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { - if (!audit_dummy_context()) + if (audit_enabled) __audit_fanotify(response, friar); } -- cgit v1.2.3 From 5816bf4273edb32716a88c796e0b04f0e12962eb Mon Sep 17 00:00:00 2001 From: Blaise Boscaccy Date: Tue, 22 Jul 2025 14:21:34 -0700 Subject: lsm,selinux: Add LSM blob support for BPF objects This patch introduces LSM blob support for BPF maps, programs, and tokens to enable LSM stacking and multiplexing of LSM modules that govern BPF objects. Additionally, the existing BPF hooks used by SELinux have been updated to utilize the new blob infrastructure, removing the assumption of exclusive ownership of the security pointer. Signed-off-by: Blaise Boscaccy [PM: dropped local variable init, style fixes] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 090d1d3e19fe..79ec5a2bdcca 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -116,6 +116,9 @@ struct lsm_blob_sizes { int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; int lbs_bdev; + int lbs_bpf_map; + int lbs_bpf_prog; + int lbs_bpf_token; }; /* -- cgit v1.2.3 From fb357dbadbebc7a9ca3c5ef26f6c792b0e8e1278 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 14:24:42 +0200 Subject: fbcon: Add necessary include statements and forward declarations Make the header self contained for including. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20250715122643.137027-6-tzimmermann@suse.de --- include/linux/fbcon.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 2382dec6d6ab..81f0e698acbf 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -1,6 +1,13 @@ #ifndef _LINUX_FBCON_H #define _LINUX_FBCON_H +#include + +struct fb_blit_caps; +struct fb_info; +struct fb_var_screeninfo; +struct fb_videomode; + #ifdef CONFIG_FRAMEBUFFER_CONSOLE void __init fb_console_init(void); void __exit fb_console_exit(void); -- cgit v1.2.3 From ccba9f6baa900e31ad1a4c36e6f3c176694f9eac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:04 -0700 Subject: net: update NAPI threaded config even for disabled NAPIs We have to make sure that all future NAPIs will have the right threaded state when the state is configured on the device level. We chose not to have an "unset" state for threaded, and not to wipe the NAPI config clean when channels are explicitly disabled. 
This means the persistent config structs "exist" even when their NAPIs are not instantiated. Differently put - the NAPI persistent state lives in the net_device (ncfg == struct napi_config):

        ,--- [napi 0] - [napi 1]
  [dev] |
        `--- [ncfg 0] - [ncfg 1]

so say we have a device with 2 queues but only 1 enabled:

        ,--- [napi 0]
  [dev] |
        `--- [ncfg 0] - [ncfg 1]

now we set the device to threaded=1:

               ,---------- [napi 0 (thr:1)]
  [dev(thr:1)] |
               `---------- [ncfg 0 (thr:1)] - [ncfg 1 (thr:?)]

Since [ncfg 1] was not attached to a NAPI during configuration, we skipped it. If we create a NAPI for it later it will have the old setting (presumably disabled). One could argue whether this is right or not "in principle", but it's definitely not how things worked before per-NAPI config. Fixes: 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e5de4b0a433..f3a3b761abfb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2071,6 +2071,8 @@ enum netdev_reg_state { * @max_pacing_offload_horizon: max EDT offload horizon in nsec. * @napi_config: An array of napi_config structures containing per-NAPI * settings. + * @num_napi_configs: number of allocated NAPI config structs, + * always >= max(num_rx_queues, num_tx_queues). * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. @@ -2482,8 +2484,9 @@ struct net_device { u64 max_pacing_offload_horizon; struct napi_config *napi_config; - unsigned long gro_flush_timeout; + u32 num_napi_configs; u32 napi_defer_hard_irqs; + unsigned long gro_flush_timeout; /** * @up: copy of @state's IFF_UP, but safe to read with just @lock. -- cgit v1.2.3 From bea90085dcb0f9a75748e73d723bde557a5ebf1a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 23 Jul 2025 11:21:53 -0400 Subject: dlm: use defines for force values in dlm_release_lockspace Clarify the use of the force parameter by renaming it to "release_option" and adding defines (with descriptions) for each of the accepted values. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index bacda9898f2b..cc7a36244893 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -87,13 +87,37 @@ int dlm_new_lockspace(const char *name, const char *cluster, const struct dlm_lockspace_ops *ops, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace); +/* + * dlm_release_lockspace() release_option values: + * + * DLM_RELEASE_NO_LOCKS returns -EBUSY if any locks (lkb's) + * exist in the local lockspace. + * + * DLM_RELEASE_UNUSED previous value that is no longer used. + * + * DLM_RELEASE_NORMAL releases the lockspace regardless of any + * locks managed in the local lockspace. + * + * DLM_RELEASE_NO_EVENT release the lockspace regardless of any + * locks managed in the local lockspace, and does not submit + * a leave event to the cluster manager, so other nodes will + * not be notified that the node should be removed from the + * list of lockspace members.
+ */ +#define DLM_RELEASE_NO_LOCKS 0 +#define DLM_RELEASE_UNUSED 1 +#define DLM_RELEASE_NORMAL 2 +#define DLM_RELEASE_NO_EVENT 3 + /* * dlm_release_lockspace * * Stop a lockspace. + * + * release_option: see DLM_RELEASE values above. */ -int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); +int dlm_release_lockspace(dlm_lockspace_t *lockspace, int release_option); /* * dlm_lock -- cgit v1.2.3 From 6f4f4ca5caf73de5e86329547d4527b3e0c08488 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 23 Jul 2025 11:21:56 -0400 Subject: dlm: add new flag DLM_RELEASE_RECOVER for dlm_release_lockspace When dlm_release_lockspace() is passed DLM_RELEASE_RECOVER, it tells the dlm to handle the release/leave as if the node had failed, i.e. perform recovery steps for a failed node, like recover_slot(). When DLM_RELEASE_RECOVER is set: - dlm_release_lockspace() includes RELEASE_RECOVER=1 in the OFFLINE uevent sent to userspace. - userspace/dlm_controld sends a message to all lockspace members indicating that the subsequent node removal should be handled as if the node had failed. - when dlm_controld on all nodes receives the new message, it sets the release_recover configfs entry to 1 for the node. - when the dlm/kernel next performs recovery and removes the node, it will see that release_recover has been set, and will perform recovery steps for the node as if it had failed, e.g. the recover_slot() callback is called to notify the fs. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index cc7a36244893..108eb953eb18 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -103,11 +103,16 @@ int dlm_new_lockspace(const char *name, const char *cluster, * a leave event to the cluster manager, so other nodes will * not be notified that the node should be removed from the * list of lockspace members. + * + * DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining + * nodes will handle the removal of the node as if the node + * had failed, e.g. the recover_slot() callback would be used. */ #define DLM_RELEASE_NO_LOCKS 0 #define DLM_RELEASE_UNUSED 1 #define DLM_RELEASE_NORMAL 2 #define DLM_RELEASE_NO_EVENT 3 +#define DLM_RELEASE_RECOVER 4 /* * dlm_release_lockspace -- cgit v1.2.3 From 534b9bdeb4b80d843ca9f924524d4d103ad6605e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 29 Jun 2025 22:52:45 -0700 Subject: Input: tca6416-keypad - remove the driver This input driver predates a proper GPIO driver for the chip and can now be replaced with the generic gpio-keys driver. Remove the driver.
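For reference, a sketch of an equivalent description using generic gpio-keys platform data (hypothetical key codes and GPIO numbers; the TCA6416 lines themselves are assumed to be provided by the chip's regular GPIO driver, and modern boards would describe this in devicetree instead):

    #include <linux/gpio_keys.h>
    #include <linux/input.h>
    #include <linux/kernel.h>

    static struct gpio_keys_button foo_buttons[] = {
            { .code = KEY_POWER, .gpio = 496, .active_low = 1, .type = EV_KEY },
            { .code = KEY_HOME,  .gpio = 497, .active_low = 1, .type = EV_KEY },
    };

    static const struct gpio_keys_platform_data foo_keys_pdata = {
            .buttons        = foo_buttons,
            .nbuttons       = ARRAY_SIZE(foo_buttons),
            .rep            = 1,    /* matches the old driver's auto-repeat flag */
    };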
Link: https://lore.kernel.org/r/ajfsei3keh4jjasd4lshjicgqixew7bak3cmty3suoliskzgz4@vj3ijycfxy4i Signed-off-by: Dmitry Torokhov --- include/linux/tca6416_keypad.h | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 include/linux/tca6416_keypad.h (limited to 'include/linux') diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h deleted file mode 100644 index 5cf6f6f82aa7..000000000000 --- a/include/linux/tca6416_keypad.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tca6416 keypad platform support - * - * Copyright (C) 2010 Texas Instruments - * - * Author: Sriramakrishnan - */ - -#ifndef _TCA6416_KEYS_H -#define _TCA6416_KEYS_H - -#include - -struct tca6416_button { - /* Configuration parameters */ - int code; /* input event code (KEY_*, SW_*) */ - int active_low; - int type; /* input event type (EV_KEY, EV_SW) */ -}; - -struct tca6416_keys_platform_data { - struct tca6416_button *buttons; - int nbuttons; - unsigned int rep:1; /* enable input subsystem auto repeat */ - uint16_t pinmask; - uint16_t invert; - int use_polling; /* use polling if Interrupt is not connected*/ -}; -#endif -- cgit v1.2.3 From c93c59baa5ab57e94b874000cec56e26611b7a23 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 11 Aug 2025 20:58:20 +0200 Subject: bpf: Tidy verifier bug message Yonghong noticed that error messages for potential verifier bugs often have a '(1)' at the end. This is happening because verifier_bug_if(cond, env, fmt, args...) prints "(" #cond ")\n" as part of the message and verifier_bug() is defined as: #define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) Hence, verifier_bug() always ends up displaying '(1)'. This small patch fixes it by having verifier_bug_if conditionally call verifier_bug instead of the other way around. Fixes: 1cb0f56d9618 ("bpf: WARN_ONCE on verifier bugs") Reported-by: Yonghong Song Signed-off-by: Paul Chaignon Signed-off-by: Andrii Nakryiko Tested-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/aJo9THBrzo8jFXsh@mail.gmail.com --- include/linux/bpf_verifier.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c823f8efe3ed..020de62bd09c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -875,13 +875,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, #define verifier_bug_if(cond, env, fmt, args...) \ ({ \ bool __cond = (cond); \ - if (unlikely(__cond)) { \ - BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \ - bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \ - } \ + if (unlikely(__cond)) \ + verifier_bug(env, fmt " (" #cond ")", ##args); \ (__cond); \ }) -#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) +#define verifier_bug(env, fmt, args...) \ + ({ \ + BPF_WARN_ONCE(1, "verifier bug: " fmt "\n", ##args); \ + bpf_log(&env->log, "verifier bug: " fmt "\n", ##args); \ + }) static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { -- cgit v1.2.3 From 07bbbfe7addf5b032e04f3c38f0b183d067a3f0d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 11 Aug 2025 19:50:43 +0100 Subject: net: stmmac: add suspend()/resume() platform ops Add suspend/resume platform operations, which, when populated, override the init/exit platform operations when we suspend and resume. 
These suspend()/resume() methods are called by core code, and thus are designed to support any struct device, not just platform devices. This allows them to be used by the PCI drivers we have. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1ulXbX-008gqZ-Bb@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 26ddf95d23f9..22c24dacbc65 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -248,6 +248,8 @@ struct plat_stmmacenet_data { void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); + int (*suspend)(struct device *dev, void *priv); + int (*resume)(struct device *dev, void *priv); struct mac_device_info *(*setup)(void *priv); int (*clks_config)(void *priv, bool enabled); int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, -- cgit v1.2.3 From b3ef7bdec66fb1813e865fd39d179a93cefd2015 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 11 Aug 2025 17:31:42 +0200 Subject: net: airoha: Add airoha_offload.h header Move NPU definitions to airoha_offload.h in include/linux/soc/airoha/ in order to allow the MT76 driver to access the callback definitions. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250811-airoha-en7581-wlan-offlaod-v7-7-58823603bb4e@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 260 ++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 include/linux/soc/airoha/airoha_offload.h (limited to 'include/linux') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h new file mode 100644 index 000000000000..117c63c2448d --- /dev/null +++ b/include/linux/soc/airoha/airoha_offload.h @@ -0,0 +1,260 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 AIROHA Inc + * Author: Lorenzo Bianconi + */ +#ifndef AIROHA_OFFLOAD_H +#define AIROHA_OFFLOAD_H + +#include +#include + +#define NPU_NUM_CORES 8 +#define NPU_NUM_IRQ 6 +#define NPU_RX0_DESC_NUM 512 +#define NPU_RX1_DESC_NUM 512 + +/* CTRL */ +#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) +#define NPU_RX_DMA_DESC_DONE_MASK BIT(0) +/* INFO */ +#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 28) +#define NPU_RX_DMA_PKT_ID_MASK GENMASK(28, 26) +#define NPU_RX_DMA_SRC_PORT_MASK GENMASK(25, 21) +#define NPU_RX_DMA_CRSN_MASK GENMASK(20, 16) +#define NPU_RX_DMA_FOE_ID_MASK GENMASK(15, 0) +/* DATA */ +#define NPU_RX_DMA_SID_MASK GENMASK(31, 16) +#define NPU_RX_DMA_FRAG_TYPE_MASK GENMASK(15, 14) +#define NPU_RX_DMA_PRIORITY_MASK GENMASK(13, 10) +#define NPU_RX_DMA_RADIO_ID_MASK GENMASK(9, 6) +#define NPU_RX_DMA_VAP_ID_MASK GENMASK(5, 2) +#define NPU_RX_DMA_FRAME_TYPE_MASK GENMASK(1, 0) + +struct airoha_npu_rx_dma_desc { + u32 ctrl; + u32 info; + u32 data; + u32 addr; + u64 rsv; +} __packed; + +/* CTRL */ +#define NPU_TX_DMA_DESC_SCHED_MASK BIT(31) +#define NPU_TX_DMA_DESC_LEN_MASK GENMASK(30, 18) +#define NPU_TX_DMA_DESC_VEND_LEN_MASK GENMASK(17, 1) +#define NPU_TX_DMA_DESC_DONE_MASK BIT(0) + +#define NPU_TXWI_LEN 192 + +struct airoha_npu_tx_dma_desc { + u32 ctrl; + u32 addr; + u64 rsv; + u8 txwi[NPU_TXWI_LEN]; +} __packed; + +enum 
airoha_npu_wlan_set_cmd { + WLAN_FUNC_SET_WAIT_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_DESC, + WLAN_FUNC_SET_WAIT_NPU_INIT_DONE, + WLAN_FUNC_SET_WAIT_TRAN_TO_CPU, + WLAN_FUNC_SET_WAIT_BA_WIN_SIZE, + WLAN_FUNC_SET_WAIT_DRIVER_MODEL, + WLAN_FUNC_SET_WAIT_DEL_STA, + WLAN_FUNC_SET_WAIT_DRAM_BA_NODE_ADDR, + WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_IS_TEST_NOBA, + WLAN_FUNC_SET_WAIT_FLUSHONE_TIMEOUT, + WLAN_FUNC_SET_WAIT_FLUSHALL_TIMEOUT, + WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU, + WLAN_FUNC_SET_WAIT_PCIE_STATE, + WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE, + WLAN_FUNC_SET_WAIT_ERROR_RETRY_TIMES, + WLAN_FUNC_SET_WAIT_BAR_INFO, + WLAN_FUNC_SET_WAIT_FAST_FLAG, + WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU, + WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_TX_DESC_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, + WLAN_FUNC_SET_WAIT_RX_RING_FOR_TXDONE_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, + WLAN_FUNC_SET_WAIT_INODE_DEBUG_FLAG, + WLAN_FUNC_SET_WAIT_INODE_HW_CFG_INFO, + WLAN_FUNC_SET_WAIT_INODE_STOP_ACTION, + WLAN_FUNC_SET_WAIT_INODE_PCIE_SWAP, + WLAN_FUNC_SET_WAIT_RATELIMIT_CTRL, + WLAN_FUNC_SET_WAIT_HWNAT_INIT, + WLAN_FUNC_SET_WAIT_ARHT_CHIP_INFO, + WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR, + WLAN_FUNC_SET_WAIT_TOKEN_ID_SIZE, +}; + +enum airoha_npu_wlan_get_cmd { + WLAN_FUNC_GET_WAIT_NPU_INFO, + WLAN_FUNC_GET_WAIT_LAST_RATE, + WLAN_FUNC_GET_WAIT_COUNTER, + WLAN_FUNC_GET_WAIT_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_RXDESC_BASE, + WLAN_FUNC_GET_WAIT_WCID_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_DMA_ADDR, + WLAN_FUNC_GET_WAIT_RING_SIZE, + WLAN_FUNC_GET_WAIT_NPU_SUPPORT_MAP, + WLAN_FUNC_GET_WAIT_MDC_LOCK_ADDRESS, + WLAN_FUNC_GET_WAIT_NPU_VERSION, +}; + +struct airoha_npu { +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) + struct device *dev; + struct regmap *regmap; + + struct airoha_npu_core { + struct airoha_npu *npu; + /* protect concurrent npu memory accesses */ + spinlock_t lock; + struct work_struct wdt_work; + } cores[NPU_NUM_CORES]; + + int irqs[NPU_NUM_IRQ]; + + struct airoha_foe_stats __iomem *stats; + + struct { + int (*ppe_init)(struct airoha_npu *npu); + int (*ppe_deinit)(struct airoha_npu *npu); + int (*ppe_flush_sram_entries)(struct airoha_npu *npu, + dma_addr_t foe_addr, + int sram_num_entries); + int (*ppe_foe_commit_entry)(struct airoha_npu *npu, + dma_addr_t foe_addr, + u32 entry_size, u32 hash, + bool ppe2); + int (*wlan_init_reserved_memory)(struct airoha_npu *npu); + int (*wlan_send_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_set_cmd func_id, + void *data, int data_len, gfp_t gfp); + int (*wlan_get_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd func_id, + void *data, int data_len, gfp_t gfp); + u32 (*wlan_get_queue_addr)(struct airoha_npu *npu, int qid, + bool xmit); + void (*wlan_set_irq_status)(struct airoha_npu *npu, u32 val); + u32 (*wlan_get_irq_status)(struct airoha_npu *npu, int q); + void (*wlan_enable_irq)(struct airoha_npu *npu, int q); + void (*wlan_disable_irq)(struct airoha_npu *npu, int q); + } ops; +#endif +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) +struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); +void airoha_npu_put(struct airoha_npu *npu); + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return npu->ops.wlan_init_reserved_memory(npu); +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd 
cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_send_msg(npu, ifindex, cmd, data, data_len, gfp); +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_get_msg(npu, ifindex, cmd, data, data_len, gfp); +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return npu->ops.wlan_get_queue_addr(npu, qid, xmit); +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ + npu->ops.wlan_set_irq_status(npu, val); +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, int q) +{ + return npu->ops.wlan_get_irq_status(npu, q); +} + +static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_enable_irq(npu, q); +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_disable_irq(npu, q); +} +#else +static inline struct airoha_npu *airoha_npu_get(struct device *dev, + dma_addr_t *foe_stats_addr) +{ + return NULL; +} + +static inline void airoha_npu_put(struct airoha_npu *npu) +{ +} + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return 0; +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, + int q) +{ + return 0; +} + +static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ +} +#endif + +#endif /* AIROHA_OFFLOAD_H */ -- cgit v1.2.3 From 3eb50369c09efb0f668a7f568a7e6f7cf4194cde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 May 2024 23:22:01 -0400 Subject: scsi: switch ->bios_param() to passing gendisk Instances are passed struct block_device *bdev argument; the only thing it is used for (if it's used in the first place) is bdev->bd_disk. Might as well pass that in the first place... Reviewed-by: Martin K. 
Petersen
Reviewed-by: Christoph Hellwig
Acked-by: Jens Axboe
Signed-off-by: Al Viro
---
 include/linux/libata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0620dd67369f..21de0935775d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1203,7 +1203,7 @@ extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern u64 ata_qc_get_active(struct ata_port *ap);
 extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
 extern int ata_std_bios_param(struct scsi_device *sdev,
-			      struct block_device *bdev,
+			      struct gendisk *unused,
			      sector_t capacity, int geom[]);
 extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev);
 extern int ata_scsi_sdev_init(struct scsi_device *sdev);
--
cgit v1.2.3


From 4fc8728aa34f54835b72e4db0f3db76a72948b65 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Tue, 21 May 2024 22:19:55 -0400
Subject: block: switch ->getgeo() to struct gendisk

Instances are happier that way and it makes more sense anyway - the
only part of the result that is related to the partition we are given
is the start sector, and that has been filled in by the caller.
Everything else is a function of the disk. Only one instance (DASD) is
ever looking at anything other than bdev->bd_disk and that one is
trivial to adjust.

Reviewed-by: Martin K. Petersen
Reviewed-by: Christoph Hellwig
Acked-by: Jens Axboe
Signed-off-by: Al Viro
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 95886b404b16..fbc45121cd4f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1659,7 +1659,7 @@ struct block_device_operations {
	unsigned int (*check_events) (struct gendisk *disk,
				      unsigned int clearing);
	void (*unlock_native_capacity) (struct gendisk *);
-	int (*getgeo)(struct block_device *, struct hd_geometry *);
+	int (*getgeo)(struct gendisk *, struct hd_geometry *);
	int (*set_read_only)(struct block_device *bdev, bool ro);
	void (*free_disk)(struct gendisk *disk);
	/* this callback is with swap_lock and sometimes page table lock held */
--
cgit v1.2.3


From 21924af67d69d7c9fdaf845be69043cfe75196a1 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Tue, 5 Aug 2025 00:10:02 +0000
Subject: locking: Fix __clear_task_blocked_on() warning from __ww_mutex_wound() path

The __clear_task_blocked_on() helper added a number of sanity checks
ensuring we hold the mutex wait lock and that the blocked_on pointer
of the task we are clearing (if set) matches the mutex.

However, there is an edge case in the __ww_mutex_wound() logic where we
need to clear the blocked_on pointer for the task that owns the mutex,
not the task that is waiting on the mutex. For this case the sanity
checks aren't valid, so handle this by allowing a NULL lock to skip the
additional checks.

K Prateek Nayak and Maarten Lankhorst also pointed out that in this
case where we don't hold the owner's mutex wait_lock, we need to be a
bit more careful using READ_ONCE/WRITE_ONCE in both the
__clear_task_blocked_on() and __set_task_blocked_on() implementations
to avoid accidentally tripping WARN_ONs if two instances race.

So do that here as well.

This issue was easier to miss, I realized, as the test-ww_mutex driver
only exercises the wait-die class of ww_mutexes. I've sent a patch[1]
to address this so the logic will be easier to test.
[1]: https://lore.kernel.org/lkml/20250801023358.562525-2-jstultz@google.com/

Fixes: a4f0b6fef4b0 ("locking/mutex: Add p->blocked_on wrappers for correctness checks")
Closes: https://lore.kernel.org/lkml/68894443.a00a0220.26d0e1.0015.GAE@google.com/
Reported-by: syzbot+602c4720aed62576cd79@syzkaller.appspotmail.com
Reported-by: Maarten Lankhorst
Signed-off-by: John Stultz
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: K Prateek Nayak
Acked-by: Maarten Lankhorst
Tested-by: K Prateek Nayak
Link: https://lore.kernel.org/r/20250805001026.2247040-1-jstultz@google.com
---
 include/linux/sched.h | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 40d2fa90df42..62103dd6a48e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2144,6 +2144,8 @@ static inline struct mutex *__get_task_blocked_on(struct task_struct *p)

 static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
+	struct mutex *blocked_on = READ_ONCE(p->blocked_on);
+
	WARN_ON_ONCE(!m);
	/* The task should only be setting itself as blocked */
	WARN_ON_ONCE(p != current);
@@ -2154,8 +2156,8 @@ static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
	 * with a different mutex. Note, setting it to the same
	 * lock repeatedly is ok.
	 */
-	WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
-	p->blocked_on = m;
+	WARN_ON_ONCE(blocked_on && blocked_on != m);
+	WRITE_ONCE(p->blocked_on, m);
 }

 static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
@@ -2166,16 +2168,19 @@ static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)

 static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
-	WARN_ON_ONCE(!m);
-	/* Currently we serialize blocked_on under the mutex::wait_lock */
-	lockdep_assert_held_once(&m->wait_lock);
-	/*
-	 * There may be cases where we re-clear already cleared
-	 * blocked_on relationships, but make sure we are not
-	 * clearing the relationship with a different lock.
-	 */
-	WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
-	p->blocked_on = NULL;
+	if (m) {
+		struct mutex *blocked_on = READ_ONCE(p->blocked_on);
+
+		/* Currently we serialize blocked_on under the mutex::wait_lock */
+		lockdep_assert_held_once(&m->wait_lock);
+		/*
+		 * There may be cases where we re-clear already cleared
+		 * blocked_on relationships, but make sure we are not
+		 * clearing the relationship with a different lock.
+		 */
+		WARN_ON_ONCE(blocked_on && blocked_on != m);
+	}
+	WRITE_ONCE(p->blocked_on, NULL);
 }

 static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
--
cgit v1.2.3


From 9528d32873b38281ae105f2f5799e79ae9d086c2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Mon, 11 Aug 2025 10:27:45 +0200
Subject: kcov, usb: Don't disable interrupts in kcov_remote_start_usb_softirq()

kcov_remote_start_usb_softirq() is invoked at the beginning of a URB's
completion callback. HCDs marked HCD_BH will invoke this function from
the softirq and in_serving_softirq() will detect this properly.

Root-HUB (RH) requests will not be delayed to the softirq but complete
immediately in IRQ context. This will confuse kcov because
in_serving_softirq() will report true if the softirq is served after
the hardirq, and also if the softirq got interrupted by the hardirq in
which the code currently runs.
This was addressed by simply disabling interrupts in
kcov_remote_start_usb_softirq(), which avoided the interruption by the
RH while a regular completion callback was invoked. This not only
changes the behaviour while kcov is enabled but also breaks PREEMPT_RT
because now sleeping locks can no longer be acquired.

Revert the previous fix. Address the issue by invoking
kcov_remote_start_usb() only if the context is just "serving softirqs",
which is identified by checking that in_serving_softirq() is true and
in_hardirq() is false.

Fixes: f85d39dd7ed89 ("kcov, usb: disable interrupts in kcov_remote_start_usb_softirq")
Cc: stable
Reported-by: Yunseong Kim
Closes: https://lore.kernel.org/all/20250725201400.1078395-2-ysk@kzalloc.com/
Tested-by: Yunseong Kim
Signed-off-by: Sebastian Andrzej Siewior
Link: https://lore.kernel.org/r/20250811082745.ycJqBXMs@linutronix.de
Signed-off-by: Greg Kroah-Hartman
---
 include/linux/kcov.h | 47 +++++++++--------------------------------------
 1 file changed, 9 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 75a2fb8b16c3..0143358874b0 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -57,47 +57,21 @@ static inline void kcov_remote_start_usb(u64 id)

 /*
  * The softirq flavor of kcov_remote_*() functions is introduced as a temporary
- * workaround for KCOV's lack of nested remote coverage sections support.
- *
- * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337.
- *
- * kcov_remote_start_usb_softirq():
- *
- * 1. Only collects coverage when called in the softirq context. This allows
- *    avoiding nested remote coverage collection sections in the task context.
- *    For example, USB/IP calls usb_hcd_giveback_urb() in the task context
- *    within an existing remote coverage collection section. Thus, KCOV should
- *    not attempt to start collecting coverage within the coverage collection
- *    section in __usb_hcd_giveback_urb() in this case.
- *
- * 2. Disables interrupts for the duration of the coverage collection section.
- *    This allows avoiding nested remote coverage collection sections in the
- *    softirq context (a softirq might occur during the execution of a work in
- *    the BH workqueue, which runs with in_serving_softirq() > 0).
- *    For example, usb_giveback_urb_bh() runs in the BH workqueue with
- *    interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in
- *    the middle of its remote coverage collection section, and the interrupt
- *    handler might invoke __usb_hcd_giveback_urb() again.
+ * work around for kcov's lack of nested remote coverage sections support in
+ * task context.
Adding support for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +static inline void kcov_remote_start_usb_softirq(u64 id) { - unsigned long flags = 0; - - if (in_serving_softirq()) { - local_irq_save(flags); + if (in_serving_softirq() && !in_hardirq()) kcov_remote_start_usb(id); - } - - return flags; } -static inline void kcov_remote_stop_softirq(unsigned long flags) +static inline void kcov_remote_stop_softirq(void) { - if (in_serving_softirq()) { + if (in_serving_softirq() && !in_hardirq()) kcov_remote_stop(); - local_irq_restore(flags); - } } #ifdef CONFIG_64BIT @@ -131,11 +105,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) -{ - return 0; -} -static inline void kcov_remote_stop_softirq(unsigned long flags) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- cgit v1.2.3 From 4c70fb2624ab1588faa58dcd407d4c61d64b288d Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Wed, 13 Aug 2025 08:29:01 +0000 Subject: cpuset: remove redundant CS_ONLINE flag The CS_ONLINE flag was introduced prior to the CSS_ONLINE flag in the cpuset subsystem. Currently, the flag setting sequence is as follows: 1. cpuset_css_online() sets CS_ONLINE 2. css->flags gets CSS_ONLINE set ... 3. cgroup->kill_css sets CSS_DYING 4. cpuset_css_offline() clears CS_ONLINE 5. css->flags clears CSS_ONLINE The is_cpuset_online() check currently occurs between steps 1 and 3. However, it would be equally safe to perform this check between steps 2 and 3, as CSS_ONLINE provides the same synchronization guarantee as CS_ONLINE. Since CS_ONLINE is redundant with CSS_ONLINE and provides no additional synchronization benefits, we can safely remove it to simplify the code. Signed-off-by: Chen Ridong Acked-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..ae73dbb19165 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -354,6 +354,11 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return css->flags & CSS_DYING; } +static inline bool css_is_online(struct cgroup_subsys_state *css) +{ + return css->flags & CSS_ONLINE; +} + static inline bool css_is_self(struct cgroup_subsys_state *css) { if (css == &css->cgroup->self) { -- cgit v1.2.3 From 2caa6b88e0ba0231fb4ff0ba8e73cedd5fb81fc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 14:08:04 +0200 Subject: bpf: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. 
Signed-off-by: Thomas Weißschuh
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20250811-restricted-pointers-bpf-v1-1-a1d7cc3cb9e7@linutronix.de
---
 include/linux/filter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1e7fd3ee759e..52fecb7a1fe3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1296,7 +1296,7 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
 static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
				u32 pass, void *image)
 {
-	pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen,
+	pr_err("flen=%u proglen=%u pass=%u image=%p from=%s pid=%d\n", flen,
	       proglen, pass, image, current->comm, task_pid_nr(current));

	if (image)
--
cgit v1.2.3


From 139235103f6039c2c77cb8f51cb2e7e610fe0114 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Mon, 11 Aug 2025 15:35:05 +0800
Subject: net: stmmac: Change first parameter of fix_soc_reset()

In order to use netdev_err() to print messages in the callback function
of fix_soc_reset(), change fix_soc_reset() to have "struct stmmac_priv
*" as its first parameter. This is preparation for a later patch; no
functionality change.

Suggested-by: Andrew Lunn
Signed-off-by: Tiezhu Yang
Link: https://patch.msgid.link/20250811073506.27513-3-yangtiezhu@loongson.cn
Signed-off-by: Jakub Kicinski
---
 include/linux/stmmac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 22c24dacbc65..e284f04964bf 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -238,7 +238,7 @@ struct plat_stmmacenet_data {
	int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i,
			       phy_interface_t interface, int speed);
	void (*fix_mac_speed)(void *priv, int speed, unsigned int mode);
-	int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
+	int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr);
	int (*serdes_powerup)(struct net_device *ndev, void *priv);
	void (*serdes_powerdown)(struct net_device *ndev, void *priv);
	int (*mac_finish)(struct net_device *ndev,
--
cgit v1.2.3


From 96326447d466ca62b713f660cfc73ef7879151a0 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi
Date: Tue, 12 Aug 2025 06:57:23 +0200
Subject: net: mediatek: wed: Introduce MT7992 WED support to MT7988 SoC

Introduce the second WDMA RX ring in the WED driver for the MT7988 SoC
since the Mediatek MT7992 WiFi chipset supports two separate WDMA
rings. Add missing MT7988 configurations to properly support WED for
MT7992 in the MT76 driver.
Co-developed-by: Rex Lu Signed-off-by: Rex Lu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250812-mt7992-wed-support-v3-1-9ada78a819a4@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/mediatek/mtk_wed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index d8949a4ed0dc..c4ff6bab176d 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -147,7 +147,7 @@ struct mtk_wed_device { u32 wpdma_tx; u32 wpdma_txfree; u32 wpdma_rx_glo; - u32 wpdma_rx; + u32 wpdma_rx[MTK_WED_RX_QUEUES]; u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; u32 wpdma_rx_pg; -- cgit v1.2.3 From fb2f2a86f0cd9690357b9bb67af00d386a7e819f Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 16 Jun 2025 13:03:21 +0200 Subject: ice: cleanup capabilities evaluation When evaluating the capabilities field, the ICE_AQC_BIT_ROCEV2_LAG and ICE_AQC_BIT_SRIOV_LAG defines were both not using the BIT operator, instead simply setting a hex value that set the correct bits. While not inaccurate, this method is misleading, and when it is expanded in the following implementation it becomes even more confusing. Switch to using the BIT() operator to clarify what is being checked. Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Reviewed-by: Marcin Szycik Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index 012b5d499c1a..dbe93f940ef0 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -192,8 +192,8 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085 #define LIBIE_AQC_CAPS_NAC_TOPOLOGY 0x0087 #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 -#define LIBIE_AQC_BIT_ROCEV2_LAG 0x01 -#define LIBIE_AQC_BIT_SRIOV_LAG 0x02 +#define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0) +#define LIBIE_AQC_BIT_SRIOV_LAG BIT(1) #define LIBIE_AQC_CAPS_FLEX10 0x00F1 #define LIBIE_AQC_CAPS_CEM 0x00F2 -- cgit v1.2.3 From 556c1ad666ad90c50ec8fccb930dd5046cfbecfb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:42 -0700 Subject: x86/vmscape: Enable the mitigation Enable the previously added mitigation for VMscape. Add the cmdline vmscape={off|ibpb|force} and sysfs reporting. 
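
For illustration, a deployment that wants the IBPB-based mitigation
unconditionally would boot with (a sketch based on the syntax above,
not an excerpt from the patch):

  vmscape=ibpb

while vmscape=off disables the mitigation entirely.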
Signed-off-by: Pawan Gupta
Signed-off-by: Dave Hansen
Reviewed-by: Borislav Petkov (AMD)
Reviewed-by: Dave Hansen
---
 include/linux/cpu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b91b993f58ee..487b3bf2e1ea 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -83,6 +83,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev,
 extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
						  struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf);

 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
--
cgit v1.2.3


From 8d90041a0d285044b89629f539ca0685e156848b Mon Sep 17 00:00:00 2001
From: Alexander Aring
Date: Thu, 14 Aug 2025 11:22:13 -0400
Subject: dlm: handle release_option as unsigned

Future patches will introduce an invalid argument check for undefined
values. All accepted values for release_option are positive integers,
so to avoid also having to check for negative values we change the
parameter to unsigned int.

Signed-off-by: Alexander Aring
Signed-off-by: David Teigland
---
 include/linux/dlm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index 108eb953eb18..34015a008b80 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -122,7 +122,8 @@ int dlm_new_lockspace(const char *name, const char *cluster,
  * release_option: see DLM_RELEASE values above.
  */

-int dlm_release_lockspace(dlm_lockspace_t *lockspace, int release_option);
+int dlm_release_lockspace(dlm_lockspace_t *lockspace,
+			  unsigned int release_option);

 /*
  * dlm_lock
--
cgit v1.2.3


From 8e40210788636619404871df07445fa4590138b4 Mon Sep 17 00:00:00 2001
From: Alexander Aring
Date: Thu, 14 Aug 2025 11:22:14 -0400
Subject: dlm: check for undefined release_option values

Check all undefined release_option values and return -EINVAL in case a
user provides one to dlm_release_lockspace().

Signed-off-by: Alexander Aring
Signed-off-by: David Teigland
---
 include/linux/dlm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index 34015a008b80..7e7b45b0d097 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -113,6 +113,7 @@ int dlm_new_lockspace(const char *name, const char *cluster,
 #define DLM_RELEASE_NORMAL	2
 #define DLM_RELEASE_NO_EVENT	3
 #define DLM_RELEASE_RECOVER	4
+#define __DLM_RELEASE_MAX	DLM_RELEASE_RECOVER

 /*
  * dlm_release_lockspace
--
cgit v1.2.3


From dab32f2576a39d5f54f3dbbbc718d92fa5109ce9 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle
Date: Thu, 7 Aug 2025 15:55:39 +0200
Subject: s390/pci: Use pci_uevent_ers() in PCI recovery

Issue uevents on s390 during PCI recovery using pci_uevent_ers() as
done by EEH and AER PCIe recovery routines.
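
A minimal sketch of how a recovery path reports its outcome through
this helper (the wrapper function below is illustrative, not code from
the s390 patch):

  #include <linux/pci.h>

  /* hypothetical helper: emit the recovery-result uevent for a device */
  static void example_report_recovery(struct pci_dev *pdev, bool recovered)
  {
  	pci_uevent_ers(pdev, recovered ? PCI_ERS_RESULT_RECOVERED
  				       : PCI_ERS_RESULT_DISCONNECT);
  }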
Signed-off-by: Niklas Schnelle Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20250807-add_err_uevents-v5-2-adf85b0620b0@linux.ibm.com --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..7735acf6f349 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2764,7 +2764,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } -#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) || defined(CONFIG_S390) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -- cgit v1.2.3 From f39494089aaa1022008eee245fb83ef1ae911b6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Jul 2025 21:09:13 -0700 Subject: srcu: Move rcu_is_watching() checks to srcu_read_{,un}lock_fast() The rcu_is_watching() warnings are currently in the SRCU-tree implementations of __srcu_read_lock_fast() and __srcu_read_unlock_fast(). However, this makes it difficult to create _notrace variants of srcu_read_lock_fast() and srcu_read_unlock_fast(). This commit therefore moves these checks to srcu_read_lock_fast(), srcu_read_unlock_fast(), srcu_down_read_fast(), and srcu_up_read_fast(). Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 4 ++++ include/linux/srcutree.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f179700fecaf..478c73d067f7 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -275,6 +275,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * { struct srcu_ctr __percpu *retval; + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); retval = __srcu_read_lock_fast(ssp); rcu_try_lock_acquire(&ssp->dep_map); @@ -295,6 +296,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_down_read_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); return __srcu_read_lock_fast(ssp); } @@ -389,6 +391,7 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } /** @@ -405,6 +408,7 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_up_read_fast()."); } /** diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index bf44d8d1e69e..043b5a67ef71 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -244,7 +244,6 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct { struct srcu_ctr __percpu *scp = 
READ_ONCE(ssp->srcu_ctrp); - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) this_cpu_inc(scp->srcu_locks.counter); /* Y */ else @@ -275,7 +274,6 @@ static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ else atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -- cgit v1.2.3 From 7e2a2d060da4860af37e1000dc62a30a1551d9e8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:12:16 -0700 Subject: srcu: Add srcu_read_lock_fast_notrace() and srcu_read_unlock_fast_notrace() This commit adds no-trace variants of the srcu_read_lock_fast() and srcu_read_unlock_fast() functions for tracing use. [ paulmck: Apply notrace feedback from Joel Fernandes, Steven Rostedt, and Mathieu Desnoyers. ] [ paulmck: Apply excess-notrace feedback from Boqun Feng. ] Link: https://lore.kernel.org/all/20250721162433.10454-1-paulmck@kernel.org Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 25 +++++++++++++++++++++++++ include/linux/srcutree.h | 5 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 478c73d067f7..7a692bf8f99b 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -282,6 +282,20 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * return retval; } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_lock_fast() for more information. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_struct *ssp) + __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + return retval; +} + /** * srcu_down_read_fast - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -394,6 +408,17 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_unlock_fast() for more information. + */ +static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp, + struct srcu_ctr __percpu *scp) __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); +} + /** * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 043b5a67ef71..4d2fee4d3828 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -240,7 +240,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). 
*/ -static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); @@ -267,7 +267,8 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). */ -static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +static inline void notrace +__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) -- cgit v1.2.3 From cacadb630375b8c30ca4d0300812178bb884c0b0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:19:39 -0700 Subject: srcu: Add guards for notrace variants of SRCU-fast readers This adds the usual scoped_guard(srcu_fast_notrace, &my_srcu) and guard(srcu_fast_notrace)(&my_srcu). Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 7a692bf8f99b..ada65b58bc4c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -515,4 +515,9 @@ DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct, srcu_read_unlock_fast(_T->lock, _T->scp), struct srcu_ctr __percpu *scp) +DEFINE_LOCK_GUARD_1(srcu_fast_notrace, struct srcu_struct, + _T->scp = srcu_read_lock_fast_notrace(_T->lock), + srcu_read_unlock_fast_notrace(_T->lock, _T->scp), + struct srcu_ctr __percpu *scp) + #endif -- cgit v1.2.3 From 28f073b38372b99d8d33ff5e63897d28419bda20 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 16 Jun 2025 13:03:23 +0200 Subject: ice: Implement support for SRIOV VFs across Active/Active bonds This patch implements the software flows to handle SRIOV VF communication across an Active/Active link aggregate. The same restrictions apply as are in place for the support of Active/Backup bonds. - the two interfaces must be on the same NIC - the FW LLDP engine needs to be disabled - the DDP package that supports VF LAG must be loaded on device - the two interfaces must have the same QoS config - only the first interface added to the bond will have VF support - the interface with VFs must be in switchdev mode With the additional requirement of - the version of the FW on the NIC needs to have VF Active/Active support This requirement is indicated in the capabilities struct associated with the NVM loaded on the NIC. The balancing of traffic between the two interfaces is done on a queue basis. Taking the queues allocated to all of the VFs as a whole, one half of them will be distributed to each interface. When a link goes down, then the queues allocated to the down interface will migrate to the active port. When the down port comes back up, then the same queues as were originally assigned there will be moved back. 
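
As a worked example of the queue split (illustrative numbers, not from
the patch): if the VFs own 16 queues in total, 8 are assigned to each
bonded interface; when one port goes down, its 8 queues migrate to the
surviving port, and they move back once the failed port recovers.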
Co-developed-by: Marcin Szycik Signed-off-by: Marcin Szycik Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index dbe93f940ef0..ba62f703df43 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -194,6 +194,7 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 #define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0) #define LIBIE_AQC_BIT_SRIOV_LAG BIT(1) +#define LIBIE_AQC_BIT_SRIOV_AA_LAG BIT(2) #define LIBIE_AQC_CAPS_FLEX10 0x00F1 #define LIBIE_AQC_CAPS_CEM 0x00F2 -- cgit v1.2.3 From 448f97fba9013ffa13f5dd82febd18836b189499 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Aug 2025 12:39:13 +0200 Subject: perf: Convert mmap() refcounts to refcount_t The recently fixed reference count leaks could have been detected by using refcount_t and refcount_t would have mitigated the potential overflow at least. Now that the code is properly structured, convert the mmap() related mmap_count variants over to refcount_t. No functional change intended. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lorenzo Stoakes Link: https://lore.kernel.org/r/20250812104020.071507932@infradead.org --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ec9d96025683..bfbf9ea53f25 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -859,7 +859,7 @@ struct perf_event { /* mmap bits */ struct mutex mmap_mutex; - atomic_t mmap_count; + refcount_t mmap_count; struct perf_buffer *rb; struct list_head rb_entry; -- cgit v1.2.3 From a3de58b12ce074ec05b8741fa28d62ccb1070468 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Aug 2025 22:45:50 +0100 Subject: netfs: Fix unbuffered write error handling If all the subrequests in an unbuffered write stream fail, the subrequest collector doesn't update the stream->transferred value and it retains its initial LONG_MAX value. Unfortunately, if all active streams fail, then we take the smallest value of { LONG_MAX, LONG_MAX, ... } as the value to set in wreq->transferred - which is then returned from ->write_iter(). LONG_MAX was chosen as the initial value so that all the streams can be quickly assessed by taking the smallest value of all stream->transferred - but this only works if we've set any of them. Fix this by adding a flag to indicate whether the value in stream->transferred is valid and checking that when we integrate the values. stream->transferred can then be initialised to zero. This was found by running the generic/750 xfstest against cifs with cache=none. It splices data to the target file. Once (if) it has used up all the available scratch space, the writes start failing with ENOSPC. This causes ->write_iter() to fail. However, it was returning wreq->transferred, i.e. LONG_MAX, rather than an error (because it thought the amount transferred was non-zero) and iter_file_splice_write() would then try to clean up that amount of pipe bufferage - leading to an oops when it overran. 
The kernel log showed: CIFS: VFS: Send error in write = -28 followed by: BUG: kernel NULL pointer dereference, address: 0000000000000008 with: RIP: 0010:iter_file_splice_write+0x3a4/0x520 do_splice+0x197/0x4e0 or: RIP: 0010:pipe_buf_release (include/linux/pipe_fs_i.h:282) iter_file_splice_write (fs/splice.c:755) Also put a warning check into splice to announce if ->write_iter() returned that it had written more than it was asked to. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Reported-by: Xiaoli Feng Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220445 Signed-off-by: David Howells Link: https://lore.kernel.org/915443.1755207950@warthog.procyon.org.uk cc: Paulo Alcantara cc: Steve French cc: Shyam Prasad N cc: netfs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 185bd8196503..98c96d649bf9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -150,6 +150,7 @@ struct netfs_io_stream { bool active; /* T if stream is active */ bool need_retry; /* T if this stream needs retrying */ bool failed; /* T if this stream failed */ + bool transferred_valid; /* T is ->transferred is valid */ }; /* -- cgit v1.2.3 From 2335b3f56690f76ac34b972fcaef368bab1f76f2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 16 Jun 2025 23:41:53 -0700 Subject: net/mlx5: mlx5_ifc, Add hardware definitions needed for adjacent vports Next patches will implement the discovery and creation of adjacent functions vports, this patch introduces the hardware structures definitions needed for the driver implementation. 
Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Reviewed-by: Parav Pandit Reviewed-by: Jack Morgenstein Signed-off-by: Alexei Lazar --- include/linux/mlx5/mlx5_ifc.h | 133 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8360d9011d4f..44d497272162 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -189,6 +189,9 @@ enum { MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729, MLX5_CMD_OP_MODIFY_XRQ = 0x72a, + MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA = 0x732, + MLX5_CMD_OPCODE_CREATE_ESW_VPORT = 0x733, + MLX5_CMD_OPCODE_DESTROY_ESW_VPORT = 0x734, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, @@ -2207,7 +2210,19 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; - u8 reserved_at_460[0x3a0]; + + u8 reserved_at_460[0x160]; + + u8 query_adjacent_functions_id[0x1]; + u8 ingress_egress_esw_vport_connect[0x1]; + u8 function_id_type_vhca_id[0x1]; + u8 reserved_at_5c3[0xd]; + u8 delegate_vhca_management_profiles[0x10]; + + u8 delegated_vhca_max[0x10]; + u8 delegate_vhca_max[0x10]; + + u8 reserved_at_600[0x200]; }; enum mlx5_ifc_flow_destination_type { @@ -5159,7 +5174,9 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6357,7 +6374,9 @@ struct mlx5_ifc_query_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6983,6 +7002,28 @@ struct mlx5_ifc_query_esw_vport_context_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_esw_vport_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; +}; + +struct mlx5_ifc_destroy_esw_vport_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vport_num[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_modify_esw_vport_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7484,6 +7525,85 @@ struct mlx5_ifc_query_adapter_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_function_vhca_rid_info_reg_bits { + u8 host_number[0x8]; + u8 host_pci_device_function[0x8]; + u8 host_pci_bus[0x8]; + u8 reserved_at_18[0x3]; + u8 pci_bus_assigned[0x1]; + u8 function_type[0x4]; + + u8 parent_pci_device_function[0x8]; + u8 parent_pci_bus[0x8]; + u8 vhca_id[0x10]; + + u8 reserved_at_40[0x10]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_delegated_function_vhca_rid_info_bits { + struct mlx5_ifc_function_vhca_rid_info_reg_bits function_vhca_rid_info; + + u8 reserved_at_80[0x18]; + u8 manage_profile[0x8]; + + u8 reserved_at_a0[0x60]; +}; + +struct mlx5_ifc_query_delegated_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 functions_count[0x10]; + + u8 reserved_at_80[0x80]; + + struct mlx5_ifc_delegated_function_vhca_rid_info_bits + delegated_function_vhca_rid_info[]; +}; + +struct mlx5_ifc_query_delegated_vhca_in_bits { + u8 
opcode[0x10];
+	u8	   uid[0x10];
+
+	u8	   reserved_at_20[0x10];
+	u8	   op_mod[0x10];
+
+	u8	   reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_esw_vport_out_bits {
+	u8	   status[0x8];
+	u8	   reserved_at_8[0x18];
+
+	u8	   syndrome[0x20];
+
+	u8	   reserved_at_40[0x20];
+
+	u8	   reserved_at_60[0x10];
+	u8	   vport_num[0x10];
+};
+
+struct mlx5_ifc_create_esw_vport_in_bits {
+	u8	   opcode[0x10];
+	u8	   reserved_at_10[0x10];
+
+	u8	   reserved_at_20[0x10];
+	u8	   op_mod[0x10];
+
+	u8	   reserved_at_40[0x10];
+	u8	   managed_vhca_id[0x10];
+
+	u8	   reserved_at_60[0x20];
+};
+
 struct mlx5_ifc_qp_2rst_out_bits {
	u8	   status[0x8];
	u8	   reserved_at_8[0x18];
@@ -7611,7 +7731,12 @@ struct mlx5_ifc_modify_vport_state_in_bits {
	u8	   reserved_at_41[0xf];
	u8	   vport_number[0x10];

-	u8	   reserved_at_60[0x18];
+	u8	   reserved_at_60[0x10];
+	u8	   ingress_connect[0x1];
+	u8	   egress_connect[0x1];
+	u8	   ingress_connect_valid[0x1];
+	u8	   egress_connect_valid[0x1];
+	u8	   reserved_at_74[0x4];
	u8	   admin_state[0x4];
	u8	   reserved_at_7c[0x4];
 };
--
cgit v1.2.3


From 40653f280b2640e5caa94eeedee43e0f1df97704 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed
Date: Mon, 16 Jun 2025 17:28:20 -0700
Subject: {rdma,net}/mlx5: export mlx5_vport_get_vhca_id

The vhca id is already cached in the vport structure, so there is no
need to query it at every mlx5 layer; use mlx5_vport_get_vhca_id()
where possible.

Signed-off-by: Saeed Mahameed
Reviewed-by: Mark Bloch
Reviewed-by: Parav Pandit
Signed-off-by: Alexei Lazar
Reviewed-by: Feng Liu
Reviewed-by: Tariq Toukan
---
 include/linux/mlx5/vport.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index c36cc6d82926..c87b9507cfa1 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -135,4 +135,6 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
 u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev);
 int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out,
				  u16 opmod);
+int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id);
+
 #endif /* __MLX5_VPORT_H__ */
--
cgit v1.2.3


From 33cfb80d1910b41d1a25cef89b159c945aff0f24 Mon Sep 17 00:00:00 2001
From: Ashish Kalra
Date: Mon, 21 Jul 2025 14:13:10 +0000
Subject: crypto: ccp - Add support for SNP_FEATURE_INFO command

The FEATURE_INFO command provides hypervisors with a programmatic
means to learn about the supported features of the currently loaded
firmware. This command mimics the CPUID instruction relative to
sub-leaf input and the four unsigned integer output values.

To obtain information regarding the features present in the currently
loaded SEV firmware, use the SNP_FEATURE_INFO command.

Cache the SNP platform status and feature information from CPUID
0x8000_0024 in the sev_device structure. If SNP is enabled, utilize
this cached SNP platform status for the API major, minor and build
version.
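
A minimal sketch of driving the new command with the structures above
(the command-submission call is illustrative of the usual PSP flow;
error handling is elided and the buffer must be physically
addressable, hence the allocation):

  /* query sub-leaf 0, mirroring the CPUID 0x8000_0024 layout */
  struct snp_feature_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
  struct sev_data_snp_feature_info data = {
  	.length			= sizeof(data),
  	.ecx_in			= 0,
  	.feature_info_paddr	= __psp_pa(info),
  };
  int error, ret;

  ret = sev_do_cmd(SEV_CMD_SNP_FEATURE_INFO, &data, &error);
  /* on success, info->eax..info->edx carry the CPUID-style output */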
Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0f5f94137f6d..5fb6ae0f51cc 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -107,6 +107,7 @@ enum sev_cmd { SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, SEV_CMD_SNP_COMMIT = 0x0CB, SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_SNP_FEATURE_INFO = 0x0CE, SEV_CMD_MAX, }; @@ -814,6 +815,34 @@ struct sev_data_snp_commit { u32 len; } __packed; +/** + * struct sev_data_snp_feature_info - SEV_SNP_FEATURE_INFO structure + * + * @length: len of the command buffer read by the PSP + * @ecx_in: subfunction index + * @feature_info_paddr : System Physical Address of the FEATURE_INFO structure + */ +struct sev_data_snp_feature_info { + u32 length; + u32 ecx_in; + u64 feature_info_paddr; +} __packed; + +/** + * struct feature_info - FEATURE_INFO structure + * + * @eax: output of SNP_FEATURE_INFO command + * @ebx: output of SNP_FEATURE_INFO command + * @ecx: output of SNP_FEATURE_INFO command + * #edx: output of SNP_FEATURE_INFO command + */ +struct snp_feature_info { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** -- cgit v1.2.3 From 45d59bd4a3e0f0475b3646e8b9936d34794e503d Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:27 +0000 Subject: crypto: ccp - Introduce new API interface to indicate SEV-SNP Ciphertext hiding feature Implement an API that checks the overall feature support for SEV-SNP ciphertext hiding. This API verifies both the support of the SEV firmware for the feature and its enablement in the platform's BIOS. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 5fb6ae0f51cc..d83185b4268b 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -843,6 +843,8 @@ struct snp_feature_info { u32 edx; } __packed; +#define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** @@ -986,6 +988,7 @@ void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); +bool sev_is_snp_ciphertext_hiding_supported(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -1022,6 +1025,8 @@ static inline void snp_free_firmware_page(void *addr) { } static inline void sev_platform_shutdown(void) { } +static inline bool sev_is_snp_ciphertext_hiding_supported(void) { return false; } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ -- cgit v1.2.3 From c9760b0fca6bfa250c02e14bfe81c542f3626a72 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:55 +0000 Subject: crypto: ccp - Add support to enable CipherTextHiding on SNP_INIT_EX To enable ciphertext hiding, it must be specified in the SNP_INIT_EX command as part of SNP initialization. Modify the sev_platform_init_args structure, which is used as input to sev_platform_init(), to include a field that, when non-zero, indicates that ciphertext hiding should be enabled and specifies the maximum ASID that can be used for an SEV-SNP guest. 
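
A minimal sketch of a caller opting in through the new field (the ASID
bound is a made-up value; a real caller would derive it from its own
ASID layout once the feature is reported as supported):

  struct sev_platform_init_args init_args = {
  	.probe		= false,
  	.max_snp_asid	= 100,	/* hypothetical: ASIDs 1..100 reserved for SEV-SNP */
  };
  int ret;

  ret = sev_platform_init(&init_args);
  if (ret)
  	pr_err("SEV-SNP init failed: %d\n", init_args.error);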
Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index d83185b4268b..e0dbcb4b4fd9 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -748,10 +748,13 @@ struct sev_data_snp_guest_request { struct sev_data_snp_init_ex { u32 init_rmp:1; u32 list_paddr_en:1; - u32 rsvd:30; + u32 rapl_dis:1; + u32 ciphertext_hiding_en:1; + u32 rsvd:28; u32 rsvd1; u64 list_paddr; - u8 rsvd2[48]; + u16 max_snp_asid; + u8 rsvd2[46]; } __packed; /** @@ -800,10 +803,13 @@ struct sev_data_snp_shutdown_ex { * @probe: True if this is being called as part of CCP module probe, which * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed * unless psp_init_on_probe module param is set + * @max_snp_asid: When non-zero, enable ciphertext hiding and specify the + * maximum ASID that can be used for an SEV-SNP guest. */ struct sev_platform_init_args { int error; bool probe; + unsigned int max_snp_asid; }; /** -- cgit v1.2.3 From b76c739c3d11d1dacc8efe7fa873bee28ac991f1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 16:52:38 -0500 Subject: iio: fix iio_push_to_buffers_with_ts() typo Replace iio_push_to_buffer_with_ts() with iio_push_to_buffers_with_ts() in some documentation comments in iio.h. The latter is the correct name of the function; the former doesn't exist. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-fix-iio_push_to_buffer_with_ts-typo-v1-1-6ac9efb856d3@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d11668f14a3e..2f5560646ee4 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -779,7 +779,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * them safe for use with non-coherent DMA. * * A number of drivers also use this on buffers that include a 64-bit timestamp - * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where + * that is used with iio_push_to_buffers_with_ts(). Therefore, in the case where * DMA alignment is not sufficient for proper timestamp alignment, we align to * 8 bytes instead. */ @@ -794,7 +794,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * @name: identifier name of the buffer * @count: number of elements in the buffer * - * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In + * Declares a buffer that is safe to use with iio_push_to_buffers_with_ts(). In * addition to allocating enough space for @count elements of @type, it also * allocates space for a s64 timestamp at the end of the buffer and ensures * proper alignment of the timestamp. -- cgit v1.2.3 From 4847d1187402a5027d9a04393f12d52a5a1d7f98 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:41 +0200 Subject: console: introduce console_lock guard()s Having this, guards like these work: guard(console_lock)(); or scoped_guard(console_lock) { ... } See e.g. "vc_screen: use guard()s" later in this series.
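A converted call site would then look roughly like this (illustrative sketch, not taken from the series):

	static void example_poke_console(struct vc_data *vc)	/* illustrative */
	{
		guard(console_lock)();	/* console_unlock() on every return path */

		/* ... operate on console/vc state ... */
	}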
Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 8f10d0a85bb4..031a58dc2b91 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -666,6 +666,8 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; +DEFINE_LOCK_GUARD_0(console_lock, console_lock(), console_unlock()); + extern void console_init(void); /* For deferred console takeover */ -- cgit v1.2.3 From e8398b8aed50382c21fcec77e80a5314e7c45c25 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:42 +0200 Subject: tty: introduce tty_port_tty guard() Having this, guards like these work: scoped_guard(tty_port_tty, port) tty_wakeup(scoped_tty()); See e.g. "tty_port: use scoped_guard()" later in this series. The definitions depend on CONFIG_TTY. It's due to tty_kref_put(). On !CONFIG_TTY, it is an inline and its declaration would conflict. The guards are not needed in that case, of course. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 332ddb93603e..660c254f1efe 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -270,4 +270,18 @@ static inline void tty_port_tty_vhangup(struct tty_port *port) __tty_port_tty_hangup(port, false, false); } +#ifdef CONFIG_TTY +void tty_kref_put(struct tty_struct *tty); +__DEFINE_CLASS_IS_CONDITIONAL(tty_port_tty, true); +__DEFINE_UNLOCK_GUARD(tty_port_tty, struct tty_struct, tty_kref_put(_T->lock)); +static inline class_tty_port_tty_t class_tty_port_tty_constructor(struct tty_port *tport) +{ + class_tty_port_tty_t _t = { + .lock = tty_port_tty_get(tport), + }; + return _t; +} +#define scoped_tty() ((struct tty_struct *)(__guard_ptr(tty_port_tty)(&scope))) +#endif + #endif -- cgit v1.2.3 From 0fd60b689b0dacce659253ec15cb3d3bf660e30b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:43 +0200 Subject: serial: introduce uart_port_lock() guard()s Having this, guards like these work: guard(uart_port_lock_irq)(&up->port); or scoped_guard(uart_port_lock_irqsave, port) { ... } See e.g. "serial: 8250: use guard()s" later in this series. 
Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 84b4648ead7e..666430b47899 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -788,6 +788,19 @@ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned lo spin_unlock_irqrestore(&up->lock, flags); } +DEFINE_GUARD(uart_port_lock, struct uart_port *, uart_port_lock(_T), uart_port_unlock(_T)); +DEFINE_GUARD_COND(uart_port_lock, _try, uart_port_trylock(_T)); + +DEFINE_GUARD(uart_port_lock_irq, struct uart_port *, uart_port_lock_irq(_T), + uart_port_unlock_irq(_T)); + +DEFINE_LOCK_GUARD_1(uart_port_lock_irqsave, struct uart_port, + uart_port_lock_irqsave(_T->lock, &_T->flags), + uart_port_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags); +DEFINE_LOCK_GUARD_1_COND(uart_port_lock_irqsave, _try, + uart_port_trylock_irqsave(_T->lock, &_T->flags)); + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); -- cgit v1.2.3 From 292cb391479d50f4379a0abab34324de92c82a92 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 13 Aug 2025 23:03:52 -0700 Subject: software node: Constify node_group in registration functions The software_node_register_node_group() and software_node_unregister_node_group() functions take in essence an array of pointers to software_node structs. Since the functions do not modify the array declare the argument as constant, so that static arrays can be declared as const and annotated as __initconst. Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/2zny5grbgtwbplynxffxg6dkgjgqf45aigwmgxio5stesdr3wi@gf2zamk5amic Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 82f0cb3abd1e..d1e80b3c9918 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -574,8 +574,8 @@ const struct software_node * software_node_find_by_name(const struct software_node *parent, const char *name); -int software_node_register_node_group(const struct software_node **node_group); -void software_node_unregister_node_group(const struct software_node **node_group); +int software_node_register_node_group(const struct software_node * const *node_group); +void software_node_unregister_node_group(const struct software_node * const *node_group); int software_node_register(const struct software_node *node); void software_node_unregister(const struct software_node *node); -- cgit v1.2.3 From a032fe30cf09b6723ab61a05aee057311b00f9e1 Mon Sep 17 00:00:00 2001 From: Dongcheng Yan Date: Fri, 25 Apr 2025 18:43:30 +0800 Subject: platform/x86: int3472: add hpd pin support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically HDMI to MIPI CSI-2 bridges have a pin to signal image data is being received. On the host side this is wired to a GPIO for polling or interrupts. This includes the Lontium HDMI to MIPI CSI-2 bridges lt6911uxe and lt6911uxc. The GPIO "hpd" is used already by other HDMI to CSI-2 bridges, use it here as well. 
Signed-off-by: Dongcheng Yan Reviewed-by: Sakari Ailus Acked-by: Ilpo Järvinen Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Fixes: 20244cbafbd6 ("media: i2c: change lt6911uxe irq_gpio name to "hpd"") Cc: stable@vger.kernel.org Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/linux/platform_data/x86/int3472.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 78276a11c48d..1571e9157fa5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -27,6 +27,7 @@ #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 +#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 #define INT3472_PDEV_MAX_NAME_LEN 23 #define INT3472_MAX_SENSOR_GPIOS 3 -- cgit v1.2.3 From 894af4a1cde61c3401f237184fb770f72ff12df8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 12 Apr 2025 13:56:01 +0200 Subject: objtool: Validate kCFI calls Validate that all indirect calls adhere to kCFI rules. Notably, doing a nocfi indirect call to a cfi function is broken. Apparently some Rust 'core' code violates this and explodes when run with FineIBT. All the ANNOTATE_NOCFI_SYM sites are prime targets for attackers. - runtime EFI is especially heinous because it also needs to disable IBT. Basically calling unknown code without CFI protection at runtime is a massive security issue. - Kexec image handover; if you can exploit this, you get to keep it :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Acked-by: Sean Christopherson Link: https://lkml.kernel.org/r/20250714103441.496787279@infradead.org --- include/linux/objtool.h | 10 ++++++++++ include/linux/objtool_types.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 366ad004d794..46ebaa46e6c5 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -184,6 +184,15 @@ * WARN using UD2. */ #define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +/* + * This should not be used; it annotates away CFI violations. There are a few + * valid use cases like kexec handover to the next kernel image, and there is + * no security concern there. + * + * There are also a few real issues annotated away, like EFI because we can't + * control the EFI code.
+ */ +#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -194,6 +203,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI #endif #if defined(CONFIG_NOINSTR_VALIDATION) && \ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ -- cgit v1.2.3 From 8ea815399c3fcce1889bd951fec25b5b9a3979c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 14 Apr 2025 16:41:07 +0200 Subject: compiler: remove __ADDRESSABLE_ASM{_STR,}() again __ADDRESSABLE_ASM_STR() is where the necessary stringification happens. As long as "sym" doesn't contain any odd characters, no quoting is required for its use with .quad / .long. In fact the quotation gets in the way with gas 2.25; it's only from 2.26 onwards that quoted symbols are half-way properly supported. However, assembly being different from C anyway, drop __ADDRESSABLE_ASM_STR() and its helper macro altogether. A simple .global directive will suffice to get the symbol "declared", i.e. into the symbol table. While there also stop open-coding STATIC_CALL_TRAMP() and STATIC_CALL_KEY(). Fixes: 0ef8047b737d ("x86/static-call: provide a way to do very early static-call updates") Signed-off-by: Jan Beulich Acked-by: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Message-ID: <609d2c74-de13-4fae-ab1a-1ec44afb948d@suse.com> --- include/linux/compiler.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..64ff73c533e5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -288,14 +288,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) -#define __ADDRESSABLE_ASM(sym) \ - .pushsection .discard.addressable,"aw"; \ - .align ARCH_SEL(8,4); \ - ARCH_SEL(.quad, .long) __stringify(sym); \ - .popsection; - -#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. -- cgit v1.2.3 From 89d912e494f786e79f69ed9d567a8842c71dbb03 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:27 +0200 Subject: bpf: Add dynptr type for skb metadata Add a dynptr type, similar to skb dynptr, but for the skb metadata access. The dynptr provides an alternative to __sk_buff->data_meta for accessing the custom metadata area allocated using the bpf_xdp_adjust_meta() helper. More importantly, it abstracts away the fact where the storage for the custom metadata lives, which opens up the way to persist the metadata by relocating it as the skb travels through the network stack layers. Writes to skb metadata invalidate any existing skb payload and metadata slices. 
While this is more restrictive than needed at the moment, it leaves the door open to reallocating the metadata on writes, and should be only a minor inconvenience to the users. Only the program types which can access __sk_buff->data_meta today are allowed to create a dynptr for skb metadata at the moment. We need to modify the network stack to persist the metadata across layers before opening up access to other BPF hooks. Once more BPF hooks gain access to skb_meta dynptr, we will also need to add a read-only variant of the helper similar to bpf_dynptr_from_skb_rdonly. skb_meta dynptr ops are stubbed out and implemented by subsequent changes. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-1-8a39e636e0fb@cloudflare.com --- include/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc700925b802..ec527b476dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -767,12 +767,15 @@ enum bpf_type_flag { */ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ + DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1358,6 +1361,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB, /* Underlying data is a xdp_buff */ BPF_DYNPTR_TYPE_XDP, + /* Points to skb_metadata_end()-skb_metadata_len() */ + BPF_DYNPTR_TYPE_SKB_META, }; int bpf_dynptr_check_size(u32 size); -- cgit v1.2.3 From 6877cd392baecf816c2ba896a9d42874628004a5 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:28 +0200 Subject: bpf: Enable read/write access to skb metadata through a dynptr Now that we can create a dynptr to skb metadata, make reads to the metadata area possible with bpf_dynptr_read() or through a bpf_dynptr_slice(), and make writes to the metadata area possible with bpf_dynptr_write() or through a bpf_dynptr_slice_rdwr(). Note that for cloned skbs which share data with the original, we limit the skb metadata dynptr to be read-only since we don't unclone on a bpf_dynptr_write to metadata.
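A minimal tc program using the new dynptr might then look like this (sketch; the kfunc name bpf_dynptr_from_skb_meta is assumed from this series, includes and error handling are trimmed):

	SEC("tc")
	int use_meta(struct __sk_buff *skb)
	{
		struct bpf_dynptr meta;
		__u32 val = 0;

		if (bpf_dynptr_from_skb_meta(skb, 0, &meta))		/* assumed kfunc */
			return TC_ACT_OK;
		bpf_dynptr_read(&val, sizeof(val), &meta, 0, 0);	/* read metadata */
		bpf_dynptr_write(&meta, 0, &val, sizeof(val), 0);	/* invalidates slices */
		return TC_ACT_OK;
	}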
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-2-8a39e636e0fb@cloudflare.com --- include/linux/filter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1e7fd3ee759e..9ed21b65e2e9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1784,6 +1784,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1818,6 +1819,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi unsigned long len, bool flush) { } + +static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) +{ + return NULL; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From 807221d3c5ff6e3c91ff57bc82a0b7a541462e20 Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Tue, 12 Aug 2025 14:35:21 +0800 Subject: misc: rtsx_pci: Add separate CD/WP pin polarity reversal support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the Card Detect (CD) and Write Protect (WP) pins shared the same reverse polarity setting in the configuration space. This meant both signals were reversed together, without the ability to configure them individually. This patch introduces two new parameters: sd_cd_reverse_en – enable reverse polarity for the CD pin. sd_wp_reverse_en – enable reverse polarity for the WP pin. With this change, the controller can now support: 1. Reversing both CD and WP pins together (original behavior). 2. Reversing CD and WP pins separately (newly added behavior), if supported by the configuration space. This provides greater flexibility when dealing with devices that have independent polarity requirements for CD and WP pins. Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20250812063521.2427696-1-ricky_wu@realtek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 3b4c36705a9b..3c5689356004 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1160,6 +1160,8 @@ struct rtsx_cr_option { bool ocp_en; u8 sd_400mA_ocp_thd; u8 sd_800mA_ocp_thd; + u8 sd_cd_reverse_en; + u8 sd_wp_reverse_en; }; /* -- cgit v1.2.3 From f5597840ac907858ad2a462b00e4a68fd199121e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:14 +0800 Subject: char: misc: Disallow registering miscdevice whose minor > MISC_DYNAMIC_MINOR Currently, it is allowed to register a miscdevice whose minor > 255, where 255 is defined by the macro MISC_DYNAMIC_MINOR, causing: - Chaos regarding division and management of minor codes. - Registration failure if the minor was already allocated to another dynamic request. Fortunately, in-kernel users have not had such usage yet. Fix by refusing to register miscdevice whose minor > 255.
Also bring in a very simple minor code space division and management: < 255 : Fixed minor code == 255 : Indicator to request dynamic minor code > 255 : Dynamic minor code requested, 1048320 minor codes totally And all fixed minors allocated should be registered in 'linux/miscdevice.h' Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-3-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3e6deb00fc85..565b88efeb23 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -71,6 +71,14 @@ #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 #define RFKILL_MINOR 242 + +/* + * Misc char device minor code space division related to below macro: + * + * < 255 : Fixed minor code + * == 255 : Indicator to request dynamic minor code + * > 255 : Dynamic minor code requested, 1048320 minor codes totally. + */ #define MISC_DYNAMIC_MINOR 255 struct miscdevice { -- cgit v1.2.3 From d7f8d0758b975db8406c91cf242d46cd9611ba3e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:18 +0800 Subject: char: misc: Register fixed minor EISA_EEPROM_MINOR in linux/miscdevice.h Move fixed minor EISA_EEPROM_MINOR definition to linux/miscdevice.h. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-7-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 565b88efeb23..7d0aa718499c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -70,6 +70,7 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define EISA_EEPROM_MINOR 241 #define RFKILL_MINOR 242 /* -- cgit v1.2.3 From 1d6249c1ce826fcf03c695973095eb4a50fb7fd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 11:13:35 +0200 Subject: sysfs: remove bin_attribute::read_new/write_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These transitional fields are now unused and unnecessary. Remove them and their logic in the sysfs core. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250811-sysfs-const-bin_attr-final-v4-1-7b6053fd58bb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index f418aae4f113..7544f6d81c05 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -308,12 +308,8 @@ struct bin_attribute { struct address_space *(*f_mapping)(void); ssize_t (*read)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *, - char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*write_new)(struct file *, struct kobject *, - const struct bin_attribute *, char *, loff_t, size_t); loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *, loff_t, int); int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, -- cgit v1.2.3 From 44d454fcffa8b08d6d66df132121c1d387fa85db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 11:13:36 +0200 Subject: sysfs: remove attribute_group::bin_attrs_new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This transitional field is now unused and unnecessary. Remove it. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250811-sysfs-const-bin_attr-final-v4-2-7b6053fd58bb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 7544f6d81c05..9a25a2911652 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -106,10 +106,7 @@ struct attribute_group { const struct bin_attribute *, int); struct attribute **attrs; - union { - const struct bin_attribute *const *bin_attrs; - const struct bin_attribute *const *bin_attrs_new; - }; + const struct bin_attribute *const *bin_attrs; }; #define SYSFS_PREALLOC 010000 @@ -293,7 +290,7 @@ __ATTRIBUTE_GROUPS(_name) #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ - .bin_attrs_new = _name##_attrs, \ + .bin_attrs = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) -- cgit v1.2.3 From 74f44ad07d1063933c237a7db16f6a4036643d60 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 30 Jul 2025 17:46:14 +0100 Subject: mmc: tmio: Add 64-bit read/write support for SD_BUF0 in polling mode As per the RZ/{G2L,G3E} HW manual, SD_BUF0 can be accessed in 16/32/64-bit widths. Most data transfers in SD/SDIO/eMMC mode are more than 8 bytes. During testing it was found that, if the DMA buffer is not aligned to 128 bits, the driver falls back to PIO mode. In such cases, 64-bit access is much more efficient than the current 16-bit.
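In the PIO path this enables something along these lines (rough sketch with assumed register/helper mapping, not the actual driver diff):

	if (host->pdata->flags & TMIO_MMC_64BIT_DATA_PORT)
		readsq(host->ctl + CTL_SD_DATA_PORT, buf, count >> 3);	/* 64-bit */
	else
		readsw(host->ctl + CTL_SD_DATA_PORT, buf, count >> 1);	/* 16-bit */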
Tested-by: Wolfram Sang Reviewed-by: Wolfram Sang Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20250730164618.233117-2-biju.das.jz@bp.renesas.com Signed-off-by: Ulf Hansson --- include/linux/platform_data/tmio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/tmio.h b/include/linux/platform_data/tmio.h index b060124ba1ae..426291713b83 100644 --- a/include/linux/platform_data/tmio.h +++ b/include/linux/platform_data/tmio.h @@ -47,6 +47,9 @@ /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) +/* Some controllers have a 64-bit wide data port register */ +#define TMIO_MMC_64BIT_DATA_PORT BIT(12) + struct tmio_mmc_data { void *chan_priv_tx; void *chan_priv_rx; -- cgit v1.2.3 From d2e6fb2c31a07f34e5e7533df11431cb0d2ecf9f Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Tue, 12 Aug 2025 11:08:11 +0800 Subject: misc: rtsx: usb card reader: add OCP support This patch adds support for Over Current Protection (OCP) to the Realtek USB card reader driver. The OCP mechanism protects the hardware by detecting and handling current overload conditions. This implementation includes: - Register configurations to enable OCP monitoring. - Handling of OCP interrupt events and associated error reporting. - Card power management changes in response to OCP triggers. This enhancement improves the robustness of the driver when operating in environments where electrical anomalies may occur, particularly with SD and MS card interfaces. Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20250812030811.2426112-1-ricky_wu@realtek.com Signed-off-by: Ulf Hansson --- include/linux/rtsx_usb.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index f267a06c6b1e..276b509c03e3 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -99,6 +99,17 @@ extern int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card); #define CD_MASK (SD_CD | MS_CD | XD_CD) #define SD_WP 0x08 +/* OCPCTL */ +#define MS_OCP_DETECT_EN 0x08 +#define MS_OCP_INT_EN 0x04 +#define MS_OCP_INT_CLR 0x02 +#define MS_OCP_CLEAR 0x01 + +/* OCPSTAT */ +#define MS_OCP_DETECT 0x80 +#define MS_OCP_NOW 0x02 +#define MS_OCP_EVER 0x01 + /* reader command field offset & parameters */ #define READ_REG_CMD 0 #define WRITE_REG_CMD 1 -- cgit v1.2.3 From 99e6cc80d5ce5af5781f84d20e4f3478d66ee8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Mon, 18 Aug 2025 16:02:50 +0200 Subject: mmc: core: add mmc_read_tuning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a function to the MMC hosts to read some blocks of data as part of their tuning. This function only returns the status of the read operation, not the data read. 
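A host driver's tuning implementation might then iterate its sample points like this (illustrative; the tap count and tap-setting helper are hypothetical):

	bool ok[EXAMPLE_NUM_TAPS];
	unsigned int tap;

	for (tap = 0; tap < EXAMPLE_NUM_TAPS; tap++) {
		example_set_sampling_tap(host, tap);		/* hypothetical */
		ok[tap] = !mmc_read_tuning(host, 512, 1);	/* status only */
	}
	/* ...then select the middle of the longest window of good taps */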
Signed-off-by: Benoît Monin Link: https://lore.kernel.org/r/20250818-mobileye-emmc-for-upstream-4-v4-5-34ecb3995e96@bootlin.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 68f09a955a90..5ed5d203de23 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -743,5 +743,6 @@ int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); +int mmc_read_tuning(struct mmc_host *host, unsigned int blksz, unsigned int blocks); #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3 From 76d2e3890fb169168c73f2e4f8375c7cc24a765e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Aug 2025 07:25:20 -0700 Subject: NFS: Fix a race when updating an existing write After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head. So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request(). Reported-by: Jeff Layton Tested-by: Joe Quanaim Tested-by: Andrew Steffen Reviewed-by: Jeff Layton Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 169b4ae30ff4..9aed39abc94b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit v1.2.3 From 808471ddb0fa785559c3e7aee59be20a13b46ef5 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 13 Aug 2025 15:04:55 +0900 Subject: iov_iter: iterate_folioq: fix handling of offset >= folio size It's apparently possible to get an iov advanced all the way up to the end of the current page we're looking at, e.g. 
(gdb) p *iter $24 = {iter_type = 4 '\004', nofault = false, data_source = false, iov_offset = 4096, {__ubuf_iovec = { iov_base = 0xffff88800f5bc000, iov_len = 655}, {{__iov = 0xffff88800f5bc000, kvec = 0xffff88800f5bc000, bvec = 0xffff88800f5bc000, folioq = 0xffff88800f5bc000, xarray = 0xffff88800f5bc000, ubuf = 0xffff88800f5bc000}, count = 655}}, {nr_segs = 2, folioq_slot = 2 '\002', xarray_start = 2}} Where iov_offset is 4k with 4k-sized folios This should have been fine because we're only in the 2nd slot and there's another one after this, but iterate_folioq should not try to map a folio that skips the whole size, and more importantly part here does not end up zero (because 'PAGE_SIZE - skip % PAGE_SIZE' ends up PAGE_SIZE and not zero..), so skip forward to the "advance to next folio" code Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-0-a0ffad2b665a@codewreck.org Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-1-a0ffad2b665a@codewreck.org Signed-off-by: Dominique Martinet Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios") Reported-by: Maximilian Bosch Reported-by: Ryan Lahfa Reported-by: Christian Theune Reported-by: Arnout Engelen Link: https://lkml.kernel.org/r/D4LHHUNLG79Y.12PI0X6BEHRHW@mbosch.me/ Acked-by: David Howells Cc: Al Viro Cc: Christian Brauner Cc: Matthew Wilcox (Oracle) Cc: [6.12+] Signed-off-by: Andrew Morton --- include/linux/iov_iter.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index c4aa58032faf..f9a17fbbd398 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 do { struct folio *folio = folioq_folio(folioq, slot); - size_t part, remain, consumed; + size_t part, remain = 0, consumed; size_t fsize; void *base; @@ -168,14 +168,16 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 break; fsize = folioq_folio_size(folioq, slot); - base = kmap_local_folio(folio, skip); - part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); - remain = step(base, progress, part, priv, priv2); - kunmap_local(base); - consumed = part - remain; - len -= consumed; - progress += consumed; - skip += consumed; + if (skip < fsize) { + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + } if (skip >= fsize) { skip = 0; slot++; -- cgit v1.2.3 From 053c8ebe74f7e1f4c072e59428da80b9d78bc4b7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 17 Aug 2025 23:17:59 +0800 Subject: mm/migrate: fix NULL movable_ops if CONFIG_ZSMALLOC=m After commit 84caf98838a3e5f4bdb34 ("mm: stop storing migration_ops in page->mapping") we get such an error message if CONFIG_ZSMALLOC=m: WARNING: CPU: 3 PID: 42 at mm/migrate.c:142 isolate_movable_ops_page+0xa8/0x1c0 CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT pc 9000000000540bd8 ra 9000000000540b84 tp 9000000100420000 sp 9000000100423a60 a0 9000000100193a80 a1 000000000000000c a2 000000000000001b a3 ffffffffffffffff a4 ffffffffffffffff a5 0000000000000267 a6 0000000000000000 a7 9000000100423ae0 t0 00000000000000f1 t1 00000000000000f6 t2 0000000000000000 t3 0000000000000001 t4 ffffff00010eb834 t5 
0000000000000040 t6 900000010c89d380 t7 90000000023fcc70 t8 0000000000000018 u0 0000000000000000 s9 ffffff00010eb800 s0 ffffff00010eb800 s1 000000000000000c s2 0000000000043ae0 s3 0000800000000000 s4 900000000219cc40 s5 0000000000000000 s6 ffffff00010eb800 s7 0000000000000001 s8 90000000025b4000 ra: 9000000000540b84 isolate_movable_ops_page+0x54/0x1c0 ERA: 9000000000540bd8 isolate_movable_ops_page+0xa8/0x1c0 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT Stack : 90000000021fd000 0000000000000000 9000000000247720 9000000100420000 90000001004236a0 90000001004236a8 0000000000000000 90000001004237e8 90000001004237e0 90000001004237e0 9000000100423550 0000000000000001 0000000000000001 90000001004236a8 725a84864a19e2d9 90000000023fcc58 9000000100420000 90000000024c6848 9000000002416848 0000000000000001 0000000000000000 000000000000000a 0000000007fe0000 ffffff00010eb800 0000000000000000 90000000021fd000 0000000000000000 900000000205cf30 000000000000008e 0000000000000009 ffffff00010eb800 0000000000000001 90000000025b4000 0000000000000000 900000000024773c 00007ffff103d748 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<900000000024773c>] show_stack+0x5c/0x190 [<90000000002415e0>] dump_stack_lvl+0x70/0x9c [<90000000004abe6c>] isolate_migratepages_block+0x3bc/0x16e0 [<90000000004af408>] compact_zone+0x558/0x1000 [<90000000004b0068>] compact_node+0xa8/0x1e0 [<90000000004b0aa4>] kcompactd+0x394/0x410 [<90000000002b3c98>] kthread+0x128/0x140 [<9000000001779148>] ret_from_kernel_thread+0x28/0xc0 [<9000000000245528>] ret_from_kernel_thread_asm+0x10/0x88 The reason is that defined(CONFIG_ZSMALLOC) evaluates to 1 only when CONFIG_ZSMALLOC=y, we should use IS_ENABLED(CONFIG_ZSMALLOC) instead. But when I use IS_ENABLED(CONFIG_ZSMALLOC), page_movable_ops() cannot access zsmalloc_mops because zsmalloc_mops is in a module. To solve this problem, we define a set_movable_ops() interface to register and unregister offline_movable_ops / zsmalloc_movable_ops in mm/migrate.c, and call them at mm/balloon_compaction.c & mm/zsmalloc.c. Since offline_movable_ops / zsmalloc_movable_ops are always accessible, all #ifdef / #endif are removed in page_movable_ops(). Link: https://lkml.kernel.org/r/20250817151759.2525174-1-chenhuacai@loongson.cn Fixes: 84caf98838a3 ("mm: stop storing migration_ops in page->mapping") Signed-off-by: Huacai Chen Acked-by: Zi Yan Acked-by: David Hildenbrand Cc: Huacai Chen Cc: Huacai Chen Cc: Lorenzo Stoakes Cc: "Michael S. 
Tsirkin" Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/migrate.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index acadd41e0b5c..9009e27b5f44 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,6 +79,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); +int set_movable_ops(const struct movable_operations *ops, enum pagetype type); #else @@ -100,6 +101,10 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } +static inline int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + return -ENOSYS; +} #endif /* CONFIG_MIGRATION */ -- cgit v1.2.3 From c3f0c02997c7f8489fec259e28e0e04e9811edac Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 18 Aug 2025 08:40:26 -0700 Subject: net: Add skb_dstref_steal and skb_dstref_restore Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set to prevent potential leaks. There are few places that still manually manage dst_entry not using the helpers. Convert them to the following new helpers: - skb_dstref_steal that resets dst_entry and returns previous dst_entry value - skb_dstref_restore that restores dst_entry previously reset via skb_dstref_steal Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250818154032.3173645-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..7538ca507ee9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,38 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +/** + * skb_dstref_steal() - return current dst_entry value and clear it + * @skb: buffer + * + * Resets skb dst_entry without adjusting its reference count. Useful in + * cases where dst_entry needs to be temporarily reset and restored. + * Note that the returned value cannot be used directly because it + * might contain SKB_DST_NOREF bit. + * + * When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly + * handle dst_entry reference counting. + * + * Returns: original skb dst_entry. + */ +static inline unsigned long skb_dstref_steal(struct sk_buff *skb) +{ + unsigned long refdst = skb->_skb_refdst; + + skb->_skb_refdst = 0; + return refdst; +} + +/** + * skb_dstref_restore() - restore skb dst_entry removed via skb_dstref_steal() + * @skb: buffer + * @refdst: dst entry from a call to skb_dstref_steal() + */ +static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) +{ + skb->_skb_refdst = refdst; +} + /** * skb_dst_set - sets skb dst * @skb: buffer -- cgit v1.2.3 From a890348adcc993f48d1ae38f1174dc8de4c3c5ac Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 18 Aug 2025 08:40:32 -0700 Subject: net: Add skb_dst_check_unset To prevent dst_entry leaks, add warning when the non-NULL dst_entry is rewritten. 
Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250818154032.3173645-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7538ca507ee9..ca8be45dd8be 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,12 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +static inline void skb_dst_check_unset(struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE((skb->_skb_refdst & SKB_DST_PTRMASK) && + !(skb->_skb_refdst & SKB_DST_NOREF)); +} + /** * skb_dstref_steal() - return current dst_entry value and clear it * @skb: buffer @@ -1188,6 +1194,7 @@ static inline unsigned long skb_dstref_steal(struct sk_buff *skb) */ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) { + skb_dst_check_unset(skb); skb->_skb_refdst = refdst; } @@ -1201,6 +1208,7 @@ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } @@ -1217,6 +1225,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; -- cgit v1.2.3 From 68889dfd547bd8eabc5a98b58475d7b901cf5129 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:09 +0000 Subject: mptcp: Fix up subflow's memcg when CONFIG_SOCK_CGROUP_DATA=n. When sk_alloc() allocates a socket, mem_cgroup_sk_alloc() sets sk->sk_memcg based on the current task. MPTCP subflow socket creation is triggered from userspace or an in-kernel worker. In the latter case, sk->sk_memcg is not what we want. So, we fix it up from the parent socket's sk->sk_memcg in mptcp_attach_cgroup(). Although the code is placed under #ifdef CONFIG_MEMCG, it is buried under #ifdef CONFIG_SOCK_CGROUP_DATA. The two configs are orthogonal. If CONFIG_MEMCG is enabled without CONFIG_SOCK_CGROUP_DATA, the subflow's memory usage is not charged correctly. Let's move the code out of the wrong ifdef guard. Note that sk->sk_memcg is freed in sk_prot_free() and the parent sk holds the refcnt of memcg->css here, so we don't need to use css_tryget(). 
Fixes: 3764b0c5651e3 ("mptcp: attach subflow socket to parent cgroup") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Matthieu Baerts (NGI0) Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..25921fbec685 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1604,6 +1604,7 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); +void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1661,6 +1662,11 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg); #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From bb178c6bc08525d758a57775458d644304011bf8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:16 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_(un)?charge(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_charge_skmem() and mem_cgroup_uncharge_skmem(). Let's pass struct sock to the functions. While at it, they are renamed to match other functions starting with mem_cgroup_sk_. 
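Call sites change along these lines (illustrative before/after):

	/* before */
	mem_cgroup_charge_skmem(sk->sk_memcg, nr_pages, gfp_mask);
	/* after */
	mem_cgroup_sk_charge(sk, nr_pages, gfp_mask);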
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 25921fbec685..0837d3de3a68 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1596,15 +1596,16 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, - gfp_t gfp_mask); -void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) + void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); +bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages, + gfp_t gfp_mask); +void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1660,13 +1661,31 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 -static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; -static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_alloc(struct sock *sk) +{ +} + +static inline void mem_cgroup_sk_free(struct sock *sk) +{ +} static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) { } +static inline bool mem_cgroup_sk_charge(const struct sock *sk, + unsigned int nr_pages, + gfp_t gfp_mask) +{ + return false; +} + +static inline void mem_cgroup_sk_uncharge(const struct sock *sk, + unsigned int nr_pages) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From b2ffd10cddde47cc6830e4981e91e3215def62b1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:17 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_under_memory_pressure(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_under_socket_pressure(). Let's pass struct sock to it and rename the function to match other functions starting with mem_cgroup_sk_. Note that the helper is moved to sock.h to use mem_cgroup_from_sk(). 
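Callers likewise stop dereferencing sk->sk_memcg themselves (illustrative before/after):

	/* before */
	if (mem_cgroup_under_socket_pressure(sk->sk_memcg))
		/* ... back off ... */;
	/* after */
	if (mem_cgroup_sk_under_memory_pressure(sk))
		/* ... back off ... */;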
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-10-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0837d3de3a68..fb27e3d2fdac 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1642,19 +1642,6 @@ static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) } #endif -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ -#ifdef CONFIG_MEMCG_V1 - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) - return !!memcg->tcpmem_pressure; -#endif /* CONFIG_MEMCG_V1 */ - do { - if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) - return true; - } while ((memcg = parent_mem_cgroup(memcg))); - return false; -} - int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); @@ -1686,11 +1673,6 @@ static inline void mem_cgroup_sk_uncharge(const struct sock *sk, { } -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ - return false; -} - static inline void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { -- cgit v1.2.3 From d072148a8631f102de60ed5a3a827e85d09d24f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Aug 2025 10:25:00 +0200 Subject: fs: add a FMODE_ flag to indicate IOCB_HAS_METADATA availability Currently the kernel will happily route io_uring requests with metadata to file operations that don't support it. Add a FMODE_ flag to guard that. Fixes: 4de2ce04c862 ("fs: introduce IOCB_HAS_METADATA for metadata") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250819082517.2038819-2-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..601d036a6c78 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -149,7 +149,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) -/* FMODE_* bit 13 */ +/* Supports IOCB_HAS_METADATA */ +#define FMODE_HAS_METADATA ((__force fmode_t)(1 << 13)) /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) -- cgit v1.2.3 From 8ef7f3132e4005a103b382e71abea7ad01fbeb86 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: LoongArch: Add cpuhotplug hooks to fix high cpu usage of vCPU threads When the CPU is offline, the timer of LoongArch is not correctly closed. This is harmless for real machines, but results in an excessively high CPU usage rate for the offline vCPU thread in virtual machines. To correctly close the timer, we have made the following modifications: Register the cpu hotplug event (CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING) for LoongArch. This event's hooks will be called to close the timer when the CPU is offline. Clear the timer interrupt when the timer is turned off.
Before the timer is turned off, a timer interrupt may already be pending because interrupts were disabled, and this pending interrupt also affects the halt state of the offline vCPU. Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index edfa61d80702..62cd7b35a29c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, + CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_REALTEK_TIMER_STARTING, -- cgit v1.2.3 From 8fc6056dcf79937c46c97fa4996cda65956437a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Aug 2025 10:44:31 +0800 Subject: f2fs: fix to detect potential corrupted nid in free_nid_list As reported, the on-disk footer.ino and footer.nid are the same and out-of-range; let's add a sanity check in f2fs_alloc_nid() to detect any potential corruption in the free_nid_list. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2f8b8bfc0e73..6afb4a13b81d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -79,6 +79,7 @@ enum stop_cp_reason { STOP_CP_REASON_FLUSH_FAIL, STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_CORRUPTED_FREE_BITMAP, + STOP_CP_REASON_CORRUPTED_NID, STOP_CP_REASON_MAX, }; -- cgit v1.2.3 From 5f8a4f34f6dcf4b64c3cbcfd4aa5a8d7dbcd268d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 19 Aug 2025 09:39:15 -0700 Subject: bnxt_en: hsi: Update FW interface to 1.10.3.133 The major change is struct pcie_ctx_hw_stats_v2, which has new latency histograms added.
Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250819163919.104075-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- include/linux/bnxt/hsi.h | 315 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 253 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 549231703bce..8c5dac3b3ef3 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -276,6 +276,10 @@ struct cmd_nums { #define HWRM_REG_POWER_QUERY 0xe1UL #define HWRM_CORE_FREQUENCY_QUERY 0xe2UL #define HWRM_REG_POWER_HISTOGRAM 0xe3UL + #define HWRM_MONITOR_PAX_HISTOGRAM_START 0xe4UL + #define HWRM_MONITOR_PAX_HISTOGRAM_COLLECT 0xe5UL + #define HWRM_STAT_QUERY_ROCE_STATS 0xe6UL + #define HWRM_STAT_QUERY_ROCE_STATS_EXT 0xe7UL #define HWRM_WOL_FILTER_ALLOC 0xf0UL #define HWRM_WOL_FILTER_FREE 0xf1UL #define HWRM_WOL_FILTER_QCFG 0xf2UL @@ -407,9 +411,8 @@ struct cmd_nums { #define HWRM_FUNC_LAG_UPDATE 0x1b1UL #define HWRM_FUNC_LAG_FREE 0x1b2UL #define HWRM_FUNC_LAG_QCFG 0x1b3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_ADD 0x1c2UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_DELETE 0x1c3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_QUERY 0x1c4UL + #define HWRM_FUNC_TTX_PACING_RATE_PROF_QUERY 0x1c3UL + #define HWRM_FUNC_TTX_PACING_RATE_QUERY 0x1c4UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -441,6 +444,7 @@ struct cmd_nums { #define HWRM_MFG_WRITE_CERT_NVM 0x21cUL #define HWRM_PORT_POE_CFG 0x230UL #define HWRM_PORT_POE_QCFG 0x231UL + #define HWRM_PORT_PHY_FDRSTAT 0x232UL #define HWRM_UDCC_QCAPS 0x258UL #define HWRM_UDCC_CFG 0x259UL #define HWRM_UDCC_QCFG 0x25aUL @@ -453,6 +457,8 @@ struct cmd_nums { #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x261UL #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x262UL #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x263UL + #define HWRM_QUEUE_ADPTV_QOS_RX_QCFG 0x264UL + #define HWRM_QUEUE_ADPTV_QOS_TX_QCFG 0x265UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -551,6 +557,8 @@ struct cmd_nums { #define HWRM_DBG_COREDUMP_CAPTURE 0xff2cUL #define HWRM_DBG_PTRACE 0xff2dUL #define HWRM_DBG_SIM_CABLE_STATE 0xff2eUL + #define HWRM_DBG_TOKEN_QUERY_AUTH_IDS 0xff2fUL + #define HWRM_DBG_TOKEN_CFG 0xff30UL #define HWRM_NVM_GET_VPD_FIELD_INFO 0xffeaUL #define HWRM_NVM_SET_VPD_FIELD_INFO 0xffebUL #define HWRM_NVM_DEFRAG 0xffecUL @@ -632,8 +640,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 97 -#define HWRM_VERSION_STR "1.10.3.97" +#define HWRM_VERSION_RSVD 133 +#define HWRM_VERSION_STR "1.10.3.133" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -688,6 +696,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL + #define VER_GET_RESP_DEV_CAPS_CFG_DEBUG_TOKEN_SUPPORTED 0x20000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -872,7 +881,8 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_REPRESENTOR_PAIR_CHANGE 0x4eUL #define ASYNC_EVENT_CMPL_EVENT_ID_VF_STAT_CHANGE 0x4fUL #define ASYNC_EVENT_CMPL_EVENT_ID_HOST_COREDUMP 0x50UL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ADPTV_QOS 0x51UL + #define 
ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x52UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1344,7 +1354,8 @@ struct hwrm_async_event_cmpl_dbg_buf_producer { #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA2_TRACE 0x9UL #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_RIGP1_TRACE 0xaUL #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE 0xcUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE }; /* hwrm_async_event_cmpl_hwrm_error (size:128b/16B) */ @@ -1401,7 +1412,11 @@ struct hwrm_async_event_cmpl_error_report_base { #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUP_UDCC_SES 0x7UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DB_DROP 0x8UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MD_TEMP 0x9UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR 0xaUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR }; /* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ @@ -1914,6 +1929,12 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT3_RX_RATE_PROFILE_SEL_SUPPORTED 0x8UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_BIDI_OPT_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_ROCE_VF_DYN_ALLOC_SUPPORT 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_CHANGE_UDP_SRCPORT_SUPPORT 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_COMPLIANCE_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MULTI_L2_DB_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_SECURE_ATS_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MBUF_STATS_SUPPORTED 0x800UL __le16 max_roce_vfs; __le16 max_crypto_rx_flow_filters; u8 unused_3[3]; @@ -1931,7 +1952,7 @@ struct hwrm_func_qcfg_input { u8 unused_0[6]; }; -/* hwrm_func_qcfg_output (size:1344b/168B) */ +/* hwrm_func_qcfg_output (size:1408b/176B) */ struct hwrm_func_qcfg_output { __le16 error_code; __le16 req_type; @@ -2124,7 +2145,43 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL __le16 mirror_vnic_id; - u8 unused_7[7]; + u8 max_link_width; + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_LAST 
FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 + u8 max_link_speed; + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_LAST FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 + u8 negotiated_link_width; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 + u8 negotiated_link_speed; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 + u8 unused_7[2]; + u8 pcie_compliance; + u8 unused_8; + __le16 l2_db_multi_page_size_kb; + u8 unused_9[5]; u8 valid; }; @@ -2322,6 +2379,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF 0x200UL #define FUNC_CFG_REQ_ENABLES2_XID_PARTITION_CFG 0x400UL #define FUNC_CFG_REQ_ENABLES2_PHYSICAL_SLOT_NUMBER 0x800UL + #define FUNC_CFG_REQ_ENABLES2_PCIE_COMPLIANCE 0x1000UL u8 port_kdnet_mode; #define FUNC_CFG_REQ_PORT_KDNET_MODE_DISABLED 0x0UL #define FUNC_CFG_REQ_PORT_KDNET_MODE_ENABLED 0x1UL @@ -2353,7 +2411,8 @@ struct hwrm_func_cfg_input { __le16 xid_partition_cfg; #define FUNC_CFG_REQ_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_CFG_REQ_XID_PARTITION_CFG_RX_CK 0x2UL - __le16 unused_2; + u8 pcie_compliance; + u8 unused_2; }; /* hwrm_func_cfg_output (size:128b/16B) */ @@ -2370,11 +2429,41 @@ struct hwrm_func_cfg_output { struct hwrm_func_cfg_cmd_err { u8 code; #define FUNC_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE 0x1UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX 0x2UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT 0x4UL - #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_OUT_OF_RANGE 0x1UL + #define FUNC_CFG_CMD_ERR_CODE_NPAR_PARTITION_DOWN_FAILED 0x2UL + #define FUNC_CFG_CMD_ERR_CODE_TPID_SET_DFLT_VLAN_NOT_SET 0x3UL + #define FUNC_CFG_CMD_ERR_CODE_RES_ARRAY_ALLOC_FAILED 0x4UL + #define FUNC_CFG_CMD_ERR_CODE_TX_RING_ASSET_TEST_FAILED 0x5UL + #define FUNC_CFG_CMD_ERR_CODE_TX_RING_RES_UPDATE_FAILED 0x6UL + #define FUNC_CFG_CMD_ERR_CODE_APPLY_MAX_BW_FAILED 0x7UL + #define FUNC_CFG_CMD_ERR_CODE_ENABLE_EVB_FAILED 0x8UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_ASSET_TEST_FAILED 0x9UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_RES_UPDATE_FAILED 0xaUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_ASSET_TEST_FAILED 0xbUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_RES_UPDATE_FAILED 0xcUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_ASSET_TEST_FAILED 0xdUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_RES_UPDATE_FAILED 0xeUL + #define 
FUNC_CFG_CMD_ERR_CODE_RX_RING_ASSET_TEST_FAILED 0xfUL + #define FUNC_CFG_CMD_ERR_CODE_RX_RING_RES_UPDATE_FAILED 0x10UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_ASSET_TEST_FAILED 0x11UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_RES_UPDATE_FAILED 0x12UL + #define FUNC_CFG_CMD_ERR_CODE_FAILED_TO_START_STATS_THREAD 0x13UL + #define FUNC_CFG_CMD_ERR_CODE_RDMA_SRIOV_DISABLED 0x14UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_DISABLED 0x15UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_ASSET_TEST_FAILED 0x16UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_RES_UPDATE_FAILED 0x17UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_DISABLED 0x18UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_ASSET_TEST_FAILED 0x19UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_RES_UPDATE_FAILED 0x1aUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_DISABLED 0x1bUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_ASSET_TEST_FAILED 0x1cUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_RES_UPDATE_FAILED 0x1dUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_DISABLED 0x1eUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_ASSET_TEST_FAILED 0x1fUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_RES_UPDATE_FAILED 0x20UL + #define FUNC_CFG_CMD_ERR_CODE_INVALID_KDNET_MODE 0x21UL + #define FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL 0x22UL + #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL u8 unused_0[7]; }; @@ -3780,6 +3869,7 @@ struct hwrm_func_backing_store_cfg_v2_input { #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3865,6 +3955,7 @@ struct hwrm_func_backing_store_qcfg_v2_input { #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3904,6 +3995,7 @@ struct hwrm_func_backing_store_qcfg_v2_output { #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA1_TRACE 0x27UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ERR_QPC_TRACE 0x2aUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID __le16 instance; @@ -4027,6 +4119,7 @@ struct hwrm_func_backing_store_qcaps_v2_input { #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID u8 rsvd[6]; @@ -4070,6 +4163,7 @@ struct hwrm_func_backing_store_qcaps_v2_output { #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP1_TRACE 0x29UL #define 
FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID __le16 entry_size; @@ -4216,6 +4310,10 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE 0x100000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE 0x200000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE 0x400000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_ENABLE 0x800000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_DISABLE 0x1000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_ENABLE 0x2000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_DISABLE 0x4000000UL __le32 enables; #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL @@ -4703,6 +4801,8 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SPEEDS2_SUPPORTED 0x4UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_LINK_TRAINING 0x8UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_PRECODING 0x10UL char phy_vendor_name[16]; char phy_vendor_partnumber[16]; __le16 support_pam4_speeds; @@ -4725,6 +4825,10 @@ struct hwrm_port_phy_qcfg_output { u8 link_down_reason; #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_RF 0x1UL #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_OTP_SPEED_VIOLATION 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_CABLE_REMOVED 0x4UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_MODULE_FAULT 0x8UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_BMC_REQUEST 0x10UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_TX_LASER_DISABLED 0x20UL __le16 support_speeds2; #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_1GB 0x1UL #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_10GB 0x2UL @@ -5882,9 +5986,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led0_color_caps; - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led1_id; u8 led1_type; #define PORT_LED_QCAPS_RESP_LED1_TYPE_SPEED 0x0UL @@ -5900,9 +6005,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led1_color_caps; - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led2_id; u8 led2_type; #define PORT_LED_QCAPS_RESP_LED2_TYPE_SPEED 0x0UL @@ -5918,9 +6024,10 @@ struct hwrm_port_led_qcaps_output { 
#define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led2_color_caps; - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led3_id; u8 led3_type; #define PORT_LED_QCAPS_RESP_LED3_TYPE_SPEED 0x0UL @@ -5936,9 +6043,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led3_color_caps; - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 unused_4[3]; u8 valid; }; @@ -7036,9 +7144,22 @@ struct hwrm_vnic_rss_cfg_output { /* hwrm_vnic_rss_cfg_cmd_err (size:64b/8B) */ struct hwrm_vnic_rss_cfg_cmd_err { u8 code; - #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNABLE_TO_GET_RSS_CFG 0x2UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_UNSUPPORTED 0x3UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_ERR 0x4UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_MODE_FAIL 0x5UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RING_GRP_TABLE_ALLOC_ERR 0x6UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_KEY_ALLOC_ERR 0x7UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_DMA_FAILED 0x8UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RX_RING_ALLOC_ERR 0x9UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CMPL_RING_ALLOC_ERR 0xaUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HW_SET_RSS_FAILED 0xbUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CTX_INVALID 0xcUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_INVALID 0xdUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID 0xeUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID u8 unused_0[7]; }; @@ -7177,7 +7298,7 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_output { u8 valid; }; -/* hwrm_ring_alloc_input (size:704b/88B) */ +/* hwrm_ring_alloc_input (size:768b/96B) */ struct hwrm_ring_alloc_input { __le16 req_type; __le16 cmpl_ring; @@ -7195,6 +7316,7 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL #define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL #define RING_ALLOC_REQ_ENABLES_RX_RATE_PROFILE_VALID 0x1000UL + #define RING_ALLOC_REQ_ENABLES_DPI_VALID 0x2000UL u8 ring_type; #define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL @@ -7287,6 +7409,8 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_LAST RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE u8 unused_4; __le64 cq_handle; + 
__le16 dpi; + __le16 unused_5[3]; }; /* hwrm_ring_alloc_output (size:128b/16B) */ @@ -7776,7 +7900,10 @@ struct hwrm_cfa_l2_set_rx_mask_cmd_err { u8 code; #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL - #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_MAX_VLAN_TAGS 0x2UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_VNIC_ID 0x3UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION 0x4UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION u8 unused_0[7]; }; @@ -8109,9 +8236,38 @@ struct hwrm_cfa_ntuple_filter_alloc_output { /* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */ struct hwrm_cfa_ntuple_filter_alloc_cmd_err { u8 code; - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_MAC 0x65UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_BC_MC_MAC 0x66UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_VNIC 0x67UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_PF_FID 0x68UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L2_CTXT_ID 0x69UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_CTXT_CFG 0x6aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_DATA_FLD 0x6bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_CFA_LAYOUT 0x6cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_CTXT_ALLOC_FAIL 0x6dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ROCE_FLOW_ERR 0x6eUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_OWNER_FID 0x6fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_REF_CNT 0x70UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_FLOW_TYPE 0x71UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_IVLAN 0x72UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_MAX_VLAN_ID 0x73UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_TNL_REQ 0x74UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_ADDR 0x75UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_IVLAN 0x76UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR 0x77UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR_TYPE 0x78UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_T_L3_ADDR_TYPE 0x79UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DST_VNIC_ID 0x7aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VNI 0x7bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_DST_ID 0x7cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_FAIL_ROCE_L2_FLOW 0x7dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_NPAR_VLAN 0x7eUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ATSP_ADD 0x7fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DFLT_VLAN_FAIL 0x80UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L3_TYPE 0x81UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW 0x82UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW u8 unused_0[7]; }; @@ -9181,7 +9337,7 @@ struct pcie_ctx_hw_stats { __le64 pcie_recovery_histogram; }; -/* pcie_ctx_hw_stats_v2 (size:4096b/512B) */ +/* pcie_ctx_hw_stats_v2 (size:4544b/568B) */ struct 
pcie_ctx_hw_stats_v2 { __le64 pcie_pl_signal_integrity; __le64 pcie_dl_signal_integrity; @@ -9212,6 +9368,9 @@ struct pcie_ctx_hw_stats_v2 { __le64 pcie_other_packet_count; __le64 pcie_blocked_packet_count; __le64 pcie_cmpl_packet_count; + __le32 pcie_rd_latency_histogram[12]; + __le32 pcie_rd_latency_all_normal_count; + __le32 unused_2; }; /* hwrm_stat_generic_qstats_input (size:256b/32B) */ @@ -9406,7 +9565,8 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_COUNT 0x12cUL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND 0x12dUL - #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND + #define STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS 0x190UL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS __le16 len; u8 version; #define STRUCT_HDR_VERSION_0 0x0UL @@ -9459,11 +9619,13 @@ struct hwrm_fw_set_structured_data_output { /* hwrm_fw_set_structured_data_cmd_err (size:64b/8B) */ struct hwrm_fw_set_structured_data_cmd_err { u8 code; - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_ALREADY_ADDED 0x4UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG 0x5UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG u8 unused_0[7]; }; @@ -9487,7 +9649,9 @@ struct hwrm_fw_get_structured_data_input { #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER 0x201UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL 0x300UL - #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_SUPPORTED 0x320UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE 0x321UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE u8 count; u8 unused_0; }; @@ -10172,7 +10336,8 @@ struct hwrm_dbg_log_buffer_flush_input { #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA2_TRACE 0x9UL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP1_TRACE 0xaUL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE 0xcUL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE u8 unused_1[2]; __le32 flags; #define DBG_LOG_BUFFER_FLUSH_REQ_FLAGS_FLUSH_ALL_BUFFERS 0x1UL @@ -10295,10 +10460,15 @@ struct hwrm_nvm_write_output { /* hwrm_nvm_write_cmd_err (size:64b/8B) */ struct hwrm_nvm_write_cmd_err { u8 code; - #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL - #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL - #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_NO_SPACE + #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL + #define 
NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL + #define NVM_WRITE_CMD_ERR_CODE_WRITE_FAILED 0x3UL + #define NVM_WRITE_CMD_ERR_CODE_REQD_ERASE_FAILED 0x4UL + #define NVM_WRITE_CMD_ERR_CODE_VERIFY_FAILED 0x5UL + #define NVM_WRITE_CMD_ERR_CODE_INVALID_HEADER 0x6UL + #define NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED 0x7UL + #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED u8 unused_0[7]; }; @@ -10438,7 +10608,11 @@ struct hwrm_nvm_get_dev_info_output { __le16 srt2_fw_minor; __le16 srt2_fw_build; __le16 srt2_fw_patch; - u8 unused_0[7]; + u8 security_soc_fw_major; + u8 security_soc_fw_minor; + u8 security_soc_fw_build; + u8 security_soc_fw_patch; + u8 unused_0[3]; u8 valid; }; @@ -10568,7 +10742,9 @@ struct hwrm_nvm_install_update_cmd_err { #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE 0x2UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK 0x3UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT 0x4UL - #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_DEFRAG_FAILED 0x5UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR 0x6UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR u8 unused_0[7]; }; @@ -10591,7 +10767,8 @@ struct hwrm_nvm_get_variable_input { __le16 index_2; __le16 index_3; u8 flags; - #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define NVM_GET_VARIABLE_REQ_FLAGS_VALIDATE_OPT_VALUE 0x2UL u8 unused_0; }; @@ -10606,18 +10783,25 @@ struct hwrm_nvm_get_variable_output { #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_LAST NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF - u8 unused_0[3]; + u8 flags; + #define NVM_GET_VARIABLE_RESP_FLAGS_VALIDATE_OPT_VALUE 0x1UL + u8 unused_0[2]; u8 valid; }; /* hwrm_nvm_get_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_get_variable_cmd_err { u8 code; - #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT + #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x4UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x5UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x6UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x7UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x8UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; @@ -10667,10 +10851,17 @@ struct hwrm_nvm_set_variable_output { /* hwrm_nvm_set_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_set_variable_cmd_err { u8 code; - #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR + #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + 
#define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACTION_NOT_SUPPORTED 0x4UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x5UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x6UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x7UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x8UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x9UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; -- cgit v1.2.3 From 5a774b64cd6a008f016119782a5d3f30ed0bf3b7 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 18 Aug 2025 09:51:21 +0200 Subject: net: phy: micrel: Add support for lan8842 The LAN8842 is a low-power, single-port, triple-speed (10BASE-T/100BASE-TX/1000BASE-T) Ethernet physical layer transceiver (PHY) that supports transmission and reception of data on standard CAT-5, as well as CAT-5e and CAT-6, Unshielded Twisted Pair (UTP) cables. The LAN8842 supports industry-standard SGMII (Serial Gigabit Media Independent Interface), providing a chip-to-chip connection to a Gigabit Ethernet MAC using a single serialized link (differential pair) in each direction. There are two variants of this PHY: the LAN8842, which supports timestamping, and the LAN8832, which does not. Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250818075121.1298170-5-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 9af01bdd86d2..ca691641788b 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -32,6 +32,7 @@ #define PHY_ID_LAN8814 0x00221660 #define PHY_ID_LAN8804 0x00221670 #define PHY_ID_LAN8841 0x00221650 +#define PHY_ID_LAN8842 0x002216C0 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -- cgit v1.2.3 From 6c9468aad215a198742c8375b0415e42521c905c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:56:54 -0700 Subject: fscrypt: replace raw loads of info pointer with helper function Add and use a helper function fscrypt_get_inode_info_raw(). It loads an inode's fscrypt info pointer using a raw dereference, which is appropriate when the caller knows the key setup already happened. This eliminates most occurrences of inode::i_crypt_info in the source, in preparation for replacing that with a filesystem-specific field. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-2-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fscrypt.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 10dd161690a2..23c5198612d1 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -195,6 +195,22 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +/* + * Load the inode's fscrypt info pointer, using a raw dereference. Since this + * uses a raw dereference with no memory barrier, it is appropriate to use only + * when the caller knows the inode's key setup already happened, resulting in + * non-NULL fscrypt info.
E.g., the file contents en/decryption functions use + * this, since fscrypt_file_open() set up the key. + */ +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info_raw(const struct inode *inode) +{ + struct fscrypt_inode_info *ci = inode->i_crypt_info; + + VFS_WARN_ON_ONCE(ci == NULL); + return ci; +} + static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { -- cgit v1.2.3 From 93221de31a8df6710e02328f82dc68d7ab4ad9e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:56:55 -0700 Subject: fscrypt: add support for info in fs-specific part of inode Add an inode_info_offs field to struct fscrypt_operations, and update fs/crypto/ to support it. When set to a nonzero value, it specifies the offset to the fscrypt_inode_info pointer within the filesystem-specific part of the inode structure, to be used instead of inode::i_crypt_info. Since this makes inode::i_crypt_info no longer necessarily used, update comments that mentioned it. This is a prerequisite for a later commit that removes inode::i_crypt_info, saving memory and improving cache efficiency with filesystems that don't support fscrypt. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-3-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fscrypt.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 23c5198612d1..d7ff53accbfe 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -61,6 +61,12 @@ struct fscrypt_name { /* Crypto operations for filesystems */ struct fscrypt_operations { + /* + * The offset of the pointer to struct fscrypt_inode_info in the + * filesystem-specific part of the inode, relative to the beginning of + * the common part of the inode (the 'struct inode'). + */ + ptrdiff_t inode_info_offs; /* * If set, then fs/crypto/ will allocate a global bounce page pool the @@ -195,6 +201,14 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +static inline struct fscrypt_inode_info ** +fscrypt_inode_info_addr(const struct inode *inode) +{ + if (inode->i_sb->s_cop->inode_info_offs == 0) + return (struct fscrypt_inode_info **)&inode->i_crypt_info; + return (void *)inode + inode->i_sb->s_cop->inode_info_offs; +} + /* * Load the inode's fscrypt info pointer, using a raw dereference. Since this * uses a raw dereference with no memory barrier, it is appropriate to use only @@ -205,7 +219,7 @@ int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, static inline struct fscrypt_inode_info * fscrypt_get_inode_info_raw(const struct inode *inode) { - struct fscrypt_inode_info *ci = inode->i_crypt_info; + struct fscrypt_inode_info *ci = *fscrypt_inode_info_addr(inode); VFS_WARN_ON_ONCE(ci == NULL); return ci; @@ -216,11 +230,11 @@ fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). - * I.e., another task may publish ->i_crypt_info concurrently, executing - * a RELEASE barrier. We need to use smp_load_acquire() here to safely + * I.e., another task may publish the fscrypt info concurrently, + * executing a RELEASE barrier. Use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. 
*/ - return smp_load_acquire(&inode->i_crypt_info); + return smp_load_acquire(fscrypt_inode_info_addr(inode)); } /** -- cgit v1.2.3 From ab90c2d2476c4dd6deddd089c7e83b858d135783 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:00 -0700 Subject: fs: remove inode::i_crypt_info Now that all fscrypt-capable filesystems store the pointer to fscrypt_inode_info in the filesystem-specific part of the inode structure, inode::i_crypt_info is no longer needed. Update fscrypt_inode_info_addr() to no longer support the fallback to inode::i_crypt_info. Finally, remove inode::i_crypt_info itself along with the now-unnecessary forward declaration of fscrypt_inode_info. The end result of the migration to the filesystem-specific pointer is memory savings on CONFIG_FS_ENCRYPTION=y kernels for all filesystems that don't support fscrypt. Specifically, their in-memory inodes are now smaller by the size of a pointer: either 4 or 8 bytes. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-8-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 ----- include/linux/fscrypt.h | 8 ++++++-- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..1dafa18169be 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -72,7 +72,6 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -780,10 +779,6 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_inode_info *i_crypt_info; -#endif - #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d7ff53accbfe..516aba5b858b 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -201,11 +201,15 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +/* + * Returns the address of the fscrypt info pointer within the + * filesystem-specific part of the inode. (To save memory on filesystems that + * don't support fscrypt, a field in 'struct inode' itself is no longer used.) + */ static inline struct fscrypt_inode_info ** fscrypt_inode_info_addr(const struct inode *inode) { - if (inode->i_sb->s_cop->inode_info_offs == 0) - return (struct fscrypt_inode_info **)&inode->i_crypt_info; + VFS_WARN_ON_ONCE(inode->i_sb->s_cop->inode_info_offs == 0); return (void *)inode + inode->i_sb->s_cop->inode_info_offs; } -- cgit v1.2.3 From 2a7349add18e5915cd87251af5f98db1772b6131 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:01 -0700 Subject: fsverity: add support for info in fs-specific part of inode Add an inode_info_offs field to struct fsverity_operations, and update fs/verity/ to support it. When set to a nonzero value, it specifies the offset to the fsverity_info pointer within the filesystem-specific part of the inode structure, to be used instead of inode::i_verity_info. Since this makes inode::i_verity_info no longer necessarily used, update comments that mentioned it. This is a prerequisite for a later commit that removes inode::i_verity_info, saving memory and improving cache efficiency on filesystems that don't support fsverity. 
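As an illustration (not part of the patch), a filesystem that keeps its verity info pointer in its private inode would wire up the new offset roughly as follows; "myfs" and its inode layout are hypothetical, and the other fsverity_operations callbacks are elided:

	struct myfs_inode_info {
		struct fsverity_info *i_verity_info;	/* fs-private pointer */
		/* ... other fs-private fields ... */
		struct inode vfs_inode;			/* the common part */
	};

	static const struct fsverity_operations myfs_verity_ops = {
		/* offset relative to the embedded 'struct inode'; negative here
		 * because the pointer sits before vfs_inode in this layout
		 */
		.inode_info_offs =
			(ptrdiff_t)offsetof(struct myfs_inode_info, i_verity_info) -
			(ptrdiff_t)offsetof(struct myfs_inode_info, vfs_inode),
		/* ... ->begin_enable_verity(), ->get_verity_descriptor(), ... */
	};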
Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-9-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fsverity.h | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 1eb7eae580be..e0f132cb7839 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -28,6 +28,12 @@ /* Verity operations for filesystems */ struct fsverity_operations { + /** + * The offset of the pointer to struct fsverity_info in the + * filesystem-specific part of the inode, relative to the beginning of + * the common part of the inode (the 'struct inode'). + */ + ptrdiff_t inode_info_offs; /** * Begin enabling verity on the given file. @@ -124,15 +130,33 @@ struct fsverity_operations { #ifdef CONFIG_FS_VERITY +static inline struct fsverity_info ** +fsverity_info_addr(const struct inode *inode) +{ + if (inode->i_sb->s_vop->inode_info_offs == 0) + return (struct fsverity_info **)&inode->i_verity_info; + return (void *)inode + inode->i_sb->s_vop->inode_info_offs; +} + static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { /* - * Pairs with the cmpxchg_release() in fsverity_set_info(). - * I.e., another task may publish ->i_verity_info concurrently, - * executing a RELEASE barrier. We need to use smp_load_acquire() here - * to safely ACQUIRE the memory the other task published. + * Since this function can be called on inodes belonging to filesystems + * that don't support fsverity at all, and fsverity_info_addr() doesn't + * work on such filesystems, we have to start with an IS_VERITY() check. + * Checking IS_VERITY() here is also useful to minimize the overhead of + * fsverity_active() on non-verity files. + */ + if (!IS_VERITY(inode)) + return NULL; + + /* + * Pairs with the cmpxchg_release() in fsverity_set_info(). I.e., + * another task may publish the inode's verity info concurrently, + * executing a RELEASE barrier. Use smp_load_acquire() here to safely + * ACQUIRE the memory the other task published. */ - return smp_load_acquire(&inode->i_verity_info); + return smp_load_acquire(fsverity_info_addr(inode)); } /* enable.c */ @@ -156,11 +180,11 @@ void __fsverity_cleanup_inode(struct inode *inode); * fsverity_cleanup_inode() - free the inode's verity info, if present * @inode: an inode being evicted * - * Filesystems must call this on inode eviction to free ->i_verity_info. + * Filesystems must call this on inode eviction to free the inode's verity info. */ static inline void fsverity_cleanup_inode(struct inode *inode) { - if (inode->i_verity_info) + if (*fsverity_info_addr(inode)) __fsverity_cleanup_inode(inode); } @@ -267,12 +291,12 @@ static inline bool fsverity_verify_page(struct page *page) * fsverity_active() - do reads from the inode need to go through fs-verity? * @inode: inode to check * - * This checks whether ->i_verity_info has been set. + * This checks whether the inode's verity info has been set. * * Filesystems call this from ->readahead() to check whether the pages need to * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to * a race condition where the file is being read concurrently with - * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) + * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before the verity info.) 
* * Return: true if reads need to go through fs-verity, otherwise false */ @@ -287,7 +311,7 @@ static inline bool fsverity_active(const struct inode *inode) * @filp: the struct file being set up * * When opening a verity file, deny the open if it is for writing. Otherwise, - * set up the inode's ->i_verity_info if not already done. + * set up the inode's verity info if not already done. * * When combined with fscrypt, this must be called after fscrypt_file_open(). * Otherwise, we won't have the key set up to decrypt the verity metadata. -- cgit v1.2.3 From 818c659ac164e4e4639ceaedaccbdfebb1ef63b5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:05 -0700 Subject: fs: remove inode::i_verity_info Now that all fsverity-capable filesystems store the pointer to fsverity_info in the filesystem-specific part of the inode structure, inode::i_verity_info is no longer needed. Update fsverity_info_addr() to no longer support the fallback to inode::i_verity_info. Finally, remove inode::i_verity_info itself, and move the forward declaration of struct fsverity_info from fs.h (which no longer needs it) to fsverity.h. The end result of the migration to the filesystem-specific pointer is memory savings on CONFIG_FS_VERITY=y kernels for all filesystems that don't support fsverity. Specifically, their in-memory inodes are now smaller by the size of a pointer: either 4 or 8 bytes. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-13-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 ----- include/linux/fsverity.h | 10 ++++++++-- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1dafa18169be..12ecc6b0e6f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -73,7 +73,6 @@ struct seq_file; struct workqueue_struct; struct iov_iter; struct fscrypt_operations; -struct fsverity_info; struct fsverity_operations; struct fsnotify_mark_connector; struct fsnotify_sb_info; @@ -779,10 +778,6 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#ifdef CONFIG_FS_VERITY - struct fsverity_info *i_verity_info; -#endif - void *i_private; /* fs or device private pointer */ } __randomize_layout; diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index e0f132cb7839..844f7b8b56bb 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -26,6 +26,8 @@ /* Arbitrary limit to bound the kmalloc() size. Can be changed. */ #define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 +struct fsverity_info; + /* Verity operations for filesystems */ struct fsverity_operations { /** @@ -130,11 +132,15 @@ struct fsverity_operations { #ifdef CONFIG_FS_VERITY +/* + * Returns the address of the verity info pointer within the filesystem-specific + * part of the inode. (To save memory on filesystems that don't support + * fsverity, a field in 'struct inode' itself is no longer used.) 
+ */ static inline struct fsverity_info ** fsverity_info_addr(const struct inode *inode) { - if (inode->i_sb->s_vop->inode_info_offs == 0) - return (struct fsverity_info **)&inode->i_verity_info; + VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0); return (void *)inode + inode->i_sb->s_vop->inode_info_offs; } -- cgit v1.2.3 From 8a3d00dde63a339d31d1fdeead24ddfd4d459c70 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:06 -0700 Subject: fsverity: check IS_VERITY() in fsverity_cleanup_inode() Since getting the address of the fsverity_info has gotten a bit more expensive, make fsverity_cleanup_inode() check for IS_VERITY() instead. This avoids adding more overhead to non-verity files. This assumes that verity info is never set when !IS_VERITY(), which is currently true, but add a VFS_WARN_ON_ONCE() that asserts that. (This of course defeats the optimization, but only when CONFIG_VFS_DEBUG=y.) Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-14-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fsverity.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 844f7b8b56bb..5bc7280425a7 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -190,8 +190,15 @@ void __fsverity_cleanup_inode(struct inode *inode); */ static inline void fsverity_cleanup_inode(struct inode *inode) { - if (*fsverity_info_addr(inode)) + /* + * Only IS_VERITY() inodes can have verity info, so start by checking + * for IS_VERITY() (which is faster than retrieving the pointer to the + * verity info). This minimizes overhead for non-verity inodes. + */ + if (IS_VERITY(inode)) __fsverity_cleanup_inode(inode); + else + VFS_WARN_ON_ONCE(*fsverity_info_addr(inode) != NULL); } /* read_metadata.c */ -- cgit v1.2.3 From 370ac285f23aecae40600851fb4a1a9e75e50973 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:59 +0530 Subject: block: avoid cpu_hotplug_lock dependency on freeze_lock A recent lockdep[1] splat observed while running blktest block/005 reveals a potential deadlock caused by the cpu_hotplug_lock dependency on ->freeze_lock. This dependency was introduced by commit 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key"). That change added a static key to avoid fetching q->rq_qos when neither blk-wbt nor blk-iolatency is configured. The static key dynamically patches kernel text to a NOP when disabled, eliminating the overhead of fetching q->rq_qos in the I/O hot path. However, enabling a static key at runtime requires acquiring both cpu_hotplug_lock and jump_label_mutex. When this happens after the queue has already been frozen (i.e., while holding ->freeze_lock), it creates a locking dependency from cpu_hotplug_lock to ->freeze_lock, which leads to a potential deadlock reported by lockdep [1]. To resolve this, replace the static key mechanism with q->queue_flags: QUEUE_FLAG_QOS_ENABLED. This flag is evaluated in the fast path before accessing q->rq_qos. If the flag is set, we proceed to fetch q->rq_qos; otherwise, the access is skipped. Since q->queue_flags is commonly accessed in the I/O hot path and resides in the first cacheline of struct request_queue, checking it imposes minimal overhead while eliminating the deadlock risk. This change avoids the lockdep splat without introducing performance regressions.
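As an illustration (not part of the patch), the fast-path gating pattern described above looks roughly like this; the helper name is illustrative:

	static inline struct rq_qos *blk_queue_rq_qos(struct request_queue *q)
	{
		/* One test_bit() on the first cacheline replaces the static
		 * key: no text patching, hence no cpu_hotplug_lock taken
		 * while the queue is frozen.
		 */
		if (!test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags))
			return NULL;
		return q->rq_qos;
	}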
[1] https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key") Tested-by: Shin'ichiro Kawasaki Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fe1797bbec42 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,6 +656,7 @@ enum { QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_MAX }; -- cgit v1.2.3 From 0f07b7919d679050d354d3279faa74bdc7ce17a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:12 +0200 Subject: uprobes: Rename arch_uretprobe_trampoline function We are about to add a uprobe trampoline, so clean up the namespace. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-3-jolsa@kernel.org --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 516217c39094..01112f27cd21 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -224,7 +224,7 @@ extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); extern void uprobe_handle_trampoline(struct pt_regs *regs); -extern void *arch_uprobe_trampoline(unsigned long *psize); +extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); #else /* !CONFIG_UPROBES */ struct uprobes_state { -- cgit v1.2.3 From 82afdd05a16a424409682e06a53d6afcda038d30 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:13 +0200 Subject: uprobes: Make copy_from_page global Make copy_from_page global and add the uprobe prefix. Add the uprobe prefix to copy_to_page as well, for symmetry.
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-4-jolsa@kernel.org --- include/linux/uprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 01112f27cd21..7447e15559b8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -226,6 +226,7 @@ extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, extern void uprobe_handle_trampoline(struct pt_regs *regs); extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); +extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From 33d7b2beaf34a3c0f6406bc76f6e1b1755150ad9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:14 +0200 Subject: uprobes: Add uprobe_write function Add a uprobe_write function that does what uprobe_write_opcode did so far, but allows passing a verify callback function that checks the memory location before writing the opcode. It will be used in subsequent changes to implement specific checking logic for instruction updates. uprobe_write_opcode now calls uprobe_write with verify_opcode as the verify callback. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Masami Hiramatsu (Google) Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-5-jolsa@kernel.org --- include/linux/uprobes.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 7447e15559b8..e13382054435 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -187,6 +187,9 @@ struct uprobes_state { struct xol_area *xol_area; }; +typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, + uprobe_opcode_t *opcode); + extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -195,6 +198,8 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, + uprobe_opcode_t opcode, uprobe_write_verify_t verify); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From f8b7c528b4fb7018d12b6bb63bb52576cfc73697 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:15 +0200 Subject: uprobes: Add nbytes argument to uprobe_write Add an nbytes argument to uprobe_write and related functions in preparation for writing whole instructions in subsequent changes. Also rename the opcode arguments to insn, which fits better.
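As an illustration (not part of the patch), a callback matching the updated uprobe_write_verify_t signature (shown in the hunk below) might look like this; the skip/proceed return convention is assumed for the sketch, and only single-opcode writes are handled:

	static int verify_insn_sketch(struct page *page, unsigned long vaddr,
				      uprobe_opcode_t *insn, int nbytes)
	{
		uprobe_opcode_t old;

		if (nbytes != sizeof(old))	/* sketch: one opcode only */
			return -EINVAL;
		uprobe_copy_from_page(page, vaddr, &old, sizeof(old));
		/* assumed convention: 1 = write needed, 0 = already in place */
		return memcmp(&old, insn, nbytes) ? 1 : 0;
	}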
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-6-jolsa@kernel.org --- include/linux/uprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e13382054435..147c4a0a1af9 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -188,7 +188,7 @@ struct uprobes_state { }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, - uprobe_opcode_t *opcode); + uprobe_opcode_t *insn, int nbytes); extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -199,7 +199,7 @@ extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t opcode, uprobe_write_verify_t verify); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From ec46350fe1e2338f42ee84974c36b25afe8ba53a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:16 +0200 Subject: uprobes: Add is_register argument to uprobe_write and uprobe_write_opcode uprobe_write has a special path to restore the original page when the original instruction is written back. This path is taken when uprobe_write detects that something other than the breakpoint instruction is being written. Move the detection out to the callers and pass the result to uprobe_write as an argument, so it becomes possible to write instructions other than just the breakpoint and the restored original.
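As an illustration (not part of the patch), a caller now states its intent explicitly, per the prototype in the hunk below; whether set_swbp() passes the flag exactly like this is an assumption of the sketch:

	int set_swbp(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
		     unsigned long vaddr)
	{
		/* installing the breakpoint: is_register = true */
		return uprobe_write_opcode(auprobe, vma, vaddr,
					   UPROBE_SWBP_INSN, true);
	}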
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-7-jolsa@kernel.org --- include/linux/uprobes.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 147c4a0a1af9..518b26756469 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -197,9 +197,10 @@ extern bool is_swbp_insn(uprobe_opcode_t *insn); extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); -extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, + bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From 18a111256a0b4fedfe47101f084441a84d7e357a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:17 +0200 Subject: uprobes: Add do_ref_ctr argument to uprobe_write function Making the update_ref_ctr call in uprobe_write conditional on the new do_update_ref_ctr argument. This way we can use uprobe_write for instruction updates without doing the ref_ctr_offset update.
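A hypothetical caller that needs exactly this, sketched under the assumption of a 5-byte instruction update; new_insn and verify_insn are placeholders, not names from the patch:

    /* Update a whole instruction but leave the ref_ctr untouched. */
    err = uprobe_write(auprobe, vma, vaddr, (uprobe_opcode_t *)new_insn,
                       5, verify_insn, /*is_register=*/true,
                       /*do_update_ref_ctr=*/false);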
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-8-jolsa@kernel.org --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 518b26756469..5080619560d4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -200,7 +200,7 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From 91440ff4cafad4c86322a612e523f7f021a493e7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:18 +0200 Subject: uprobes/x86: Add mapping for optimized uprobe trampolines Adding support for a special user space trampoline mapping, with the following functions:

  uprobe_trampoline_get - find or add an uprobe_trampoline
  uprobe_trampoline_put - remove or destroy an uprobe_trampoline

The user space trampoline is exported as an arch specific user space special mapping through tramp_mapping, which is initialized in the following changes together with the new uprobe syscall. The uprobe trampoline needs to be callable/reachable from the probed address, so while searching for an available address we use the is_reachable_by_call function to decide whether the uprobe trampoline is callable from the probe address. All uprobe_trampoline objects are stored in the uprobes_state object and are cleaned up when the process mm_struct goes down. Adding new arch hooks for that, because this change is x86_64 specific. Locking is provided by the callers in the following changes.
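The lookup half of that, as described, might look roughly like the sketch below; apart from uprobes_state.head_tramps and is_reachable_by_call(), every name, field, and the argument order are assumptions:

    /* Sketch: reuse a trampoline reachable from vaddr, else create one. */
    static struct uprobe_trampoline *
    uprobe_trampoline_get_sketch(struct mm_struct *mm, unsigned long vaddr)
    {
            struct uprobe_trampoline *tramp;

            hlist_for_each_entry(tramp, &mm->uprobes_state.head_tramps, node) {
                    if (is_reachable_by_call(tramp->vaddr, vaddr)) {
                            atomic64_inc(&tramp->ref);
                            return tramp;
                    }
            }
            return create_uprobe_trampoline(mm, vaddr); /* hypothetical helper */
    }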
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-9-jolsa@kernel.org --- include/linux/uprobes.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 5080619560d4..b40d33aae016 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -17,6 +17,7 @@ #include #include #include +#include struct uprobe; struct vm_area_struct; @@ -185,6 +186,9 @@ struct xol_area; struct uprobes_state { struct xol_area *xol_area; +#ifdef CONFIG_X86_64 + struct hlist_head head_tramps; +#endif }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, @@ -233,6 +237,8 @@ extern void uprobe_handle_trampoline(struct pt_regs *regs); extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); +extern void arch_uprobe_clear_state(struct mm_struct *mm); +extern void arch_uprobe_init_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From 56101b69c9190667f473b9f93f8b6d8209aaa816 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:19 +0200 Subject: uprobes/x86: Add uprobe syscall to speed up uprobe Adding a new uprobe syscall that calls the uprobe handlers for a given 'breakpoint' address. The idea is that the 'breakpoint' address calls the user space trampoline, which executes the uprobe syscall. The syscall handler reads the return address of the initial call to retrieve the original 'breakpoint' address. With this address we find the related uprobe object and call its consumers. Adding the arch_uprobe_trampoline_mapping function that provides the uprobe trampoline mapping. This mapping is backed with one global page initialized at __init time and shared by all the mapping instances. We do not allow the uprobe syscall to be executed if the caller is not from the uprobe trampoline mapping. The uprobe syscall ensures the consumer (bpf program) sees register values in the state before the trampoline was called.
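A condensed sketch of that control flow; the stack-layout detail is an assumption (the trampoline's own pushes are glossed over), the mapping check is reduced to a comment, and only sys_uprobe and handle_syscall_uprobe come from the patches:

    SYSCALL_DEFINE0(uprobe)
    {
            struct pt_regs *regs = task_pt_regs(current);
            unsigned long ret_vaddr, bp_vaddr;

            /* Return address of the call into the trampoline, assumed
             * recoverable from the user stack at this point. */
            if (copy_from_user(&ret_vaddr, (void __user *)regs->sp,
                               sizeof(ret_vaddr)))
                    goto sigill;

            /* The 5-byte call that replaced the nop5 sits just before it. */
            bp_vaddr = ret_vaddr - 5;

            /* Reject callers outside the trampoline mapping, then run the
             * consumers for the original 'breakpoint' address. */
            handle_syscall_uprobe(regs, bp_vaddr);
            return 0;

    sigill:
            force_sig(SIGILL);
            return -1;
    }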
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-10-jolsa@kernel.org --- include/linux/syscalls.h | 2 ++ include/linux/uprobes.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 77f45e5d4413..66c06fcdfe19 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1005,6 +1005,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); asmlinkage long sys_uretprobe(void); +asmlinkage long sys_uprobe(void); + /* pciconfig: alpha, arm, arm64, ia64, sparc */ asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b40d33aae016..b6b077cc7d0f 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -239,6 +239,7 @@ extern unsigned long uprobe_get_trampoline_vaddr(void); extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); extern void arch_uprobe_clear_state(struct mm_struct *mm); extern void arch_uprobe_init_state(struct mm_struct *mm); +extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From ba2bfc97b4629b10bd8d02b36e04f3932a04cac4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:20 +0200 Subject: uprobes/x86: Add support to optimize uprobes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Putting together all the previously added pieces to support optimized uprobes on top of the 5-byte nop instruction. The current uprobe execution goes through the following steps:

  - installs a breakpoint instruction over the original instruction
  - the exception handler hits and calls the related uprobe consumers
  - either simulates the original instruction or does out-of-line single-step execution of it
  - returns to user space

The optimized uprobe path does the following:

  - checks that the original instruction is a 5-byte nop (plus other checks)
  - adds (or reuses an existing) user space trampoline with the uprobe syscall
  - overwrites the original instruction (5-byte nop) with a call to the user space trampoline
  - the user space trampoline executes the uprobe syscall, which calls the related uprobe consumers
  - the trampoline returns back to the next instruction

This approach won't speed up all uprobes, as it's limited to using nop5 as the original instruction, but we plan to use nop5 as the USDT probe instruction (which currently uses a single-byte nop) and speed up the USDT probes. The arch_uprobe_optimize function triggers the uprobe optimization and is called after the first uprobe hit. I originally had it called on uprobe installation, but then it clashed with the ELF loader, because the user space trampoline was added in a place where the loader might need to put ELF segments, so I decided to do it after the first uprobe hit, when loading is done. The uprobe is un-optimized in the arch specific set_orig_insn call.
The instruction overwrite is x86 arch specific and needs to go through 3 updates (on top of the nop5 instruction):

  - write int3 into the 1st byte
  - write the last 4 bytes of the call instruction
  - update the call instruction opcode

And the cleanup goes through similar stages in reverse:

  - overwrite the call opcode with the breakpoint (int3)
  - write the last 4 bytes of the nop5 instruction
  - write the nop5 first instruction byte
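Expressed as code, the forward direction might look like the sketch below; write_insn_bytes() stands in for the real arch write primitive, and the inter-stage synchronization is elided:

    /* Sketch of the nop5 -> call rewrite; int3 in byte 0 keeps any
     * concurrent executor trapped until the final opcode flip. */
    static int optimize_sketch(struct vm_area_struct *vma, unsigned long vaddr,
                               unsigned long tramp)
    {
            u8 call[5] = { 0xe8 };                  /* call rel32 */
            s32 rel = tramp - (vaddr + 5);
            int err;

            memcpy(call + 1, &rel, sizeof(rel));

            err = write_insn_bytes(vma, vaddr, "\xcc", 1);       /* 1) int3 */
            if (err)
                    return err;
            err = write_insn_bytes(vma, vaddr + 1, call + 1, 4); /* 2) rel32 */
            if (err)
                    return err;
            return write_insn_bytes(vma, vaddr, call, 1);        /* 3) opcode */
    }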
We do not unmap and release the uprobe trampoline when it's no longer needed, because there's no easy way to make sure none of the threads is still inside the trampoline. But we do not waste memory, because there's just a single page for all the uprobe trampoline mappings. We do waste a frame on the page mapping for every 4GB by keeping the uprobe trampoline page mapped, but that seems ok. We take the benefit from the fact that set_swbp and set_orig_insn are called under mmap_write_lock(mm), so we can use the current instruction as the state the uprobe is in - nop5/breakpoint/call trampoline - and decide the needed action (optimize/un-optimize) based on that. Attaching the speed up from the benchs/run_bench_uprobes.sh script:

current:
        usermode-count : 152.604 ± 0.044M/s
        syscall-count  : 13.359 ± 0.042M/s
    --> uprobe-nop     : 3.229 ± 0.002M/s
        uprobe-push    : 3.086 ± 0.004M/s
        uprobe-ret     : 1.114 ± 0.004M/s
        uprobe-nop5    : 1.121 ± 0.005M/s
        uretprobe-nop  : 2.145 ± 0.002M/s
        uretprobe-push : 2.070 ± 0.001M/s
        uretprobe-ret  : 0.931 ± 0.001M/s
        uretprobe-nop5 : 0.957 ± 0.001M/s

after the change:
        usermode-count : 152.448 ± 0.244M/s
        syscall-count  : 14.321 ± 0.059M/s
        uprobe-nop     : 3.148 ± 0.007M/s
        uprobe-push    : 2.976 ± 0.004M/s
        uprobe-ret     : 1.068 ± 0.003M/s
    --> uprobe-nop5    : 7.038 ± 0.007M/s
        uretprobe-nop  : 2.109 ± 0.004M/s
        uretprobe-push : 2.035 ± 0.001M/s
        uretprobe-ret  : 0.908 ± 0.001M/s
        uretprobe-nop5 : 3.377 ± 0.009M/s

I see a bit more speed up on Intel (above) compared to AMD. The big nop5 speed up is partly due to emulating nop5 and partly due to the optimization.

The key speed up we do this for is the USDT switch from nop to nop5:

        uprobe-nop  : 3.148 ± 0.007M/s
        uprobe-nop5 : 7.038 ± 0.007M/s

Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-11-jolsa@kernel.org --- include/linux/uprobes.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b6b077cc7d0f..08ef78439d0d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -192,7 +192,7 @@ struct uprobes_state { }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, - uprobe_opcode_t *insn, int nbytes); + uprobe_opcode_t *insn, int nbytes, void *data); extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -204,7 +204,8 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr, + void *data); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); @@ -240,6 +241,7 @@ extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void * extern void arch_uprobe_clear_state(struct mm_struct *mm); extern void arch_uprobe_init_state(struct mm_struct *mm); extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr); +extern void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From b08a784a5d1495c42ff9b0c70887d49211cddfe0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Aug 2025 19:03:54 +0100 Subject: net: Introduce skb_copy_datagram_from_iter_full() In a similar manner to copy_from_iter()/copy_from_iter_full(), introduce skb_copy_datagram_from_iter_full() which reverts the iterator to its initial state when returning an error. A subsequent fix for a vsock regression will make use of this new function. Cc: Christian Brauner Cc: Alexander Viro Signed-off-by: Will Deacon Acked-by: Michael S.
Tsirkin Reviewed-by: Stefan Hajnoczi Link: https://patch.msgid.link/20250818180355.29275-2-will@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..fa633657e4c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4172,6 +4172,8 @@ int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); -- cgit v1.2.3 From 3202d6ed9368fc1e842fda73727553ae614633f8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 21 Aug 2025 15:07:50 +0200 Subject: mmc: core: Add infrastructure for undervoltage handling Implement the core infrastructure to allow MMC bus types to handle REGULATOR_EVENT_UNDER_VOLTAGE events from power regulators. This is primarily aimed at allowing devices like eMMC to perform an emergency shutdown to prevent data corruption when a power failure is imminent. This patch introduces:

  - A new 'handle_undervoltage' function pointer in 'struct mmc_bus_ops'. Bus drivers (e.g., for eMMC) can implement this to define their emergency procedures.
  - A workqueue ('uv_work') in 'struct mmc_supply' to handle the event asynchronously in a high-priority context.
  - A new function 'mmc_handle_undervoltage()' which is called from the workqueue. It stops the host queue to prevent races with card removal, checks for the bus op, and invokes the handler.
  - Functions to register and unregister the regulator notifier, intended to be called by bus drivers like 'mmc_attach_mmc' when a compatible card is detected.

The notifier is only registered for the main vmmc supply, as undervoltage handling for vqmmc or vqmmc2 is not required at this time. Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20250821130751.2089587-2-o.rempel@pengutronix.de Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 5ed5d203de23..e0d935a4ac1d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -337,11 +337,15 @@ struct mmc_slot { struct regulator; struct mmc_pwrseq; +struct notifier_block; struct mmc_supply { struct regulator *vmmc; /* Card power supply */ struct regulator *vqmmc; /* Optional Vccq supply */ struct regulator *vqmmc2; /* Optional supply for phy */ + + struct notifier_block vmmc_nb; /* Notifier for vmmc */ + struct work_struct uv_work; /* Undervoltage work */ }; struct mmc_ctx { @@ -494,6 +498,13 @@ struct mmc_host { unsigned int can_dma_map_merge:1; /* merging can be used */ unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */ + /* + * Indicates if an undervoltage event has already been handled. + * This prevents repeated regulator notifiers from triggering + * multiple REGULATOR_EVENT_UNDER_VOLTAGE events. + */ + unsigned int undervoltage:1; /* Undervoltage state */ + int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ -- cgit v1.2.3
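As an illustration of the notifier half of the infrastructure above: a sketch assuming the handler name and the exact workqueue choice, while vmmc_nb, uv_work and the undervoltage flag come from the patch:

    /* Sketch: regulator notifiers may fire repeatedly; latch the state
     * once and defer the heavy lifting to the high-priority work item. */
    static int mmc_vmmc_uv_notify(struct notifier_block *nb,
                                  unsigned long event, void *data)
    {
            struct mmc_host *host = container_of(nb, struct mmc_host,
                                                 supply.vmmc_nb);

            if ((event & REGULATOR_EVENT_UNDER_VOLTAGE) && !host->undervoltage) {
                    host->undervoltage = 1;
                    queue_work(system_highpri_wq, &host->supply.uv_work);
            }
            return NOTIFY_OK;
    }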
From f41345f47fb267a9c95ca710c33448f8d0d81d83 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 20 Aug 2025 15:18:06 +0200 Subject: bpf: Use tnums for JEQ/JNE is_branch_taken logic In the following toy program (reg states minimized for readability), R0 and R1 always have different values at instruction 6. This is obvious when reading the program but cannot be guessed from ranges alone as they overlap (R0 in [0; 0xc0000000], R1 in [1024; 0xc0000400]).

  0: call bpf_get_prandom_u32#7 ; R0_w=scalar()
  1: w0 = w0                    ; R0_w=scalar(var_off=(0x0; 0xffffffff))
  2: r0 >>= 30                  ; R0_w=scalar(var_off=(0x0; 0x3))
  3: r0 <<= 30                  ; R0_w=scalar(var_off=(0x0; 0xc0000000))
  4: r1 = r0                    ; R1_w=scalar(var_off=(0x0; 0xc0000000))
  5: r1 += 1024                 ; R1_w=scalar(var_off=(0x400; 0xc0000000))
  6: if r1 != r0 goto pc+1

Looking at tnums however, we can deduce that R1 is always different from R0 because their tnums don't agree on known bits. This patch uses this logic to improve is_scalar_branch_taken in the case of BPF_JEQ and BPF_JNE. This change has a tiny impact on complexity, which was measured with the Cilium complexity CI test. That test covers 72 programs with various build and load time configurations for a total of 970 test cases. For 80% of test cases, the patch has no impact. On the other test cases, the patch decreases complexity by only 0.08% on average. In the best case, the verifier needs to walk 3% fewer instructions and, in the worst case, 1.5% more. Overall, the patch has a small positive impact, especially for our largest programs. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/be3ee70b6e489c49881cb1646114b1d861b5c334.1755694147.git.paul.chaignon@gmail.com --- include/linux/tnum.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 57ed3035cc30..0ffb77ffe0e8 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -51,6 +51,9 @@ struct tnum tnum_xor(struct tnum a, struct tnum b); /* Multiply two tnums, return @a * @b */ struct tnum tnum_mul(struct tnum a, struct tnum b); +/* Return true if the known bits of both tnums have the same value */ +bool tnum_overlap(struct tnum a, struct tnum b); + /* Return a tnum representing numbers satisfying both @a and @b */ struct tnum tnum_intersect(struct tnum a, struct tnum b); -- cgit v1.2.3
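The check itself is compact; a sketch consistent with the header comment above (an assumed implementation, not the verified one):

    /* Two tnums can share a concrete value only if they agree on every
     * bit that is known (not masked) in both. */
    static bool tnum_overlap_sketch(struct tnum a, struct tnum b)
    {
            u64 unknown = a.mask | b.mask;

            return (a.value & ~unknown) == (b.value & ~unknown);
    }

When it returns false for two scalar registers, is_scalar_branch_taken can predict BPF_JEQ as never taken and BPF_JNE as always taken, which is exactly the toy program's case: R0 and R1 disagree on the known 0x400 bit.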
From afa3701c0e45ecb9e4d160048ca4e353c7489948 Mon Sep 17 00:00:00 2001 From: Tiffany Yang Date: Thu, 21 Aug 2025 18:37:52 -0700 Subject: cgroup: cgroup.stat.local time accounting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There isn't yet a clear way to identify a set of "lost" time that everyone (or at least a wider group of users) cares about. However, users can perform some delay accounting by iterating over components of interest. This patch allows cgroup v2 freezing time to be one of those components. Track the cumulative time that each v2 cgroup spends freezing and expose it to userland via a new local stat file in cgroupfs. Thank you to Michal, who provided the ASCII art in the updated documentation. To access this value:

  $ mkdir /sys/fs/cgroup/test
  $ cat /sys/fs/cgroup/test/cgroup.stat.local
  freeze_time_total 0

Ensure consistent freeze time reads with freeze_seq, a per-cgroup sequence counter. Writes are serialized using the css_set_lock. Signed-off-by: Tiffany Yang Cc: Tejun Heo Cc: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6b93a64115fe..539c64eeef38 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -433,6 +433,23 @@ struct cgroup_freezer_state { * frozen, SIGSTOPped, and PTRACEd. */ int nr_frozen_tasks; + + /* Freeze time data consistency protection */ + seqcount_t freeze_seq; + + /* + * Most recent time the cgroup was requested to freeze. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 freeze_start_nsec; + + /* + * Total duration the cgroup has spent freezing. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 frozen_nsec; }; struct cgroup { -- cgit v1.2.3 From ec79003c5f9d2c7f9576fc69b8dbda80305cbe3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 21 Aug 2025 02:18:24 +0000 Subject: atm: atmtcp: Prevent arbitrary write in atmtcp_recv_control(). syzbot reported the splat below. [0] When atmtcp_v_open() or atmtcp_v_close() is called via connect() or close(), atmtcp_send_control() is called to send an in-kernel special message. The message has ATMTCP_HDR_MAGIC in atmtcp_control.hdr.length. Also, a pointer of struct atm_vcc is set to atmtcp_control.vcc. The notable thing is that struct atmtcp_control is uAPI but has a space for an in-kernel pointer:

  struct atmtcp_control {
          struct atmtcp_hdr hdr;  /* must be first */
          ...
          atm_kptr_t vcc;         /* both directions */
          ...
  } __ATM_API_ALIGN;

  typedef struct { unsigned char _[8]; } __ATM_API_ALIGN atm_kptr_t;

The special message is processed in atmtcp_recv_control() called from atmtcp_c_send(). atmtcp_c_send() is vcc->dev->ops->send() and is called from 2 paths: 1. .ndo_start_xmit() (vcc->send() == atm_send_aal0()) 2.
vcc_sendmsg() The problem is sendmsg() does not validate the message length and userspace can abuse atmtcp_recv_control() to overwrite any kptr by atmtcp_control. Let's add a new ->pre_send() hook to validate messages from sendmsg(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc00200000ab: 0000 [#1] SMP KASAN PTI KASAN: probably user-memory-access in range [0x0000000100000558-0x000000010000055f] CPU: 0 UID: 0 PID: 5865 Comm: syz-executor331 Not tainted 6.17.0-rc1-syzkaller-00215-gbab3ce404553 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:atmtcp_recv_control drivers/atm/atmtcp.c:93 [inline] RIP: 0010:atmtcp_c_send+0x1da/0x950 drivers/atm/atmtcp.c:297 Code: 4d 8d 75 1a 4c 89 f0 48 c1 e8 03 42 0f b6 04 20 84 c0 0f 85 15 06 00 00 41 0f b7 1e 4d 8d b7 60 05 00 00 4c 89 f0 48 c1 e8 03 <42> 0f b6 04 20 84 c0 0f 85 13 06 00 00 66 41 89 1e 4d 8d 75 1c 4c RSP: 0018:ffffc90003f5f810 EFLAGS: 00010203 RAX: 00000000200000ab RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802a510000 RSI: 00000000ffffffff RDI: ffff888030a6068c RBP: ffff88802699fb40 R08: ffff888030a606eb R09: 1ffff1100614c0dd R10: dffffc0000000000 R11: ffffffff8718fc40 R12: dffffc0000000000 R13: ffff888030a60680 R14: 000000010000055f R15: 00000000ffffffff FS: 00007f8d7e9236c0(0000) GS:ffff888125c1c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000075bde000 CR4: 00000000003526f0 Call Trace: vcc_sendmsg+0xa10/0xc60 net/atm/common.c:645 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:729 ____sys_sendmsg+0x505/0x830 net/socket.c:2614 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668 __sys_sendmsg net/socket.c:2700 [inline] __do_sys_sendmsg net/socket.c:2705 [inline] __se_sys_sendmsg net/socket.c:2703 [inline] __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2703 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8d7e96a4a9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f8d7e923198 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f8d7e9f4308 RCX: 00007f8d7e96a4a9 RDX: 0000000000000000 RSI: 0000200000000240 RDI: 0000000000000005 RBP: 00007f8d7e9f4300 R08: 65732f636f72702f R09: 65732f636f72702f R10: 65732f636f72702f R11: 0000000000000246 R12: 00007f8d7e9c10ac R13: 00007f8d7e9231a0 R14: 0000200000000200 R15: 0000200000000250 Modules linked in: Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68a6767c.050a0220.3d78fd.0011.GAE@google.com/ Tested-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250821021901.2814721-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 45f2f278b50a..70807c679f1a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -185,6 +185,7 @@ struct atmdev_ops { /* only send is required */ int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, void __user *arg); #endif + int 
(*pre_send)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); -- cgit v1.2.3 From 3c716487936aa54083c130d46ad5747769695e09 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 14 Aug 2025 18:59:49 +0200 Subject: genirq: Remove GENERIC_IRQ_LEGACY IA64 is gone and with it the last GENERIC_IRQ_LEGACY user. Remove GENERIC_IRQ_LEGACY. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250814165949.hvtP03r4@linutronix.de --- include/linux/irq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1d6b606a81ef..c9bcdbf6bc63 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -976,10 +976,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq); -#endif - /** * struct irq_chip_regs - register offsets for struct irq_gci * @enable: Enable register offset to reg_base -- cgit v1.2.3 From 7a721a2fee2bce01af26699a87739db8ca8ea3c8 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 14 Aug 2025 07:28:31 +0800 Subject: genirq: Add irq_chip_(startup/shutdown)_parent() As the MSI controller on SG2044 uses PLIC as the underlying interrupt controller, it needs to call irq_enable() and irq_disable() to start up/shut down interrupts. Otherwise, the MSI interrupt cannot be started up correctly and will not respond to any incoming interrupt. Introduce irq_chip_startup_parent() and irq_chip_shutdown_parent() to allow the interrupt controller to call the irq_startup()/irq_shutdown() callbacks of the parent interrupt chip. In case the irq_startup()/irq_shutdown() callbacks are not implemented for the parent interrupt chip, this falls back to irq_chip_enable_parent() or irq_chip_disable_parent(). Suggested-by: Thomas Gleixner Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Tested-by: Chen Wang # Pioneerbox Reviewed-by: Chen Wang Link: https://lore.kernel.org/all/20250813232835.43458-2-inochiama@gmail.com Link: https://lore.kernel.org/lkml/20250722224513.22125-1-inochiama@gmail.com/ --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c9bcdbf6bc63..c67e76fbcc07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -669,6 +669,8 @@ extern int irq_chip_set_parent_state(struct irq_data *data, extern int irq_chip_get_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); +extern void irq_chip_shutdown_parent(struct irq_data *data); +extern unsigned int irq_chip_startup_parent(struct irq_data *data); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); -- cgit v1.2.3
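Following the fallback rule spelled out in the message, the startup side plausibly reduces to something like this sketch (an assumed shape, not the committed body):

    /* Sketch: start up the parent interrupt, or fall back to enable. */
    unsigned int irq_chip_startup_parent(struct irq_data *data)
    {
            struct irq_data *parent = data->parent_data;

            if (parent->chip->irq_startup)
                    return parent->chip->irq_startup(parent);

            irq_chip_enable_parent(data);
            return 0;
    }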
From 54f45a30c0d0153d2be091ba2d683ab6db6d1d5b Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 14 Aug 2025 07:28:32 +0800 Subject: PCI/MSI: Add startup/shutdown for per device domains As the RISC-V PLIC cannot apply affinity settings without invoking irq_enable(), it will make the interrupt unavailable when used as an underlying interrupt chip for the MSI controller. Implement the irq_startup() and irq_shutdown() callbacks for the PCI MSI and MSI-X templates. For chips that specify MSI_FLAG_PCI_MSI_STARTUP_PARENT, the parent startup and shutdown functions are invoked. That allows the interrupt on the parent chip to be enabled if the interrupt has not been enabled during allocation. This is necessary for MSI controllers which use the PLIC as the underlying parent interrupt chip. Suggested-by: Thomas Gleixner Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Tested-by: Chen Wang # Pioneerbox Reviewed-by: Chen Wang Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/all/20250813232835.43458-3-inochiama@gmail.com --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index e5e86a8529fb..3111ba95fbde 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -568,6 +568,8 @@ enum { MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Support for parent mask/unmask */ MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9), + /* Support for parent startup/shutdown */ + MSI_FLAG_PCI_MSI_STARTUP_PARENT = (1 << 10), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), -- cgit v1.2.3 From 92a96b0a227e91dc42475265a1ce766b6cd044fa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 17 Aug 2025 23:09:18 +0100 Subject: io_uring: add request poisoning Poison various request fields on free. __io_req_caches_free() is a slow path, so this can be done unconditionally there, but gate it on kasan for io_req_add_to_cache(). Note that some fields are logically retained between cache allocations and can't be poisoned in io_req_add_to_cache(). Ideally, it'd be replaced with KASAN'ed caches, but that can't be enabled because of some synchronisation nuances. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7a78e8a7f5be434313c400650b862e36c211b312.1755459452.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/poison.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 8ca2235f78d5..299e2dd7da6d 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -90,4 +90,7 @@ /********** lib/stackdepot.c **********/ #define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) +/********** io_uring/ **********/ +#define IO_URING_PTR_POISON ((void *)(0x1091UL + POISON_POINTER_DELTA)) + #endif -- cgit v1.2.3 From 5fda51255439addd1c9059098e30847a375a1008 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Aug 2025 20:03:39 -0600 Subject: io_uring/kbuf: switch to storing struct io_buffer_list locally Currently the buffer list is stored in struct io_kiocb. The buffer list can be of two types:

  1) Classic/legacy buffer lists. These don't need to get referenced after a buffer pick, and hence storing them in struct io_kiocb is perfectly fine.

  2) Ring provided buffer lists. These DO need to be referenced after the initial buffer pick, as they need to get consumed later on. This can be either just incrementing the head of the ring, or it can be consuming parts of a buffer if incremental buffer consumption has been configured.

For case 2, io_uring needs to be careful not to access the buffer list after the initial pick-and-execute context. The core does recycling of these, but it's easy to make a mistake, because it's stored in the io_kiocb, which does persist across multiple execution contexts, either because it's a multishot request or simply because it needed some kind of async trigger (eg poll) for retry purposes.
Add a struct io_buffer_list to struct io_br_sel, which is always on stack for the various users of it. This prevents the buffer list from leaking outside of that execution context, and additionally it enables kbuf to not even pass back the struct io_buffer_list if the given context isn't appropriately locked already. This doesn't fix any bugs, it's simply a defensive measure to prevent any issues with reuse of a buffer list. Link: https://lore.kernel.org/r/20250821020750.598432-12-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 80a178f3d896..1d33984611bc 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -674,12 +674,6 @@ struct io_kiocb { /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ struct io_buffer *kbuf; - /* - * stores buffer ID for ring provided buffers, valid IFF - * REQ_F_BUFFER_RING is set. - */ - struct io_buffer_list *buf_list; - struct io_rsrc_node *buf_node; }; -- cgit v1.2.3 From d589bcddaa3f8b1668499c3f0466863df3abe37a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Aug 2025 12:02:06 +0800 Subject: io-uring: move `struct io_br_sel` into io_uring_types.h Move `struct io_br_sel` into io_uring_types.h and prepare for supporting provided buffer on uring_cmd. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250821040210.1152145-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 1d33984611bc..9c6c548f43f5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -85,6 +85,25 @@ struct io_mapped_region { unsigned flags; }; +/* + * Return value from io_buffer_list selection, to avoid stashing it in + * struct io_kiocb. For legacy/classic provided buffers, keeping a reference + * across execution contexts are fine. But for ring provided buffers, the + * list may go away as soon as ->uring_lock is dropped. As the io_kiocb + * persists, it's better to just keep the buffer local for those cases. + */ +struct io_br_sel { + struct io_buffer_list *buf_list; + /* + * Some selection parts return the user address, others return an error. + */ + union { + void __user *addr; + ssize_t val; + }; +}; + + /* * Arbitrary limit, can be raised if need be */ -- cgit v1.2.3 From 620a50c927004f5c9420a7ca9b1a55673dbf3941 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Aug 2025 12:02:07 +0800 Subject: io_uring: uring_cmd: add multishot support Add UAPI flag IORING_URING_CMD_MULTISHOT for supporting multishot uring_cmd operations with provided buffer. 
This enables drivers to post multiple completion events from a single uring_cmd submission, which is useful for:

  - Notifying userspace of device events (e.g., interrupt handling)
  - Supporting devices with multiple event sources (e.g., multi-queue devices)
  - Avoiding the need for device poll() support when events originate from multiple sources device-wide

The implementation adds two new APIs:

  - io_uring_cmd_select_buffer(): selects a buffer from the provided buffer group for multishot uring_cmd
  - io_uring_mshot_cmd_post_cqe(): posts a CQE after event data is pushed to the provided buffer

Multishot uring_cmd must be used with buffer select (IOSQE_BUFFER_SELECT) and is mutually exclusive with IORING_URING_CMD_FIXED for now. The ublk driver will be the first user of this functionality: https://github.com/ming1/linux/commits/ublk-devel/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250821040210.1152145-3-ming.lei@redhat.com [axboe: fold in fix for !CONFIG_IO_URING] Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index cfa6d0c0c322..4bd3a7339243 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -70,6 +70,21 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, /* Execute the request from a blocking context */ void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); +/* + * Select a buffer from the provided buffer group for multishot uring_cmd. + * Returns the selected buffer address and size. + */ +struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, + unsigned buf_group, size_t *len, + unsigned int issue_flags); + +/* + * Complete a multishot uring_cmd event. This will post a CQE to the completion + * queue and update the provided buffer. + */ +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, + struct io_br_sel *sel, unsigned int issue_flags); + #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, @@ -102,6 +117,17 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd) { } +static inline struct io_br_sel +io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group, + size_t *len, unsigned int issue_flags) +{ + return (struct io_br_sel) { .val = -EOPNOTSUPP }; +} +static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, + ssize_t ret, unsigned int issue_flags) +{ + return true; +} #endif /* -- cgit v1.2.3 From d0201c4436c53412146d526855c585fa9d54ca13 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Aug 2025 14:01:46 -0600 Subject: io_uring: remove io_ctx_cqe32() helper It's pretty pointless and only used for the tracing helper, get rid of it.
Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 9c6c548f43f5..d1e25f3fe0b3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -740,10 +740,4 @@ struct io_overflow_cqe { struct list_head list; struct io_uring_cqe cqe; }; - -static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx) -{ - return ctx->flags & IORING_SETUP_CQE32; -} - #endif -- cgit v1.2.3 From 6e376f245f19feeadddafb2c3fa5fbd6469ecdfe Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Aug 2025 11:48:42 +0200 Subject: gpio: generic: provide to_gpio_generic_chip() Provide a helper allowing to convert a struct gpio_chip address to the struct gpio_generic_chip that wraps it. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250825-gpio-mmio-gpio-conv-v1-1-356b4b1d5110@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/generic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index f3a8db4598bb..5a85ecbef8d2 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -55,6 +55,12 @@ struct gpio_generic_chip { struct gpio_chip gc; }; +static inline struct gpio_generic_chip * +to_gpio_generic_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct gpio_generic_chip, gc); +} + /** * gpio_generic_chip_init() - Initialize a generic GPIO chip. * @chip: Generic GPIO chip to set up. -- cgit v1.2.3 From 16397871b6e35fa46a2bec27b3558f93b050c6fc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Aug 2025 11:48:43 +0200 Subject: gpio: generic: provide helpers for reading and writing registers Provide helpers wrapping the read_reg() and write_reg() callbacks of the generic GPIO API that are called directly by many users. This is done to hide their implementation ahead of moving them into the separate generic GPIO struct. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250825-gpio-mmio-gpio-conv-v1-2-356b4b1d5110@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/generic.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index 5a85ecbef8d2..4c0626b53ec9 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -100,6 +100,37 @@ gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, return chip->gc.set(&chip->gc, offset, value); } +/** + * gpio_generic_read_reg() - Read a register using the underlying callback. + * @chip: Generic GPIO chip to use. + * @reg: Register to read. + * + * Returns: value read from register. + */ +static inline unsigned long +gpio_generic_read_reg(struct gpio_generic_chip *chip, void __iomem *reg) +{ + if (WARN_ON(!chip->gc.read_reg)) + return 0; + + return chip->gc.read_reg(reg); +} + +/** + * gpio_generic_write_reg() - Write a register using the underlying callback. + * @chip: Generic GPIO chip to use. + * @reg: Register to write to. + * @val: New value to write. 
+ */ +static inline void gpio_generic_write_reg(struct gpio_generic_chip *chip, + void __iomem *reg, unsigned long val) +{ + if (WARN_ON(!chip->gc.write_reg)) + return; + + chip->gc.write_reg(reg, val); +} + #define gpio_generic_chip_lock(gen_gc) \ raw_spin_lock(&(gen_gc)->gc.bgpio_lock) -- cgit v1.2.3 From 60ad9a07319283e6e1094cef3e972e754315c024 Mon Sep 17 00:00:00 2001 From: Junjie Cao Date: Wed, 20 Aug 2025 08:47:55 +0800 Subject: iio: core: switch info_mask fields to unsigned long to match find_bit helpers for_each_set_bit()/find_*_bit() expect arrays of unsigned long (see include/linux/find.h), but industrialio-core passed const long * into iio_device_add_info_mask_type{,_avail}(). These masks are used purely as bit arrays and are populated via BIT() (1UL << n). Switch the info_mask_* fields and the corresponding function parameters to unsigned long so the types match the helpers. This removes sparse warnings about signedness mismatches (seen with 'make C=1' CF='-Wsparse-all') without changing behavior or struct layout. No functional change intended. Suggested-by: Jonathan Cameron Signed-off-by: Junjie Cao Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250820004755.69627-1-junjie.cao@intel.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 2f5560646ee4..872ebdf0dd77 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -271,14 +271,14 @@ struct iio_chan_spec { unsigned int num_ext_scan_type; }; }; - long info_mask_separate; - long info_mask_separate_available; - long info_mask_shared_by_type; - long info_mask_shared_by_type_available; - long info_mask_shared_by_dir; - long info_mask_shared_by_dir_available; - long info_mask_shared_by_all; - long info_mask_shared_by_all_available; + unsigned long info_mask_separate; + unsigned long info_mask_separate_available; + unsigned long info_mask_shared_by_type; + unsigned long info_mask_shared_by_type_available; + unsigned long info_mask_shared_by_dir; + unsigned long info_mask_shared_by_dir_available; + unsigned long info_mask_shared_by_all; + unsigned long info_mask_shared_by_all_available; const struct iio_event_spec *event_spec; unsigned int num_event_specs; const struct iio_chan_spec_ext_info *ext_info; -- cgit v1.2.3 From 7a6fc1634cea6f220228a69b1c0210e6b8b1aaf0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:45 -0700 Subject: blk-mq-dma: create blk_map_iter type The req_iterator happens to have similar fields to what the dma iterator needs, but we're not necessarily iterating a request's bi_io_vec. Create a new type that can be amended for additional future use. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Martin K.
Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-2-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq-dma.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index c26a01aeae00..6a7e3828673d 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -5,6 +5,11 @@ #include #include +struct blk_map_iter { + struct bvec_iter iter; + struct bio *bio; +}; + struct blk_dma_iter { /* Output address range for this iteration */ dma_addr_t addr; @@ -14,7 +19,7 @@ struct blk_dma_iter { blk_status_t status; /* Internal to blk_rq_dma_map_iter_* */ - struct req_iterator iter; + struct blk_map_iter iter; struct pci_p2pdma_map_state p2pdma; }; -- cgit v1.2.3 From dae75dead2359edd7c55e1964e0edf7d03535b31 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:46 -0700 Subject: blk-mq-dma: provide the bio_vec array being iterated This will make it easier to add different sources of the bvec array, like for upcoming integrity support, rather than assuming the use of the bio's bi_io_vec. It also makes iterating "special" payloads more consistent with iterating normal payloads. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-3-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq-dma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index 6a7e3828673d..e5cb5e46fc92 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -8,6 +8,7 @@ struct blk_map_iter { struct bvec_iter iter; struct bio *bio; + struct bio_vec *bvecs; }; struct blk_dma_iter { -- cgit v1.2.3 From 92fb75fd14b041038e30bc725ab4c1e625243573 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:47 -0700 Subject: blk-mq-dma: require unmap caller provide p2p map type In preparation for integrity dma mappings, we can't rely on the request flag because data and metadata may have different mapping types. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-4-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq-dma.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index e5cb5e46fc92..881880095e0d 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -47,14 +47,15 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) * @dma_dev: device to unmap from * @state: DMA IOVA state * @mapped_len: number of bytes to unmap + * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR * * Returns %false if the callers need to manually unmap every DMA segment * mapped using @iter or %true if no work is left to be done.
*/ static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, - struct dma_iova_state *state, size_t mapped_len) + struct dma_iova_state *state, size_t mapped_len, bool is_p2p) { - if (req->cmd_flags & REQ_P2PDMA) + if (is_p2p) return true; if (dma_use_iova(state)) { -- cgit v1.2.3 From 7092639031a1bd5320ab827e8f665350f332b7ce Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:48 -0700 Subject: blk-mq: remove REQ_P2PDMA flag It's not serving any particular purpose. pci_p2pdma_state() already has all the appropriate checks, so the config and flag checks are not guarding anything. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-5-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 09b99d52fd36..930daff207df 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -386,7 +386,6 @@ enum req_flag_bits { __REQ_DRV, /* for driver use */ __REQ_FS_PRIVATE, /* for file system (submitter) use */ __REQ_ATOMIC, /* for atomic write operations */ - __REQ_P2PDMA, /* contains P2P DMA pages */ /* * Command specific flags, keep last: */ @@ -419,7 +418,6 @@ enum req_flag_bits { #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) #define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC) -#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA) #define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) -- cgit v1.2.3 From fec9b16dc5550191fd85af118271ea00e8dcc5f8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:50 -0700 Subject: blk-mq-dma: add scatter-less integrity data DMA mapping Similar to regular data, introduce more efficient integrity mapping helpers that do away with the scatterlist structure. This uses the block mapping iterator to add IOVA segments if IOMMU is enabled, or maps directly if not. This also supports P2P segments if integrity data ever wants to allocate that type of memory. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-7-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 17 +++++++++++++++++ include/linux/blk-mq-dma.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index e67a2b6e8f11..78fe2459e661 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -4,6 +4,7 @@ #include #include +#include struct request; @@ -31,6 +32,11 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes); int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, struct logical_block_metadata_cap __user *argp); +bool blk_rq_integrity_dma_map_iter_start(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + struct blk_dma_iter *iter); +bool blk_rq_integrity_dma_map_iter_next(struct request *req, + struct device *dma_dev, struct blk_dma_iter *iter); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -115,6 +121,17 @@ static inline int blk_rq_integrity_map_user(struct request *rq, { return -EINVAL; } +static inline bool blk_rq_integrity_dma_map_iter_start(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + struct blk_dma_iter *iter) +{ + return false; +} +static inline bool blk_rq_integrity_dma_map_iter_next(struct request *req, + struct device *dma_dev, struct blk_dma_iter *iter) +{ + return false; +} static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) { return NULL; diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index 881880095e0d..0f45ea110ca1 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -9,6 +9,7 @@ struct blk_map_iter { struct bvec_iter iter; struct bio *bio; struct bio_vec *bvecs; + bool is_integrity; }; struct blk_dma_iter { -- cgit v1.2.3 From cbdd16b818eef876dd2de9d503fe7397a0666cbe Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Mon, 18 Aug 2025 19:49:34 +0800 Subject: HID: i2c-hid: Make elan touch controllers power on after panel is enabled Introduce a new HID quirk to indicate that this device has to be enabled after the panel's backlight is enabled, and update the driver data for the elan devices to enable this quirk. This cannot be an I2C HID quirk because the kernel needs to acknowledge this before powering up the device and reading the VID/PID. When this quirk is enabled, register .panel_enabled()/.panel_disabling() instead for the panel follower. Also rename the *panel_prepare* functions to *panel_follower* because they could be called in other situations now.
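A sketch of the driver-side selection implied by the message; the funcs tables and the callback bodies are assumptions layered on the drm_panel_follower API, while the .panel_enabled/.panel_disabling hook names come from the commit text:

    /* Sketch: power the touch controller at panel-enable time when the
     * quirk is set, at panel-prepare time otherwise. */
    static const struct drm_panel_follower_funcs i2c_hid_prepare_funcs = {
            .panel_prepared    = i2c_hid_core_panel_prepared,    /* assumed */
            .panel_unpreparing = i2c_hid_core_panel_unpreparing, /* assumed */
    };

    static const struct drm_panel_follower_funcs i2c_hid_backlight_funcs = {
            .panel_enabled   = i2c_hid_core_panel_prepared,      /* assumed */
            .panel_disabling = i2c_hid_core_panel_unpreparing,   /* assumed */
    };

Then, at probe time, something along these lines selects between them:

    ihid->panel_follower.funcs =
            (hid->quirks & HID_QUIRK_POWER_ON_AFTER_BACKLIGHT) ?
                    &i2c_hid_backlight_funcs : &i2c_hid_prepare_funcs;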
Fixes: bd3cba00dcc63 ("HID: i2c-hid: elan: Add support for Elan eKTH6915 i2c-hid touchscreens") Fixes: d06651bebf99e ("HID: i2c-hid: elan: Add elan-ekth6a12nay timing") Reviewed-by: Douglas Anderson Signed-off-by: Pin-yen Lin Acked-by: Jiri Kosina Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250818115015.2909525-2-treapking@chromium.org --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..c32425b5d011 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -364,6 +364,7 @@ struct hid_item { * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: * | @HID_QUIRK_IGNORE_SPECIAL_DRIVER + * | @HID_QUIRK_POWER_ON_AFTER_BACKLIGHT * | @HID_QUIRK_FULLSPEED_INTERVAL: * | @HID_QUIRK_NO_INIT_REPORTS: * | @HID_QUIRK_NO_IGNORE: @@ -391,6 +392,7 @@ struct hid_item { #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) #define HID_QUIRK_NOINVERT BIT(21) #define HID_QUIRK_IGNORE_SPECIAL_DRIVER BIT(22) +#define HID_QUIRK_POWER_ON_AFTER_BACKLIGHT BIT(23) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) -- cgit v1.2.3 From a214365140cc3009f07d4e14a8b481fd3dc41d31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Jul 2025 15:15:28 +0300 Subject: rculist: move list_for_each_rcu() to where it belongs The list_for_each_rcu() relies on the rcu_dereference() API which is not provided by the list.h. At the same time list.h is a low-level basic header that must not have dependencies like RCU, besides the fact of the potential circular dependencies in some cases. With all that said, move RCU related API to the rculist.h where it belongs. Signed-off-by: Andy Shevchenko Reviewed-by: Simona Vetter Reviewed-by: "Paul E. McKenney" Signed-off-by: Neeraj Upadhyay (AMD) Signed-off-by: "Paul E. McKenney" --- include/linux/list.h | 10 ---------- include/linux/rculist.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..e7bdad9b8618 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,16 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) -/** - * list_for_each_rcu - Iterate over a list in an RCU-safe fashion - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - !list_is_head(pos, (head)); \ - pos = rcu_dereference(pos->next)) - /** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 1b11926ddd47..2abba7552605 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -42,6 +42,16 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) */ #define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) +/** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. 
+ */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + /** * list_tail_rcu - returns the prev pointer of the head of the list * @head: the head of the list -- cgit v1.2.3 From f45fc18b6de04483643e8aa2ab97737abfe03d59 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Aug 2025 09:56:03 +0200 Subject: net: airoha: Add airoha_ppe_dev struct definition Introduce the airoha_ppe_dev struct as a container for PPE offload callbacks consumed by the MT76 driver during flowtable offload for traffic received by the wlan NIC and forwarded to the wired one. Add the airoha_ppe_setup_tc_block_cb routine to the PPE offload ops for the MT76 driver. Rely on an airoha_ppe_dev pointer in the airoha_ppe_setup_tc_block_cb signature. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250823-airoha-en7581-wlan-rx-offload-v3-2-f78600ec3ed8@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 117c63c2448d..4b4b8b9e426d 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -9,6 +9,41 @@ #include #include +struct airoha_ppe_dev { + struct { + int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev, + void *type_data); + } ops; + + void *priv; +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA) || IS_MODULE(CONFIG_NET_AIROHA)) +struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev); +void airoha_ppe_put_dev(struct airoha_ppe_dev *dev); + +static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return dev->ops.setup_tc_block_cb(dev, type_data); +} +#else +static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) +{ + return NULL; +} + +static inline void airoha_ppe_put_dev(struct airoha_ppe_dev *dev) +{ +} + +static inline int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return -EOPNOTSUPP; +} +#endif + #define NPU_NUM_CORES 8 #define NPU_NUM_IRQ 6 #define NPU_RX0_DESC_NUM 512 -- cgit v1.2.3 From a7cc1aa151e3a9c0314b995f06102f7763d3bd71 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Aug 2025 09:56:04 +0200 Subject: net: airoha: Introduce check_skb callback in ppe_dev ops Export the airoha_ppe_check_skb routine via the ppe_dev ops. The check_skb callback will be used by the MT76 driver in order to offload the traffic received by the wlan NIC and forwarded to the ethernet one. Add an rx_wlan parameter to the airoha_ppe_check_skb routine signature.
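For illustration, a wlan-side consumer of these ops might look like the following hedged sketch; only airoha_ppe_get_dev(), airoha_ppe_put_dev() and airoha_ppe_dev_check_skb() come from this series, while the surrounding variables are assumed driver context:

	struct airoha_ppe_dev *ppe_dev = airoha_ppe_get_dev(eth_dev);

	if (ppe_dev) {
		/* rx_wlan = true: the frame was received on the wlan NIC */
		airoha_ppe_dev_check_skb(ppe_dev, skb, hash, true);
		airoha_ppe_put_dev(ppe_dev);
	}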
Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250823-airoha-en7581-wlan-rx-offload-v3-3-f78600ec3ed8@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 4b4b8b9e426d..1dc5b4e35ef9 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -9,10 +9,17 @@ #include #include +enum { + PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, +}; + struct airoha_ppe_dev { struct { int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev, void *type_data); + void (*check_skb)(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan); } ops; void *priv; @@ -27,6 +34,13 @@ static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, { return dev->ops.setup_tc_block_cb(dev, type_data); } + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, + u16 hash, bool rx_wlan) +{ + dev->ops.check_skb(dev, skb, hash, rx_wlan); +} #else static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) { @@ -42,6 +56,12 @@ static inline int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, { return -EOPNOTSUPP; } + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan) +{ +} #endif #define NPU_NUM_CORES 8 -- cgit v1.2.3 From f1241200cd66b3e25fd2a44dd961d9720e965aa1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:07:00 +0000 Subject: tcp: Don't pass hashinfo to inet_diag helpers. These inet_diag functions required struct inet_hashinfo because they are shared by TCP and DCCP: * inet_diag_dump_icsk() * inet_diag_dump_one_icsk() * inet_diag_find_one_icsk() DCCP has gone, and we don't need to pass hashinfo down to them. Let's fetch net->ipv4.tcp_death_row.hashinfo directly in the first 2 functions. Note that inet_diag_find_one_icsk() doesn't need hashinfo since the previous patch. We will move TCP-specific functions to tcp_diag.c in the next patch.
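Illustratively, the dump helper can now look up the hash table itself; a sketch of the resulting shape (not the full function body):

	void inet_diag_dump_icsk(struct sk_buff *skb, struct netlink_callback *cb,
				 const struct inet_diag_req_v2 *r)
	{
		struct net *net = sock_net(skb->sk);
		struct inet_hashinfo *hashinfo = net->ipv4.tcp_death_row.hashinfo;

		/* ... walk hashinfo exactly as before ... */
	}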
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250822190803.540788-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index a9033696b0aa..34de992b5bd9 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -48,15 +48,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, +void inet_diag_dump_icsk(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct netlink_callback *cb, +int inet_diag_dump_one_icsk(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); struct sock *inet_diag_find_one_icsk(struct net *net, - struct inet_hashinfo *hashinfo, const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); -- cgit v1.2.3 From 382a4d9cb6dc07643345e15c49738088a727d29b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:07:01 +0000 Subject: tcp: Move TCP-specific diag functions to tcp_diag.c. tcp_diag_dump() / tcp_diag_dump_one() are just wrappers of inet_diag_dump_icsk() / inet_diag_dump_one_icsk(), respectively. Let's inline them in tcp_diag.c and move static callees as well. Note that inet_sk_attr_size() is merged into tcp_diag_get_aux_size(), and we remove inet_diag_handler.idiag_get_aux_size() accordingly. While at it, BUG_ON() is replaced with DEBUG_NET_WARN_ON_ONCE(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250822190803.540788-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 34de992b5bd9..30bf8f7ea62b 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,9 +24,6 @@ struct inet_diag_handler { bool net_admin, struct sk_buff *skb); - size_t (*idiag_get_aux_size)(struct sock *sk, - bool net_admin); - int (*destroy)(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req); @@ -48,14 +45,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -void inet_diag_dump_icsk(struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r); -int inet_diag_dump_one_icsk(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req); - -struct sock *inet_diag_find_one_icsk(struct net *net, - const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); -- cgit v1.2.3 From df534e757321ae6efe848a6a787098c22a390ac6 Mon Sep 17 00:00:00 2001 From: David Yang Date: Sun, 24 Aug 2025 09:30:03 +0800 Subject: net: phylink: remove stale an_enabled from doc state->an_enabled was removed by commit 4ee9b0dcf09f ("net: phylink: remove an_enabled") but was left in the mac_config() doc, so clean it up.
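To make the documented contract concrete, a hedged sketch of a mac_config() implementation that derives pause advertisement from state->advertising alone (the register write is elided and the function name is illustrative):

	static void example_mac_config(struct phylink_config *config,
				       unsigned int mode,
				       const struct phylink_link_state *state)
	{
		if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
				      state->advertising)) {
			/* advertise symmetric pause in the in-band word */
		}
	}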
Signed-off-by: David Yang Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250824013009.2443580-1-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 30659b615fca..9af0411761d7 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -320,9 +320,8 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * If in 802.3z mode, the link speed is fixed, dependent on the * @state->interface. Duplex and pause modes are negotiated via * the in-band configuration word. Advertised pause modes are set - * according to the @state->an_enabled and @state->advertising - * flags. Beware of MACs which only support full duplex at gigabit - * and higher speeds. + * according to @state->advertising. Beware of MACs which only + * support full duplex at gigabit and higher speeds. * * If in Cisco SGMII mode, the link speed and duplex mode are passed * in the serial bitstream 16-bit configuration word, and the MAC @@ -331,7 +330,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * responsible for reading the configuration word and configuring * itself accordingly. * - * Valid state members: interface, an_enabled, pause, advertising. + * Valid state members: interface, pause, advertising. * * Implementations are expected to update the MAC to reflect the * requested settings - i.o.w., if nothing has changed between two -- cgit v1.2.3 From 1b93c03fb319d72a1f5f4723abd5df15ce40f4e2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 21 Aug 2025 17:06:03 +0800 Subject: rcu: add rcu_read_lock_dont_migrate() migrate_disable() is called to disable migration in the kernel, and it is often used together with rcu_read_lock(). However, with PREEMPT_RCU disabled, it's unnecessary, as rcu_read_lock() will always disable preemption, which will also disable migration. Introduce rcu_read_lock_dont_migrate() and rcu_read_unlock_migrate(), which perform the migration disable and enable only when PREEMPT_RCU is enabled. Signed-off-by: Menglong Dong Reviewed-by: Paul E. McKenney Link: https://lore.kernel.org/r/20250821090609.42508-2-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/linux/rcupdate.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..9691ca380a4f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -962,6 +962,20 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } +static __always_inline void rcu_read_lock_dont_migrate(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + migrate_disable(); + rcu_read_lock(); +} + +static inline void rcu_read_unlock_migrate(void) +{ + rcu_read_unlock(); + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + migrate_enable(); +} + /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * @p: The pointer to be initialized. -- cgit v1.2.3 From 05db35963eef7a55f1782190185cb8ddb9d923b7 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Wed, 20 Aug 2025 13:58:47 +0530 Subject: OPP: Add support to find OPP for a set of keys Some clients, such as PCIe, may operate at the same clock frequency across different data rates by varying link width. In such cases, frequency alone is not sufficient to uniquely identify an OPP.
To support these scenarios, introduce a new API, dev_pm_opp_find_key_exact(), that allows OPP lookup with a set of keys: frequency, level, and bandwidth. Signed-off-by: Krishna Chaitanya Chundru [ Viresh: Minor cleanups ] Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cf477beae4bb..789406d95e69 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -98,6 +98,25 @@ struct dev_pm_opp_data { unsigned long u_volt; }; +/** + * struct dev_pm_opp_key - Key used to identify OPP entries + * @freq: Frequency in Hz. Use 0 if frequency is not to be matched. + * @level: Performance level associated with the OPP entry. + * Use OPP_LEVEL_UNSET if level is not to be matched. + * @bw: Bandwidth associated with the OPP entry. + * Use 0 if bandwidth is not to be matched. + * + * This structure is used to uniquely identify an OPP entry based on + * frequency, performance level, and bandwidth. Each field can be + * selectively ignored during matching by setting it to its respective + * NOP value. + */ +struct dev_pm_opp_key { + unsigned long freq; + unsigned int level; + u32 bw; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -131,6 +150,10 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); +struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available); + struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available); @@ -289,6 +312,13 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available) -- cgit v1.2.3 From ced17ee32a9988b8a260628e7c31a100d7dc082e Mon Sep 17 00:00:00 2001 From: Igor Torrente Date: Thu, 7 Aug 2025 09:41:45 -0300 Subject: Revert "virtio: reject shm region if length is zero" Commit 206cc44588f7 ("virtio: reject shm region if length is zero") breaks the Virtio-gpu `host_visible` feature. As you can see in the snippet below, host_visible_region is zero because of the `kzalloc`. It uses `vm_get_shm_region` (drivers/virtio/virtio_mmio.c:536) to read the `addr` and `len` from qemu/crosvm. ``` drivers/gpu/drm/virtio/virtgpu_kms.c 132 vgdev = drmm_kzalloc(dev, sizeof(struct virtio_gpu_device), GFP_KERNEL); [...] 177 if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region, 178 VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) { ``` Now it always fails. To fix, revert the offending commit. Fixes: 206cc44588f7 ("virtio: reject shm region if length is zero") Signed-off-by: Igor Torrente Message-Id: <20250807124145.81816-1-igor.torrente@collabora.com> Signed-off-by: Michael S.
Tsirkin --- include/linux/virtio_config.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 918cf25cd3c6..8bf156dde554 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -328,8 +328,6 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { - if (!region->len) - return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); -- cgit v1.2.3 From b3dcc9d1d806fb1e175f85978713eef868531da4 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 26 Aug 2025 10:19:46 +0300 Subject: memblock: fix kernel-doc for MEMBLOCK_RSRV_NOINIT The kernel-doc descriptions of MEMBLOCK_RSRV_NOINIT and memblock_reserved_mark_noinit() do not accurately describe their functionality. Expand their kernel doc to make it clear that the user of MEMBLOCK_RSRV_NOINIT is responsible for properly initializing the struct pages for such regions, and add more details about the effects of using this flag. Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/f8140a17-c4ec-489b-b314-d45abe48bf36@redhat.com Link: https://lore.kernel.org/r/20250826071947.1949725-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b96746376e17..fcda8481de9a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,8 +40,9 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. - * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are - * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_NOINIT: reserved memory region for which struct pages are not + * fully initialized. Users of this flag are responsible to properly initialize + * struct pages of this region * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, * either explictitly with memblock_reserve_kern() or via memblock * allocation APIs. All memblock allocations set this flag. -- cgit v1.2.3 From e649bcda25b5ae1a30a182cc450f928a0b282c93 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Aug 2025 14:03:39 -0400 Subject: perf: Remove get_perf_callchain() init_nr argument The 'init_nr' argument does double duty: it's used to initialize both the number of contexts and the number of stack entries. That's confusing and the callers always pass zero anyway. Hard code the zero.
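A caller simply drops the leading zero argument; a sketch against the new prototype shown in the diff (the flag values are illustrative):

	struct perf_callchain_entry *entry;

	/* was: get_perf_callchain(regs, 0, true, true, max_stack, false, true) */
	entry = get_perf_callchain(regs, true, true, max_stack, false, true);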
Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Namhyung Kim Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20250820180428.259565081@kernel.org --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bfbf9ea53f25..fd1d91017b99 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1719,7 +1719,7 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); -- cgit v1.2.3 From d6d05e2af796ca25094f80a73d8841505d54368b Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Jul 2025 17:13:02 +0200 Subject: video: screen_info: Add pixel-format helper for linear framebuffers Add screen_info_pixel_format(), which converts a screen_info's information about the color format to struct pixel_format. The encoding within the screen_info structure is complex and therefore prone to errors. Later patches will convert callers to use the pixel format. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250714151513.309475-3-tzimmermann@suse.de --- include/linux/screen_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 923d68e07679..1690706206e8 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -12,6 +12,7 @@ #define SCREEN_INFO_MAX_RESOURCES 3 struct pci_dev; +struct pixel_format; struct resource; static inline bool __screen_info_has_lfb(unsigned int type) @@ -136,6 +137,7 @@ static inline u32 __screen_info_vesapm_info_base(const struct screen_info *si) ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si); +int screen_info_pixel_format(const struct screen_info *si, struct pixel_format *f); #if defined(CONFIG_PCI) void screen_info_apply_fixups(void); -- cgit v1.2.3 From 1abe21ef1adf0c5b6dbb5878c2fa4573df8d29fc Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 23 Aug 2025 15:44:28 +0200 Subject: net: phy: introduce phy_id_compare_vendor() PHY ID helper Introduce the phy_id_compare_vendor() PHY ID helper to compare a PHY ID against a vendor ID using the generic PHY ID vendor mask. While at it, also rework the PHY_ID_MATCH macros and move the masks to dedicated defines so that PHY drivers can make use of the masks if needed.
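For example, a driver could match any model from a given vendor; a hedged sketch where the vendor ID value is purely illustrative:

	/* 0x001cc800 is a made-up vendor ID, for illustration only */
	if (phy_id_compare_vendor(phydev->phy_id, 0x001cc800))
		phydev_info(phydev, "PHY belongs to the expected vendor\n");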
Signed-off-by: Christian Marangi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250823134431.4854-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c2b8b6e7187..b67079796402 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1268,9 +1268,13 @@ struct phy_driver { #define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0) -#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4) -#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10) +#define PHY_ID_MATCH_EXTACT_MASK GENMASK(31, 0) +#define PHY_ID_MATCH_MODEL_MASK GENMASK(31, 4) +#define PHY_ID_MATCH_VENDOR_MASK GENMASK(31, 10) + +#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_EXTACT_MASK +#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_MODEL_MASK +#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_VENDOR_MASK /** * phy_id_compare - compare @id1 with @id2 taking account of @mask * @id1: first PHY ID * @id2: second PHY ID * @mask: the PHY ID mask, set bits are significant in matching * * Return: true if the bits from @id1 and @id2 specified by @mask match. */ static inline bool phy_id_compare(u32 id1, u32 id2, u32 mask) { return !((id1 ^ id2) & mask); } +/** + * phy_id_compare_vendor - compare @id with @vendor mask + * @id: PHY ID + * @vendor_mask: PHY Vendor mask + * + * Return: true if the bits from @id match @vendor using the + * generic PHY Vendor mask. + */ +static inline bool phy_id_compare_vendor(u32 id, u32 vendor_mask) +{ + return phy_id_compare(id, vendor_mask, PHY_ID_MATCH_VENDOR_MASK); +} + /** * phydev_id_compare - compare @id with the PHY's Clause 22 ID * @phydev: the PHY device -- cgit v1.2.3 From 39e94fdce45fa611abf48472873e4ba2f67228a3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Aug 2025 22:36:11 +0200 Subject: net: phy: fixed: let fixed_phy_add always use addr 0 and remove return value We have only two users of fixed_phy_add(), both use address 0 and ignore the return value. So simplify fixed_phy_add() accordingly. Whilst at it, constify the fixed_phy_status configs. Note: fixed_phy_add() is a legacy function which shouldn't be used in new code, as its use may be problematic: - No check whether a fixed phy exists already at the given address - If fixed_phy_register() is called afterwards by any other driver, then it will also use phy_addr 0, because fixed_phy_add() ignores the ida which manages address assignment Drivers using a fixed phy created by fixed_phy_add() in platform code should dynamically create a fixed phy with fixed_phy_register() instead.
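A hedged sketch of the simplified platform-code usage after this change (the status values are illustrative):

	static const struct fixed_phy_status example_status = {
		.link = 1,
		.speed = 100,
		.duplex = DUPLEX_FULL,
	};

	fixed_phy_add(&example_status);	/* always registers at PHY address 0 */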
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/762700e5-a0b1-41af-aa03-929822a39475@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 5399b9e41e35..6227a1bdefec 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,7 +17,7 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -int fixed_phy_add(int phy_id, const struct fixed_phy_status *status); +void fixed_phy_add(const struct fixed_phy_status *status); struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np); @@ -26,11 +26,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); #else -static inline int fixed_phy_add(int phy_id, - const struct fixed_phy_status *status) -{ - return -ENODEV; -} +static inline void fixed_phy_add(const struct fixed_phy_status *status) {} static inline struct phy_device * fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np) -- cgit v1.2.3 From 19a9a1ab5c3dce65fff4ac50700117039c23d525 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:32 -0700 Subject: KVM: Rename CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GUEST_MEMFD Rename the Kconfig option CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GUEST_MEMFD. The original name implied that the feature only supported "private" memory. However, CONFIG_KVM_PRIVATE_MEM enables guest_memfd in general, which is not exclusively for private memory. Subsequent patches in this series will add guest_memfd support for non-CoCo VMs, whose memory is not private. Renaming the Kconfig option to CONFIG_KVM_GUEST_MEMFD more accurately reflects its broader scope as the main Kconfig option for all guest_memfd-backed memory. This provides clearer semantics for the option and avoids confusion as new features are introduced. Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..8cdc0b3cc1b1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ struct kvm_memory_slot { short id; u16 as_id; -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD struct { /* * Writes protected by kvm->slots_lock. Acquiring a @@ -720,10 +720,10 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) #endif /* - * Arch code must define kvm_arch_has_private_mem if support for private memory - * is enabled. + * Arch code must define kvm_arch_has_private_mem if support for guest_memfd is + * enabled. 
*/ -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; @@ -2505,7 +2505,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { - return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && + return IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) && kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else @@ -2515,7 +2515,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) } #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, struct page **page, int *max_order); @@ -2528,7 +2528,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, KVM_BUG_ON(1, kvm); return -EIO; } -#endif /* CONFIG_KVM_PRIVATE_MEM */ +#endif /* CONFIG_KVM_GUEST_MEMFD */ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -- cgit v1.2.3 From 36cf63bb5df68836e55e2839f8174b404d47670b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:36 -0700 Subject: KVM: Rename CONFIG_KVM_GENERIC_PRIVATE_MEM to CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE The original name was vague regarding its functionality. This Kconfig option specifically enables and gates the kvm_gmem_populate() function, which is responsible for populating a GPA range with guest data. The new name, HAVE_KVM_ARCH_GMEM_POPULATE, describes the purpose of the option: to enable arch-specific guest_memfd population mechanisms. It also follows the same pattern as the other HAVE_KVM_ARCH_* configuration options. This improves clarity for developers and ensures the name accurately reflects the functionality it controls, especially as guest_memfd support expands beyond purely "private" memory scenarios. Temporarily keep KVM_GENERIC_PRIVATE_MEM as an x86-only config so as to minimize churn, and to hopefully make it easier to see what features require HAVE_KVM_ARCH_GMEM_POPULATE. On that note, omit GMEM_POPULATE for KVM_X86_SW_PROTECTED_VM, as regular ol' memset() suffices for software-protected VMs. As for KVM_GENERIC_PRIVATE_MEM, a future change will select KVM_GUEST_MEMFD for all 64-bit KVM builds, at which point the intermediate config will become obsolete and can/will be dropped. 
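As a rough sketch, an arch-specific population path would now be guarded by the renamed option; the kvm_gmem_populate() call below assumes its existing signature elsewhere in the tree and is illustrative only:

	#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
		/* arch code filling a GPA range with guest data */
		ret = kvm_gmem_populate(kvm, gfn, src, npages,
					post_populate, opaque);
	#endif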
Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8cdc0b3cc1b1..ddfb6cfe20a6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2534,7 +2534,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif -#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * -- cgit v1.2.3 From 923310be23b275f730e8869abc783db6296fc043 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:37 -0700 Subject: KVM: Rename kvm_slot_can_be_private() to kvm_slot_has_gmem() Rename kvm_slot_can_be_private() to kvm_slot_has_gmem() to improve clarity and accurately reflect its purpose. The function kvm_slot_can_be_private() was previously used to check if a given kvm_memory_slot is backed by guest_memfd. However, its name implied that the memory in such a slot was exclusively "private". As guest_memfd support expands to include non-private memory (e.g., shared host mappings), it's important to remove this association. The new name, kvm_slot_has_gmem(), states that the slot is backed by guest_memfd without making assumptions about the memory's privacy attributes. Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ddfb6cfe20a6..4c5e0a898652 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -615,7 +615,7 @@ struct kvm_memory_slot { #endif }; -static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) +static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) { return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); } -- cgit v1.2.3 From 69116e01f6fee030db45d269f28f9c300b8dc9d6 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:38 -0700 Subject: KVM: Fix comments that refer to slots_lock Fix comments so that they refer to slots_lock instead of slots_locks (remove trailing s). 
Reviewed-by: David Hildenbrand Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c5e0a898652..5c25b03d3d50 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -860,7 +860,7 @@ struct kvm { struct notifier_block pm_notifier; #endif #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES - /* Protected by slots_locks (for writes) and RCU (for reads) */ + /* Protected by slots_lock (for writes) and RCU (for reads) */ struct xarray mem_attr_array; #endif char stats_id[KVM_STATS_NAME_SIZE]; -- cgit v1.2.3 From 68d189938709a5918d7308eb922c30bcbf16ebb9 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:39 -0700 Subject: KVM: Fix comment that refers to kvm uapi header path The comment that points to the path where the user-visible memslot flags are refers to an outdated path and has a typo. Update the comment to refer to the correct path. Reviewed-by: David Hildenbrand Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c25b03d3d50..56ea8c862cfd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -52,7 +52,7 @@ /* * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally * used in kvm, other bits are visible for userspace which are defined in - * include/linux/kvm_h. + * include/uapi/linux/kvm.h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) -- cgit v1.2.3 From d1e54dd08f163a9021433020d16a8f8f70ddc41c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:40 -0700 Subject: KVM: x86: Enable KVM_GUEST_MEMFD for all 64-bit builds Enable KVM_GUEST_MEMFD for all KVM x86 64-bit builds, i.e. for "default" VM types when running on 64-bit KVM. This will allow using guest_memfd to back non-private memory for all VM shapes, by supporting mmap() on guest_memfd. Opportunistically clean up various conditionals that become tautologies once x86 selects KVM_GUEST_MEMFD more broadly. Specifically, because SW protected VMs, SEV, and TDX are all 64-bit only, private memory no longer needs to take explicit dependencies on KVM_GUEST_MEMFD, because it is effectively a prerequisite. 
Suggested-by: Sean Christopherson Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Reviewed-by: David Hildenbrand Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 56ea8c862cfd..4d1c44622056 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -719,11 +719,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) } #endif -/* - * Arch code must define kvm_arch_has_private_mem if support for guest_memfd is - * enabled. - */ -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) +#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; @@ -2505,8 +2501,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { - return IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) && - kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; + return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) -- cgit v1.2.3 From a12578e1477cbfb547256ed8dee6d5142a59cdcd Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:41 -0700 Subject: KVM: guest_memfd: Add plumbing to host to map guest_memfd pages Introduce the core infrastructure to enable host userspace to mmap() guest_memfd-backed memory. This is needed for several evolving KVM use cases: * Non-CoCo VM backing: Allows VMMs like Firecracker to run guests entirely backed by guest_memfd, even for non-CoCo VMs [1]. This provides a unified memory management model and simplifies guest memory handling. * Direct map removal for enhanced security: This is an important step for direct map removal of guest memory [2]. By allowing host userspace to fault in guest_memfd pages directly, we can avoid maintaining host kernel direct maps of guest memory. This provides additional hardening against Spectre-like transient execution attacks by removing a potential attack surface within the kernel. * Future guest_memfd features: This also lays the groundwork for future enhancements to guest_memfd, such as supporting huge pages and enabling in-place sharing of guest memory with the host for CoCo platforms that permit it [3]. Enable the basic mmap and fault handling logic within guest_memfd, but hold off on allowing userspace to actually do mmap() until the architecture support is also in place.
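Once an architecture opts in, the intended flow is a plain mmap() on the guest_memfd file descriptor; a hedged userspace sketch (ioctl argument setup and error handling elided):

	int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &args);
	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, gmem_fd, 0);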
[1] https://github.com/firecracker-microvm/firecracker/tree/feature/secret-hiding [2] https://lore.kernel.org/linux-mm/cc1bb8e9bc3e1ab637700a4d3defeec95b55060a.camel@amazon.com [3] https://lore.kernel.org/all/c1c9591d-218a-495c-957b-ba356c8f8e09@redhat.com/T/#u Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4d1c44622056..26bad600f9fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -726,6 +726,10 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifdef CONFIG_KVM_GUEST_MEMFD +bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +#endif + #ifndef kvm_arch_has_readonly_mem static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) { -- cgit v1.2.3 From 576d035e2aef52f8d8d3ce29af556d4c6bd2e0fe Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:42 -0700 Subject: KVM: guest_memfd: Track guest_memfd mmap support in memslot Add a new internal flag, KVM_MEMSLOT_GMEM_ONLY, to the top half of memslot->flags (which makes it strictly for KVM's internal use). This flag tracks when a guest_memfd-backed memory slot supports host userspace mmap operations, which implies that all memory, not just private memory for CoCo VMs, is consumed through guest_memfd: "gmem only". This optimization avoids repeatedly checking the underlying guest_memfd file for mmap support, which would otherwise require taking and releasing a reference on the file for each check. By caching this information directly in the memslot, we reduce overhead and simplify the logic involved in handling guest_memfd-backed pages for host mappings. Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Xiaoyao Li Acked-by: David Hildenbrand Suggested-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 26bad600f9fa..8b47891adca1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -54,7 +54,8 @@ * used in kvm, other bits are visible for userspace which are defined in * include/uapi/linux/kvm.h. 
*/ -#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17) /* * Bit 63 of the memslot generation number is an "update in-progress flag", @@ -2490,6 +2491,14 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; } +static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot) +{ + if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD)) + return false; + + return slot->flags & KVM_MEMSLOT_GMEM_ONLY; +} + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) { -- cgit v1.2.3 From dd6a5a71c811289eec234e78cb9ca34d055d2ad5 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 10 Jun 2025 13:52:28 +0900 Subject: sched/wait: Add wait_event_state_exclusive() Allows exclusive waits with a custom @state. Signed-off-by: Sergey Senozhatsky Acked-by: Peter Zijlstra (Intel) Signed-off-by: Miklos Szeredi --- include/linux/wait.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 09855d819418..f648044466d5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -965,6 +965,18 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); __ret; \ }) +#define __wait_event_state_exclusive(wq, condition, state) \ + ___wait_event(wq, condition, state, 1, 0, schedule()) + +#define wait_event_state_exclusive(wq, condition, state) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_state_exclusive(wq, condition, state); \ + __ret; \ +}) + #define __wait_event_killable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_KILLABLE, 0, timeout, \ -- cgit v1.2.3 From 494d2f508883a6e5c4530e5c6b3c8b2bbfb7318d Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 7 Jul 2025 16:46:05 -0700 Subject: fuse: use default writeback accounting commit 0c58a97f919c ("fuse: remove tmp folio for writebacks and internal rb tree") removed temp folios for dirty page writeback. Consequently, fuse can now use the default writeback accounting. Switching fuse to default writeback accounting brings some added benefits: wb->writeback_inodes tracking is now updated as well, and writeback throughput estimates are updated after writeback completion. This commit also removes inc_wb_stat() and dec_wb_stat(). These have no callers anymore now that fuse does not call them.
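Had any caller remained, the open-coded equivalents via wb_stat_mod() would be (illustrative, using the WB_WRITEBACK stat item):

	wb_stat_mod(wb, WB_WRITEBACK, 1);	/* was inc_wb_stat(wb, WB_WRITEBACK) */
	wb_stat_mod(wb, WB_WRITEBACK, -1);	/* was dec_wb_stat(wb, WB_WRITEBACK) */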
Signed-off-by: Joanne Koong Reviewed-by: David Hildenbrand Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- include/linux/backing-dev.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e721148c95d0..9a1e895dd5df 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -66,16 +66,6 @@ static inline void wb_stat_mod(struct bdi_writeback *wb, percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -{ - wb_stat_mod(wb, item, 1); -} - -static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -{ - wb_stat_mod(wb, item, -1); -} - static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { return percpu_counter_read_positive(&wb->stat[item]); -- cgit v1.2.3 From 2841808f35eebfd07150333f3af3007cb2904a09 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 7 Jul 2025 16:46:06 -0700 Subject: mm: remove BDI_CAP_WRITEBACK_ACCT There are no users of BDI_CAP_WRITEBACK_ACCT now that fuse doesn't do its own writeback accounting. This commit removes BDI_CAP_WRITEBACK_ACCT. Signed-off-by: Joanne Koong Acked-by: David Hildenbrand Signed-off-by: Miklos Szeredi --- include/linux/backing-dev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9a1e895dd5df..3e64f14739dd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -108,12 +108,10 @@ int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit * * BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages * should contribute to accounting - * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold */ #define BDI_CAP_WRITEBACK (1 << 0) -#define BDI_CAP_WRITEBACK_ACCT (1 << 1) -#define BDI_CAP_STRICTLIMIT (1 << 2) +#define BDI_CAP_STRICTLIMIT (1 << 1) extern struct backing_dev_info noop_backing_dev_info; -- cgit v1.2.3 From 08383cd479f8212fafee2f557b58cfd48818bee0 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Wed, 20 Aug 2025 09:54:02 +0200 Subject: pinctrl: Add pinctrl_pm_select_init_state helper function If a platform requires an initial pinctrl state during probing, this helper function provides the client with access to the same initial state. e.g.:

	xxx_suspend_noirq
		...
		pinctrl_pm_select_sleep_state

	xxx_resume_noirq
		pinctrl_pm_select_init_state
		...
		pinctrl_pm_select_default_state

Signed-off-by: Christian Bruel Signed-off-by: Manivannan Sadhasivam Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20250820075411.1178729-3-christian.bruel@foss.st.com --- include/linux/pinctrl/consumer.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 73de70362b98..63ce16191eb9 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -48,6 +48,7 @@ int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_init_state(struct device *dev); int pinctrl_pm_select_sleep_state(struct device *dev); int pinctrl_pm_select_idle_state(struct device *dev); #else @@ -55,6 +56,10 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) { return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; @@ -143,6 +148,11 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} + static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; -- cgit v1.2.3 From 1df7dad4d5c49335b72e26d833def960b2de76e3 Mon Sep 17 00:00:00 2001 From: Nandakumar Edamana Date: Tue, 26 Aug 2025 09:15:23 +0530 Subject: bpf: Improve the general precision of tnum_mul Drop the value-mask decomposition technique and adopt straightforward long-multiplication with a twist: when LSB(a) is uncertain, find the two partial products (for LSB(a) = known 0 and LSB(a) = known 1) and take their union. Experiments show that applying this technique in long multiplication improves the precision in a significant number of cases (at the cost of losing precision in a relatively lower number of cases).
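A hedged sketch of the long-multiplication idea described above, written against the helpers declared in tnum.h; it is illustrative and not claimed to match the kernel implementation line for line:

	struct tnum tnum_mul(struct tnum a, struct tnum b)
	{
		struct tnum acc = TNUM(0, 0);

		while (a.value || a.mask) {
			if (a.value & 1)		/* LSB(a) is known 1 */
				acc = tnum_add(acc, b);
			else if (a.mask & 1)		/* LSB(a) is uncertain */
				acc = tnum_union(tnum_add(acc, b), acc);
			/* LSB(a) known 0: no partial product to add */
			a = tnum_rshift(a, 1);
			b = tnum_lshift(b, 1);
		}
		return acc;
	}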
Fix this by adding the KEXEC_FILE_NO_CMA flag to the list of legal flags for kexec_file_load. Without this fix, kexec_file_load syscall will failed and return '-EINVAL' when KEXEC_FILE_NO_CMA is specified. Link: https://lkml.kernel.org/r/20250805211527.122367-2-makb@juniper.net Fixes: 07d24902977e ("kexec: enable CMA based contiguous allocation") Signed-off-by: Brian Mak Acked-by: Baoquan He Cc: Alexander Graf Cc: Borislav Betkov Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Rob Herring Cc: Saravana Kannan Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/kexec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1b10a5d84b68..39fe3e6cd282 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -460,7 +460,8 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ - KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG) + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ + KEXEC_FILE_NO_CMA) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -- cgit v1.2.3 From 7cc183f2e67d19b03ee5c13a6664b8c6cc37ff9d Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 18 Aug 2025 11:02:04 +0900 Subject: mm: move page table sync declarations to linux/pgtable.h During our internal testing, we started observing intermittent boot failures when the machine uses 4-level paging and has a large amount of persistent memory: BUG: unable to handle page fault for address: ffffe70000000034 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP NOPTI RIP: 0010:__init_single_page+0x9/0x6d Call Trace: __init_zone_device_page+0x17/0x5d memmap_init_zone_device+0x154/0x1bb pagemap_range+0x2e0/0x40f memremap_pages+0x10b/0x2f0 devm_memremap_pages+0x1e/0x60 dev_dax_probe+0xce/0x2ec [device_dax] dax_bus_probe+0x6d/0xc9 [... snip ...] It turns out that the kernel panics while initializing vmemmap (struct page array) when the vmemmap region spans two PGD entries, because the new PGD entry is only installed in init_mm.pgd, but not in the page tables of other tasks. And looking at __populate_section_memmap(): if (vmemmap_can_optimize(altmap, pgmap)) // does not sync top level page tables r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap); else // sync top level page tables in x86 r = vmemmap_populate(start, end, nid, altmap); In the normal path, vmemmap_populate() in arch/x86/mm/init_64.c synchronizes the top level page table (See commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes")) so that all tasks in the system can see the new vmemmap area. However, when vmemmap_can_optimize() returns true, the optimized path skips synchronization of top-level page tables. This is because vmemmap_populate_compound_pages() is implemented in core MM code, which does not handle synchronization of the top-level page tables. Instead, the core MM has historically relied on each architecture to perform this synchronization manually. We're not the first party to encounter a crash caused by not-sync'd top level page tables: earlier this year, Gwan-gyeong Mun attempted to address the issue [1] [2] after hitting a kernel panic when x86 code accessed the vmemmap area before the corresponding top-level entries were synced. 
At that time, the issue was believed to be triggered only when struct page was enlarged for debugging purposes, and the patch did not get further updates. It turns out that the current approach of relying on each arch to handle the page table sync manually is fragile because 1) it's easy to forget to sync the top level page table, and 2) it's also easy to overlook that the kernel should not access the vmemmap and direct mapping areas before the sync. # The solution: Make page table sync code more robust and harder to miss To address this, Dave Hansen suggested [3] [4] introducing {pgd,p4d}_populate_kernel() for updating the kernel portion of the page tables, and allow each architecture to explicitly perform synchronization when installing top-level entries. With this approach, we no longer need to worry about missing the sync step, reducing the risk of future regressions. The new interface reuses the existing ARCH_PAGE_TABLE_SYNC_MASK, PGTBL_P*D_MODIFIED and arch_sync_kernel_mappings() facility used by vmalloc and ioremap to synchronize page tables. pgd_populate_kernel() looks like this:

	static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd,
					       p4d_t *p4d)
	{
		pgd_populate(&init_mm, pgd, p4d);
		if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)
			arch_sync_kernel_mappings(addr, addr);
	}

It is worth noting that vmalloc() and apply_to_range() carefully synchronize page tables by calling p*d_alloc_track() and arch_sync_kernel_mappings(), and thus they are not affected by this patch series. This series was hugely inspired by Dave Hansen's suggestion and hence adds Suggested-by: Dave Hansen. Cc stable because the lack of this series opens the door to intermittent boot failures. This patch (of 3): Move ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to linux/pgtable.h so that they can be used outside of vmalloc and ioremap.
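For context, an architecture opts into this machinery by defining the mask and providing the hook; a sketch of the expected wiring (modeled on x86, but illustrative rather than a quote of the arch patch):

	/* arch header: which levels require a sync after kernel updates */
	#define ARCH_PAGE_TABLE_SYNC_MASK \
		(PGTBL_PGD_MODIFIED | PGTBL_P4D_MODIFIED)

	/* arch code: propagate top-level kernel entries to all page tables */
	void arch_sync_kernel_mappings(unsigned long start, unsigned long end);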
Link: https://lkml.kernel.org/r/20250818020206.4517-1-harry.yoo@oracle.com Link: https://lkml.kernel.org/r/20250818020206.4517-2-harry.yoo@oracle.com Link: https://lore.kernel.org/linux-mm/20250220064105.808339-1-gwan-gyeong.mun@intel.com [1] Link: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@intel.com [2] Link: https://lore.kernel.org/linux-mm/d1da214c-53d3-45ac-a8b6-51821c5416e4@intel.com [3] Link: https://lore.kernel.org/linux-mm/4d800744-7b88-41aa-9979-b245e8bf794b@intel.com [4] Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Harry Yoo Acked-by: Kiryl Shutsemau Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: "Uladzislau Rezki (Sony)" Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: Alistair Popple Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: bibo mao Cc: Borislav Petkov Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Dev Jain Cc: Dmitriy Vyukov Cc: Gwan-gyeong Mun Cc: Ingo Molnar Cc: Jane Chu Cc: Joao Martins Cc: Joerg Roedel Cc: John Hubbard Cc: Kevin Brodsky Cc: Liam Howlett Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 16 ++++++++++++++++ include/linux/vmalloc.h | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 4c035637eeb7..ba699df6ef69 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1467,6 +1467,22 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned } #endif +/* + * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values + * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() + * needs to be called. + */ +#ifndef ARCH_PAGE_TABLE_SYNC_MASK +#define ARCH_PAGE_TABLE_SYNC_MASK 0 +#endif + +/* + * There is no default implementation for arch_sync_kernel_mappings(). It is + * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK + * is 0. + */ +void arch_sync_kernel_mappings(unsigned long start, unsigned long end); + #endif /* CONFIG_MMU */ /* diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index fdc9aeb74a44..2759dac6be44 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -219,22 +219,6 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); -/* - * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values - * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() - * needs to be called. - */ -#ifndef ARCH_PAGE_TABLE_SYNC_MASK -#define ARCH_PAGE_TABLE_SYNC_MASK 0 -#endif - -/* - * There is no default implementation for arch_sync_kernel_mappings(). It is - * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK - * is 0. - */ -void arch_sync_kernel_mappings(unsigned long start, unsigned long end); - /* * Lowlevel-APIs (not for driver use!)
 */
-- cgit v1.2.3


From f2d2f9598ebb0158a3fe17cda0106d7752e654a2 Mon Sep 17 00:00:00 2001
From: Harry Yoo
Date: Mon, 18 Aug 2025 11:02:05 +0900
Subject: mm: introduce and use {pgd,p4d}_populate_kernel()

Introduce and use {pgd,p4d}_populate_kernel() in core MM code when
populating PGD and P4D entries for the kernel address space. These
helpers ensure proper synchronization of page tables when updating the
kernel portion of top-level page tables.

Until now, the kernel has relied on each architecture to handle
synchronization of top-level page tables in an ad-hoc manner. For
example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for
direct mapping and vmemmap mapping changes").

However, this approach has proven fragile for the following reasons:

1) It is easy to forget to perform the necessary page table
   synchronization when introducing new changes. For instance, commit
   4917f55b4ef9 ("mm/sparse-vmemmap: improve memory savings for
   compound devmaps") overlooked the need to synchronize page tables
   for the vmemmap area.

2) It is also easy to overlook that the vmemmap and direct mapping
   areas must not be accessed before explicit page table
   synchronization. For example, commit 8d400913c231 ("x86/vmemmap:
   handle unpopulated sub-pmd ranges") caused crashes by accessing the
   vmemmap area before calling sync_global_pgds().

To address this, as suggested by Dave Hansen, introduce _kernel()
variants of the page table population helpers, which invoke
architecture-specific hooks to properly synchronize page tables. These
are introduced in a new header file, include/linux/pgalloc.h, so they
can be called from common code. They reuse existing infrastructure for
vmalloc and ioremap.

Synchronization requirements are determined by
ARCH_PAGE_TABLE_SYNC_MASK, and the actual synchronization is performed
by arch_sync_kernel_mappings().

This change currently targets only x86_64, so only PGD and P4D level
helpers are introduced. Currently, these helpers are no-ops since no
architecture sets PGTBL_{PGD,P4D}_MODIFIED in
ARCH_PAGE_TABLE_SYNC_MASK.

In theory, PUD and PMD level helpers can be added later if needed by
other architectures. For now, 32-bit architectures (x86-32 and arm)
only handle PGTBL_PMD_MODIFIED, so p*d_populate_kernel() will never
affect them unless we introduce a PMD level helper.
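
As an illustration of the intended call sites, a vmemmap-style populate
path would use the new helper roughly like this (a sketch; the
allocation and error handling are simplified and only
p4d_populate_kernel() itself comes from this patch):

p4d_t *p4d = p4d_offset(pgd, addr);

if (p4d_none(*p4d)) {
        pud_t *pud = pud_alloc_one(&init_mm, addr);     /* simplified */

        if (!pud)
                return -ENOMEM;
        /* installs the entry and, if the architecture opted in via
         * ARCH_PAGE_TABLE_SYNC_MASK, synchronizes other page tables */
        p4d_populate_kernel(addr, p4d, pud);
}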
[harry.yoo@oracle.com: fix KASAN build error due to p*d_populate_kernel()]
Link: https://lkml.kernel.org/r/20250822020727.202749-1-harry.yoo@oracle.com
Link: https://lkml.kernel.org/r/20250818020206.4517-3-harry.yoo@oracle.com
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Signed-off-by: Harry Yoo
Suggested-by: Dave Hansen
Acked-by: Kiryl Shutsemau
Reviewed-by: Mike Rapoport (Microsoft)
Reviewed-by: Lorenzo Stoakes
Acked-by: David Hildenbrand
Cc: Alexander Potapenko
Cc: Alistair Popple
Cc: Andrey Konovalov
Cc: Andrey Ryabinin
Cc: Andy Lutomirski
Cc: "Aneesh Kumar K.V"
Cc: Anshuman Khandual
Cc: Ard Biesheuvel
Cc: Arnd Bergmann
Cc: bibo mao
Cc: Borislav Betkov
Cc: Christoph Lameter (Ampere)
Cc: Dennis Zhou
Cc: Dev Jain
Cc: Dmitriy Vyukov
Cc: Gwan-gyeong Mun
Cc: Ingo Molnar
Cc: Jane Chu
Cc: Joao Martins
Cc: Joerg Roedel
Cc: John Hubbard
Cc: Kevin Brodsky
Cc: Liam Howlett
Cc: Michal Hocko
Cc: Oscar Salvador
Cc: Peter Xu
Cc: Peter Zijlstra
Cc: Qi Zheng
Cc: Ryan Roberts
Cc: Suren Baghdasaryan
Cc: Tejun Heo
Cc: Thomas Gleinxer
Cc: Thomas Huth
Cc: "Uladzislau Rezki (Sony)"
Cc: Vincenzo Frascino
Cc: Vlastimil Babka
Cc:
Signed-off-by: Andrew Morton
---
 include/linux/pgalloc.h | 29 +++++++++++++++++++++++++++++
 include/linux/pgtable.h | 13 +++++++------
 2 files changed, 36 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/pgalloc.h

(limited to 'include/linux')

diff --git a/include/linux/pgalloc.h b/include/linux/pgalloc.h
new file mode 100644
index 000000000000..9174fa59bbc5
--- /dev/null
+++ b/include/linux/pgalloc.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLOC_H
+#define _LINUX_PGALLOC_H
+
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+
+/*
+ * {pgd,p4d}_populate_kernel() are defined as macros to allow
+ * compile-time optimization based on the configured page table levels.
+ * Without this, linking may fail because callers (e.g., KASAN) may rely
+ * on calls to these functions being optimized away when passing symbols
+ * that exist only for certain page table levels.
+ */
+#define pgd_populate_kernel(addr, pgd, p4d)				\
+	do {								\
+		pgd_populate(&init_mm, pgd, p4d);			\
+		if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)	\
+			arch_sync_kernel_mappings(addr, addr);		\
+	} while (0)
+
+#define p4d_populate_kernel(addr, p4d, pud)				\
+	do {								\
+		p4d_populate(&init_mm, p4d, pud);			\
+		if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED)	\
+			arch_sync_kernel_mappings(addr, addr);		\
+	} while (0)
+
+#endif /* _LINUX_PGALLOC_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index ba699df6ef69..2b80fd456c8b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1469,8 +1469,8 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned
 /*
  * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
+ * and let generic vmalloc, ioremap and page table update code know when
+ * arch_sync_kernel_mappings() needs to be called.
  */
 #ifndef ARCH_PAGE_TABLE_SYNC_MASK
 #define ARCH_PAGE_TABLE_SYNC_MASK 0
@@ -1954,10 +1954,11 @@ static inline bool arch_has_pfn_modify_check(void)
 /*
  * Page Table Modification bits for pgtbl_mod_mask.
  *
- * These are used by the p?d_alloc_track*() set of functions an in the generic
- * vmalloc/ioremap code to track at which page-table levels entries have been
- * modified. Based on that the code can better decide when vmalloc and ioremap
- * mapping changes need to be synchronized to other page-tables in the system.
+ * These are used by the p?d_alloc_track*() and p*d_populate_kernel()
+ * functions in the generic vmalloc, ioremap and page table update code
+ * to track at which page-table levels entries have been modified.
+ * Based on that the code can better decide when page table changes need
+ * to be synchronized to other page-tables in the system.
  */
 #define __PGTBL_PGD_MODIFIED	0
 #define __PGTBL_P4D_MODIFIED	1
-- cgit v1.2.3


From 9df8043a546d2eb3adfaba920c027c0d701c73a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Tue, 26 Aug 2025 15:18:57 +0300
Subject: iopoll: Generalize read_poll_timeout() into poll_timeout_us()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While read_poll_timeout() & co. were originally introduced just for
simple I/O usage scenarios they have since been generalized to be
useful in more cases. However the interface is very cumbersome to use
in the general case.

Attempt to make it more flexible by combining the 'op', 'val' and
'args' parameters into just a single 'op' that the caller can fully
specify.

For example i915 has one case where one might currently have to write
something like:

ret = read_poll_timeout(drm_dp_dpcd_read_byte, err, err || (status & mask),
                        0 * 1000, 200 * 1000, false, aux, DP_FEC_STATUS, &status);

which is practically illegible, but with the adjusted macro we do:

ret = poll_timeout_us(err = drm_dp_dpcd_read_byte(aux, DP_FEC_STATUS, &status),
                      err || (status & mask),
                      0 * 1000, 200 * 1000, false);

which is much easier to understand.

One could even combine the 'op' and 'cond' parameters into one, but
that might make the caller a bit too unwieldy with assignments and
checks being done in the same statement.

This makes poll_timeout_us() closer to the i915 __wait_for() macro,
with the main difference being that __wait_for() uses exponential
backoff as opposed to the fixed polling interval used by
poll_timeout_us(). Eventually we might be able to switch (at least
most of) i915 to use poll_timeout_us().

v2: Fix typos (Jani)
    Fix delay_us docs for poll_timeout_us_atomic() (Jani)

Cc: Lucas De Marchi
Cc: Dibin Moolakadan Subrahmanian
Cc: Imre Deak
Cc: David Laight
Cc: Geert Uytterhoeven
Cc: Matt Wagantall
Cc: Dejin Zheng
Cc: intel-gfx@lists.freedesktop.org
Cc: intel-xe@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Jani Nikula
Acked-by: Simona Vetter
Signed-off-by: Ville Syrjälä
Link: https://lore.kernel.org/r/20250826121859.15497-1-ville.syrjala@linux.intel.com
Signed-off-by: Jani Nikula
---
 include/linux/iopoll.h | 110 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 78 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 91324c331a4b..440aca5b4b59 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -14,41 +14,38 @@
 #include

 /**
- * read_poll_timeout - Periodically poll an address until a condition is
- *			met or a timeout occurs
- * @op: accessor function (takes @args as its arguments)
- * @val: Variable to read the value into
- * @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops).
Please - * read usleep_range() function description for details and + * poll_timeout_us - Periodically poll and perform an operation until + * a condition is met or a timeout occurs + * + * @op: Operation + * @cond: Break condition + * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops). + * Please read usleep_range() function description for details and * limitations. * @timeout_us: Timeout in us, 0 means never timeout - * @sleep_before_read: if it is true, sleep @sleep_us before read. - * @args: arguments for @op poll + * @sleep_before_op: if it is true, sleep @sleep_us before operation. * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. * - * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. Must not + * Returns: 0 on success and -ETIMEDOUT upon a timeout. Must not * be called from atomic context if sleep_us or timeout_us are used. */ -#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ - sleep_before_read, args...) \ +#define poll_timeout_us(op, cond, sleep_us, timeout_us, sleep_before_op) \ ({ \ u64 __timeout_us = (timeout_us); \ unsigned long __sleep_us = (sleep_us); \ ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ might_sleep_if((__sleep_us) != 0); \ - if (sleep_before_read && __sleep_us) \ + if ((sleep_before_op) && __sleep_us) \ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ for (;;) { \ - (val) = op(args); \ + op; \ if (cond) \ break; \ if (__timeout_us && \ ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = op(args); \ + op; \ break; \ } \ if (__sleep_us) \ @@ -59,17 +56,16 @@ }) /** - * read_poll_timeout_atomic - Periodically poll an address until a condition is - * met or a timeout occurs - * @op: accessor function (takes @args as its arguments) - * @val: Variable to read the value into - * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Please - * read udelay() function description for details and + * poll_timeout_us_atomic - Periodically poll and perform an operation until + * a condition is met or a timeout occurs + * + * @op: Operation + * @cond: Break condition + * @delay_us: Time to udelay between operations in us (0 tight-loops). + * Please read udelay() function description for details and * limitations. * @timeout_us: Timeout in us, 0 means never timeout - * @delay_before_read: if it is true, delay @delay_us before read. - * @args: arguments for @op poll + * @delay_before_op: if it is true, delay @delay_us before operation. * * This macro does not rely on timekeeping. Hence it is safe to call even when * timekeeping is suspended, at the expense of an underestimation of wall clock @@ -78,27 +74,26 @@ * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. * - * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. + * Returns: 0 on success and -ETIMEDOUT upon a timeout. */ -#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \ - delay_before_read, args...) 
\ +#define poll_timeout_us_atomic(op, cond, delay_us, timeout_us, \ + delay_before_op) \ ({ \ u64 __timeout_us = (timeout_us); \ s64 __left_ns = __timeout_us * NSEC_PER_USEC; \ unsigned long __delay_us = (delay_us); \ u64 __delay_ns = __delay_us * NSEC_PER_USEC; \ - if (delay_before_read && __delay_us) { \ + if ((delay_before_op) && __delay_us) { \ udelay(__delay_us); \ if (__timeout_us) \ __left_ns -= __delay_ns; \ } \ for (;;) { \ - (val) = op(args); \ + op; \ if (cond) \ break; \ if (__timeout_us && __left_ns < 0) { \ - (val) = op(args); \ + op; \ break; \ } \ if (__delay_us) { \ @@ -113,6 +108,57 @@ (cond) ? 0 : -ETIMEDOUT; \ }) +/** + * read_poll_timeout - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @args as its arguments) + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. + * @timeout_us: Timeout in us, 0 means never timeout + * @sleep_before_read: if it is true, sleep @sleep_us before read. + * @args: arguments for @op poll + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + */ +#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ + sleep_before_read, args...) \ + poll_timeout_us((val) = op(args), cond, sleep_us, timeout_us, sleep_before_read) + +/** + * read_poll_timeout_atomic - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @args as its arguments) + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. + * @timeout_us: Timeout in us, 0 means never timeout + * @delay_before_read: if it is true, delay @delay_us before read. + * @args: arguments for @op poll + * + * This macro does not rely on timekeeping. Hence it is safe to call even when + * timekeeping is suspended, at the expense of an underestimation of wall clock + * time, which is rather minimal with a non-zero delay_us. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. + */ +#define read_poll_timeout_atomic(op, val, cond, sleep_us, timeout_us, \ + sleep_before_read, args...) 
\
+	poll_timeout_us_atomic((val) = op(args), cond, sleep_us, timeout_us, sleep_before_read)
+
 /**
  * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
  * @op: accessor function (takes @addr as its only argument)
-- cgit v1.2.3


From 563e5eca4ea3b6e1901cbc7cd6dc42731a8d2999 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Tue, 26 Aug 2025 15:18:58 +0300
Subject: iopoll: Avoid evaluating 'cond' twice in poll_timeout_us()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently poll_timeout_us() evaluates 'cond' twice at the end of the
success case. This is not desirable in case 'cond' itself is expensive.

Avoid the double evaluation by tracking the return value in a variable.

Need to use a triple underscore '___ret' name to avoid a conflict with
an existing double underscore '__ret' variable in the regmap code.

Cc: Lucas De Marchi
Cc: Dibin Moolakadan Subrahmanian
Cc: Imre Deak
Cc: David Laight
Cc: Geert Uytterhoeven
Cc: Matt Wagantall
Cc: Dejin Zheng
Cc: intel-gfx@lists.freedesktop.org
Cc: intel-xe@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Jani Nikula
Acked-by: Simona Vetter
Signed-off-by: Ville Syrjälä
Link: https://lore.kernel.org/r/20250826121859.15497-2-ville.syrjala@linux.intel.com
Signed-off-by: Jani Nikula
---
 include/linux/iopoll.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 440aca5b4b59..d8c801ad68fa 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -36,23 +36,30 @@
 	u64 __timeout_us = (timeout_us); \
 	unsigned long __sleep_us = (sleep_us); \
 	ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
+	int ___ret; \
 	might_sleep_if((__sleep_us) != 0); \
 	if ((sleep_before_op) && __sleep_us) \
 		usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
 	for (;;) { \
 		op; \
-		if (cond) \
+		if (cond) { \
+			___ret = 0; \
 			break; \
+		} \
 		if (__timeout_us && \
 		    ktime_compare(ktime_get(), __timeout) > 0) { \
 			op; \
+			if (cond) \
+				___ret = 0; \
+			else \
+				___ret = -ETIMEDOUT; \
 			break; \
 		} \
 		if (__sleep_us) \
 			usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
 		cpu_relax(); \
 	} \
-	(cond) ? 0 : -ETIMEDOUT; \
+	___ret; \
 })

 /**
@@ -83,6 +90,7 @@
 	s64 __left_ns = __timeout_us * NSEC_PER_USEC; \
 	unsigned long __delay_us = (delay_us); \
 	u64 __delay_ns = __delay_us * NSEC_PER_USEC; \
+	int ___ret; \
 	if ((delay_before_op) && __delay_us) { \
 		udelay(__delay_us); \
 		if (__timeout_us) \
@@ -90,10 +98,16 @@
 	} \
 	for (;;) { \
 		op; \
-		if (cond) \
+		if (cond) { \
+			___ret = 0; \
 			break; \
+		} \
 		if (__timeout_us && __left_ns < 0) { \
 			op; \
+			if (cond) \
+				___ret = 0; \
+			else \
+				___ret = -ETIMEDOUT; \
 			break; \
 		} \
 		if (__delay_us) { \
@@ -105,7 +119,7 @@
 		if (__timeout_us) \
 			__left_ns--; \
 	} \
-	(cond) ? 0 : -ETIMEDOUT; \
+	___ret; \
 })

 /**
-- cgit v1.2.3


From 3b6f62b6b5777f56e3fedc45041c2f61645b5204 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Tue, 26 Aug 2025 15:18:59 +0300
Subject: iopoll: Reorder the timeout handling in poll_timeout_us()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently poll_timeout_us() evaluates 'op' and 'cond' twice within the
loop, once at the start, and a second time after the timeout check.
While it's probably not a big deal to do it twice almost back to back,
it does make the macro a bit messy.
Simplify the implementation to evaluate the timeout at the very start,
then follow up with 'op'/'cond', and finally check if the timeout did
in fact happen or not.

For good measure throw in a compiler barrier between the timeout and
'op'/'cond' evaluations to make sure the compiler can't reorder the
operations (which could cause false positive timeouts).

The similar i915 __wait_for() macro already has the barrier, though
there it is between the 'op' and 'cond' evaluations, which seems like
it could still allow 'op' and the timeout evaluations to get reordered
incorrectly. I suppose the ktime_get() might itself act as a sufficient
barrier here, but better safe than sorry I guess.

Cc: Lucas De Marchi
Cc: Dibin Moolakadan Subrahmanian
Cc: Imre Deak
Cc: David Laight
Cc: Geert Uytterhoeven
Cc: Matt Wagantall
Cc: Dejin Zheng
Cc: intel-gfx@lists.freedesktop.org
Cc: intel-xe@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Jani Nikula
Acked-by: Simona Vetter
Signed-off-by: Ville Syrjälä
Link: https://lore.kernel.org/r/20250826121859.15497-3-ville.syrjala@linux.intel.com
Signed-off-by: Jani Nikula
---
 include/linux/iopoll.h | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index d8c801ad68fa..bdd2e0652bc3 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -41,18 +41,17 @@
 	if ((sleep_before_op) && __sleep_us) \
 		usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
 	for (;;) { \
+		bool __expired = __timeout_us && \
+			ktime_compare(ktime_get(), __timeout) > 0; \
+		/* guarantee 'op' and 'cond' are evaluated after timeout expired */ \
+		barrier(); \
 		op; \
 		if (cond) { \
 			___ret = 0; \
 			break; \
 		} \
-		if (__timeout_us && \
-		    ktime_compare(ktime_get(), __timeout) > 0) { \
-			op; \
-			if (cond) \
-				___ret = 0; \
-			else \
-				___ret = -ETIMEDOUT; \
+		if (__expired) { \
+			___ret = -ETIMEDOUT; \
 			break; \
 		} \
 		if (__sleep_us) \
@@ -97,17 +96,16 @@
 		__left_ns -= __delay_ns; \
 	} \
 	for (;;) { \
+		bool __expired = __timeout_us && __left_ns < 0; \
+		/* guarantee 'op' and 'cond' are evaluated after timeout expired */ \
+		barrier(); \
 		op; \
 		if (cond) { \
 			___ret = 0; \
 			break; \
 		} \
-		if (__timeout_us && __left_ns < 0) { \
-			op; \
-			if (cond) \
-				___ret = 0; \
-			else \
-				___ret = -ETIMEDOUT; \
+		if (__expired) { \
+			___ret = -ETIMEDOUT; \
 			break; \
 		} \
 		if (__delay_us) { \
-- cgit v1.2.3


From cb4d5a6eb600a43c2e3ec7f54e06d07aa33d8062 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 26 Aug 2025 12:50:28 +0000
Subject: net: add sk_drops_skbadd() helper

The existing sk_drops_add() helper is renamed to sk_drops_skbadd().

Add a new sk_drops_add() and convert sk_drops_inc() to use it.
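
For illustration, the resulting helpers plausibly take this shape (a
sketch of the naming described above, not the verbatim upstream code):

static inline void sk_drops_add(struct sock *sk, int segs)
{
        atomic_add(segs, &sk->sk_drops);
}

static inline void sk_drops_skbadd(struct sock *sk, const struct sk_buff *skb)
{
        /* one skb can represent several GSO segments */
        sk_drops_add(sk, max_t(u16, 1, skb_shinfo(skb)->gso_segs));
}

static inline void sk_drops_inc(struct sock *sk)
{
        sk_drops_add(sk, 1);
}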
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-3-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/skmsg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 0b9095a281b8..49847888c287 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -315,7 +315,7 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, static inline void sock_drop(struct sock *sk, struct sk_buff *skb) { - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); kfree_skb(skb); } -- cgit v1.2.3 From 51132b99f01ce05f8008f0fb189d83eed484bd53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 12:50:30 +0000 Subject: udp: add drop_counters to udp socket When a packet flood hits one or more UDP sockets, many cpus have to update sk->sk_drops. This slows down other cpus, because currently sk_drops is in sock_write_rx group. Add a socket_drop_counters structure to udp sockets. Using dedicated cache lines to hold drop counters makes sure that consumers no longer suffer from false sharing if/when producers only change sk->sk_drops. This adds 128 bytes per UDP socket. Tested with the following stress test, sending about 11 Mpps to a dual socket AMD EPYC 7B13 64-Core. super_netperf 20 -t UDP_STREAM -H DUT -l10 -- -n -P,1000 -m 120 Note: due to socket lookup, only one UDP socket is receiving packets on DUT. Then measure receiver (DUT) behavior. We can see both consumer and BH handlers can process more packets per second. Before: nstat -n ; sleep 1 ; nstat | grep Udp Udp6InDatagrams 615091 0.0 Udp6InErrors 3904277 0.0 Udp6RcvbufErrors 3904277 0.0 After: nstat -n ; sleep 1 ; nstat | grep Udp Udp6InDatagrams 816281 0.0 Udp6InErrors 7497093 0.0 Udp6RcvbufErrors 7497093 0.0 Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-5-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 4e1a672af4c5..981506be1e15 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -108,6 +108,7 @@ struct udp_sock { * the last UDP socket cacheline. */ struct hlist_node tunnel_list; + struct socket_drop_counters drop_counters; }; #define udp_test_bit(nr, sk) \ -- cgit v1.2.3 From b81aa23234d94d99951761d9864061d774633ba9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 12:50:31 +0000 Subject: inet: raw: add drop_counters to raw sockets When a packet flood hits one or more RAW sockets, many cpus have to update sk->sk_drops. This slows down other cpus, because currently sk_drops is in sock_write_rx group. Add a socket_drop_counters structure to raw sockets. Using dedicated cache lines to hold drop counters makes sure that consumers no longer suffer from false sharing if/when producers only change sk->sk_drops. This adds 128 bytes per RAW socket. 
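
To make the layout concrete, the structure plausibly looks like the
sketch below, with each counter on a dedicated cache line; two 64-byte
lines account for the 128 bytes quoted above. This is an assumption for
illustration, not the verbatim definition.

struct socket_drop_counters {
        atomic_t drops0 ____cacheline_aligned_in_smp;
        atomic_t drops1 ____cacheline_aligned_in_smp;
};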
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-6-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bc6ec2959173..261d02efb615 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -295,7 +295,7 @@ struct raw6_sock { __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; - + struct socket_drop_counters drop_counters; struct ipv6_pinfo inet6; }; -- cgit v1.2.3 From 3ea299d3dccdb8554057d0a87552e7673baea95d Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Mon, 11 Aug 2025 09:40:40 +0200 Subject: mtd: nand: qpic-common: remove a bunch of unused defines A bunch of definitions in the 'nand-qpic-common.h' header became unused after the conversion of the 'qcom_nandc' and 'spi-qpic-snand' drivers to use the FIELD_PREP() macro, so remove those. No functional changes. Signed-off-by: Gabor Juhos Signed-off-by: Miquel Raynal --- include/linux/mtd/nand-qpic-common.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index 4e694b1aabbd..e8201d1b7cf9 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -71,14 +71,10 @@ /* NAND_DEVn_CFG0 bits */ #define DISABLE_STATUS_AFTER_WRITE BIT(4) -#define CW_PER_PAGE 6 #define CW_PER_PAGE_MASK GENMASK(8, 6) -#define UD_SIZE_BYTES 9 #define UD_SIZE_BYTES_MASK GENMASK(18, 9) #define ECC_PARITY_SIZE_BYTES_RS GENMASK(22, 19) -#define SPARE_SIZE_BYTES 23 #define SPARE_SIZE_BYTES_MASK GENMASK(26, 23) -#define NUM_ADDR_CYCLES 27 #define NUM_ADDR_CYCLES_MASK GENMASK(29, 27) #define STATUS_BFR_READ BIT(30) #define SET_RD_MODE_AFTER_STATUS BIT(31) @@ -86,26 +82,20 @@ /* NAND_DEVn_CFG0 bits */ #define DEV0_CFG1_ECC_DISABLE BIT(0) #define WIDE_FLASH BIT(1) -#define NAND_RECOVERY_CYCLES 2 #define NAND_RECOVERY_CYCLES_MASK GENMASK(4, 2) #define CS_ACTIVE_BSY BIT(5) -#define BAD_BLOCK_BYTE_NUM 6 #define BAD_BLOCK_BYTE_NUM_MASK GENMASK(15, 6) #define BAD_BLOCK_IN_SPARE_AREA BIT(16) -#define WR_RD_BSY_GAP 17 #define WR_RD_BSY_GAP_MASK GENMASK(22, 17) #define ENABLE_BCH_ECC BIT(27) /* NAND_DEV0_ECC_CFG bits */ #define ECC_CFG_ECC_DISABLE BIT(0) #define ECC_SW_RESET BIT(1) -#define ECC_MODE 4 #define ECC_MODE_MASK GENMASK(5, 4) #define ECC_MODE_4BIT 0 #define ECC_MODE_8BIT 1 -#define ECC_PARITY_SIZE_BYTES_BCH 8 #define ECC_PARITY_SIZE_BYTES_BCH_MASK GENMASK(12, 8) -#define ECC_NUM_DATA_BYTES 16 #define ECC_NUM_DATA_BYTES_MASK GENMASK(25, 16) #define ECC_FORCE_CLK_OPEN BIT(30) @@ -120,7 +110,6 @@ #define SEQ_READ_START_VLD BIT(4) /* NAND_EBI2_ECC_BUF_CFG bits */ -#define NUM_STEPS 0 #define NUM_STEPS_MASK GENMASK(9, 0) /* NAND_ERASED_CW_DETECT_CFG bits */ @@ -141,11 +130,8 @@ #define ERASED_CW (CODEWORD_ALL_ERASED | CODEWORD_ERASED) /* NAND_READ_LOCATION_n bits */ -#define READ_LOCATION_OFFSET 0 #define READ_LOCATION_OFFSET_MASK GENMASK(9, 0) -#define READ_LOCATION_SIZE 16 #define READ_LOCATION_SIZE_MASK GENMASK(25, 16) -#define READ_LOCATION_LAST 31 #define READ_LOCATION_LAST_MASK BIT(31) /* Version Mask */ -- cgit v1.2.3 From 5f284dc15ca8695d0394414045ac64616a3b0e69 Mon Sep 17 00:00:00 2001 From: Tianling Shen Date: Mon, 25 Aug 2025 01:00:13 +0800 Subject: mtd: spinand: add support for FudanMicro FM25S01A Add support for FudanMicro FM25S01A SPI NAND. 
Datasheet: http://eng.fmsh.com/nvm/FM25S01A_ds_eng.pdf
Signed-off-by: Tianling Shen
Signed-off-by: Miquel Raynal
---
 include/linux/mtd/spinand.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 27a45bdab7ec..927c10d78769 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -355,6 +355,7 @@ struct spinand_manufacturer {
 extern const struct spinand_manufacturer alliancememory_spinand_manufacturer;
 extern const struct spinand_manufacturer ato_spinand_manufacturer;
 extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer;
+extern const struct spinand_manufacturer fmsh_spinand_manufacturer;
 extern const struct spinand_manufacturer foresee_spinand_manufacturer;
 extern const struct spinand_manufacturer gigadevice_spinand_manufacturer;
 extern const struct spinand_manufacturer macronix_spinand_manufacturer;
-- cgit v1.2.3


From 30c2b98aa84c76f2ae60e66dd4ec2d9497713359 Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay
Date: Thu, 28 Aug 2025 12:33:17 +0530
Subject: x86/apic: Add new driver for Secure AVIC

The Secure AVIC feature provides SEV-SNP guests hardware acceleration
for performance sensitive APIC accesses while securely managing the
guest-owned APIC state through the use of a private APIC backing page.
This helps prevent the hypervisor from generating unexpected interrupts
for a vCPU or otherwise violating architectural assumptions around the
APIC behavior.

Add a new x2APIC driver that will serve as the base of the Secure AVIC
support. It is initially the same as the x2APIC physical driver
(without IPI callbacks), but will be modified as features are
implemented.

As the new driver does not implement Secure AVIC features yet, if the
hypervisor sets the Secure AVIC bit in SEV_STATUS, maintain the
existing behavior to enforce guest termination.

[ bp: Massage commit message. ]

Co-developed-by: Kishon Vijay Abraham I
Signed-off-by: Kishon Vijay Abraham I
Signed-off-by: Neeraj Upadhyay
Signed-off-by: Borislav Petkov (AMD)
Reviewed-by: Tianyu Lan
Link: https://lore.kernel.org/20250828070334.208401-2-Neeraj.Upadhyay@amd.com
---
 include/linux/cc_platform.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 0bf7d33a1048..7fcec025c5e0 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -96,6 +96,14 @@ enum cc_attr {
 	 * enabled to run SEV-SNP guests.
 	 */
 	CC_ATTR_HOST_SEV_SNP,
+
+	/**
+	 * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active.
+	 *
+	 * The host kernel is running with the necessary features enabled
+	 * to run SEV-SNP guests with full Secure AVIC capabilities.
+	 */
+	CC_ATTR_SNP_SECURE_AVIC,
 };

 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM
-- cgit v1.2.3


From 9a98f9e84cfbeaa51af42ba2b8bbbde046c709a7 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Tue, 26 Aug 2025 11:39:01 -0400
Subject: fs: make the i_state flags an enum

Adjusting i_state flags always means updating the values manually.
Bring these forward into the 2020s and make a nice clean macro for
defining the i_state values as an enum, providing __ variants for the
cases where we need the bit position instead of the actual value, and
leaving the actual NAME as the 1U << bit value.
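
To illustrate the bit/mask split, a typical consumer tests the flag
value but sleeps on the bit position (a sketch of existing usage, not a
hunk from this patch):

spin_lock(&inode->i_lock);
if (inode->i_state & I_NEW) {           /* mask form: 1U << __I_NEW */
        spin_unlock(&inode->i_lock);
        wait_on_inode(inode);           /* waits on bit number __I_NEW */
} else {
        spin_unlock(&inode->i_lock);
}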
Reviewed-by: Christian Brauner Signed-off-by: Josef Bacik Link: https://lore.kernel.org/0da9348da6ece0dce12fccec07b1dd2b8e4cfdab.1756222464.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 231 +++++++++++++++++++++++++++-------------------------- 1 file changed, 119 insertions(+), 112 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 12ecc6b0e6f9..c34554d8c4fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -664,6 +664,124 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_MGTIME 0x0020 #define IOP_CACHED_LINK 0x0040 +/* + * Inode state bits. Protected by inode->i_lock + * + * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, + * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. + * + * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, + * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at + * various stages of removing an inode. + * + * Two bits are used for locking and completion notification, I_NEW and I_SYNC. + * + * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on + * fdatasync() (unless I_DIRTY_DATASYNC is also set). + * Timestamp updates are the usual cause. + * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of + * these changes separately from I_DIRTY_SYNC so that we + * don't have to write inode on fdatasync() when only + * e.g. the timestamps have changed. + * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. + * I_DIRTY_TIME The inode itself has dirty timestamps, and the + * lazytime mount option is enabled. We keep track of this + * separately from I_DIRTY_SYNC in order to implement + * lazytime. This gets cleared if I_DIRTY_INODE + * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But + * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already + * in place because writeback might already be in progress + * and we don't want to lose the time update + * I_NEW Serves as both a mutex and completion notification. + * New inodes set I_NEW. If two processes both create + * the same inode, one of them will release its inode and + * wait for I_NEW to be released before returning. + * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can + * also cause waiting on I_NEW, without I_NEW actually + * being set. find_inode() uses this to prevent returning + * nearly-dead inodes. + * I_WILL_FREE Must be set when calling write_inode_now() if i_count + * is zero. I_FREEING must be set when I_WILL_FREE is + * cleared. + * I_FREEING Set when inode is about to be freed but still has dirty + * pages or buffers attached or the inode itself is still + * dirty. + * I_CLEAR Added by clear_inode(). In this state the inode is + * clean and can be destroyed. Inode keeps I_FREEING. + * + * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are + * prohibited for many purposes. iget() must wait for + * the inode to be completely released, then create it + * anew. Other functions will just ignore such inodes, + * if appropriate. I_NEW is used for waiting. + * + * I_SYNC Writeback of inode is running. The bit is set during + * data writeback, and cleared with a wakeup on the bit + * address once it is done. The bit is also used to pin + * the inode in memory for flusher thread. + * + * I_REFERENCED Marks the inode as recently references on the LRU list. + * + * I_WB_SWITCH Cgroup bdi_writeback switching in progress. 
Used to + * synchronize competing switching instances and to tell + * wb stat updates to grab the i_pages lock. See + * inode_switch_wbs_work_fn() for details. + * + * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper + * and work dirs among overlayfs mounts. + * + * I_CREATING New object's inode in the middle of setting up. + * + * I_DONTCACHE Evict inode as soon as it is not used anymore. + * + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. + * Used to detect that mark_inode_dirty() should not move + * inode between dirty lists. + * + * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. + * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * + * Q: What is the difference between I_WILL_FREE and I_FREEING? + * + * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait + * upon. There's one free address left. + */ + +enum inode_state_bits { + __I_NEW = 0U, + __I_SYNC = 1U, + __I_LRU_ISOLATING = 2U + /* reserved wait address bit 3 */ +}; + +enum inode_state_flags_t { + I_NEW = (1U << __I_NEW), + I_SYNC = (1U << __I_SYNC), + I_LRU_ISOLATING = (1U << __I_LRU_ISOLATING), + /* reserved flag bit 3 */ + I_DIRTY_SYNC = (1U << 4), + I_DIRTY_DATASYNC = (1U << 5), + I_DIRTY_PAGES = (1U << 6), + I_WILL_FREE = (1U << 7), + I_FREEING = (1U << 8), + I_CLEAR = (1U << 9), + I_REFERENCED = (1U << 10), + I_LINKABLE = (1U << 11), + I_DIRTY_TIME = (1U << 12), + I_WB_SWITCH = (1U << 13), + I_OVL_INUSE = (1U << 14), + I_CREATING = (1U << 15), + I_DONTCACHE = (1U << 16), + I_SYNC_QUEUED = (1U << 17), + I_PINNING_NETFS_WB = (1U << 18) +}; + +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) +#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) +#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) + /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -722,7 +840,7 @@ struct inode { #endif /* Misc */ - u32 i_state; + enum inode_state_flags_t i_state; /* 32-bit hole */ struct rw_semaphore i_rwsem; @@ -2482,117 +2600,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, }; } -/* - * Inode state bits. Protected by inode->i_lock - * - * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, - * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. - * - * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, - * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at - * various stages of removing an inode. - * - * Two bits are used for locking and completion notification, I_NEW and I_SYNC. - * - * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on - * fdatasync() (unless I_DIRTY_DATASYNC is also set). - * Timestamp updates are the usual cause. - * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of - * these changes separately from I_DIRTY_SYNC so that we - * don't have to write inode on fdatasync() when only - * e.g. the timestamps have changed. - * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. - * I_DIRTY_TIME The inode itself has dirty timestamps, and the - * lazytime mount option is enabled. We keep track of this - * separately from I_DIRTY_SYNC in order to implement - * lazytime. This gets cleared if I_DIRTY_INODE - * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. 
But - * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already - * in place because writeback might already be in progress - * and we don't want to lose the time update - * I_NEW Serves as both a mutex and completion notification. - * New inodes set I_NEW. If two processes both create - * the same inode, one of them will release its inode and - * wait for I_NEW to be released before returning. - * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can - * also cause waiting on I_NEW, without I_NEW actually - * being set. find_inode() uses this to prevent returning - * nearly-dead inodes. - * I_WILL_FREE Must be set when calling write_inode_now() if i_count - * is zero. I_FREEING must be set when I_WILL_FREE is - * cleared. - * I_FREEING Set when inode is about to be freed but still has dirty - * pages or buffers attached or the inode itself is still - * dirty. - * I_CLEAR Added by clear_inode(). In this state the inode is - * clean and can be destroyed. Inode keeps I_FREEING. - * - * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are - * prohibited for many purposes. iget() must wait for - * the inode to be completely released, then create it - * anew. Other functions will just ignore such inodes, - * if appropriate. I_NEW is used for waiting. - * - * I_SYNC Writeback of inode is running. The bit is set during - * data writeback, and cleared with a wakeup on the bit - * address once it is done. The bit is also used to pin - * the inode in memory for flusher thread. - * - * I_REFERENCED Marks the inode as recently references on the LRU list. - * - * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to - * synchronize competing switching instances and to tell - * wb stat updates to grab the i_pages lock. See - * inode_switch_wbs_work_fn() for details. - * - * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper - * and work dirs among overlayfs mounts. - * - * I_CREATING New object's inode in the middle of setting up. - * - * I_DONTCACHE Evict inode as soon as it is not used anymore. - * - * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. - * Used to detect that mark_inode_dirty() should not move - * inode between dirty lists. - * - * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. - * - * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding - * i_count. - * - * Q: What is the difference between I_WILL_FREE and I_FREEING? - * - * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait - * upon. There's one free address left. 
- */
-#define __I_NEW			0
-#define I_NEW			(1 << __I_NEW)
-#define __I_SYNC		1
-#define I_SYNC			(1 << __I_SYNC)
-#define __I_LRU_ISOLATING	2
-#define I_LRU_ISOLATING		(1 << __I_LRU_ISOLATING)
-
-#define I_DIRTY_SYNC		(1 << 3)
-#define I_DIRTY_DATASYNC	(1 << 4)
-#define I_DIRTY_PAGES		(1 << 5)
-#define I_WILL_FREE		(1 << 6)
-#define I_FREEING		(1 << 7)
-#define I_CLEAR			(1 << 8)
-#define I_REFERENCED		(1 << 9)
-#define I_LINKABLE		(1 << 10)
-#define I_DIRTY_TIME		(1 << 11)
-#define I_WB_SWITCH		(1 << 12)
-#define I_OVL_INUSE		(1 << 13)
-#define I_CREATING		(1 << 14)
-#define I_DONTCACHE		(1 << 15)
-#define I_SYNC_QUEUED		(1 << 16)
-#define I_PINNING_NETFS_WB	(1 << 17)
-
-#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
-#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
-#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
-
 extern void __mark_inode_dirty(struct inode *, int);

 static inline void mark_inode_dirty(struct inode *inode)
 {
-- cgit v1.2.3


From db2ab24a341ce89351a1bede37a96a3e3ce1726a Mon Sep 17 00:00:00 2001
From: Lauri Vasama
Date: Wed, 27 Aug 2025 16:39:00 +0300
Subject: Add RWF_NOSIGNAL flag for pwritev2

For a user mode library to avoid generating SIGPIPE signals (e.g.
because this behaviour is not portable across operating systems) is
cumbersome. It is generally bad form to change the process-wide signal
mask in a library, so a local solution is needed instead.

For I/O performed directly using system calls (synchronous or readiness
based asynchronous) this currently involves applying a thread-specific
signal mask before the operation and reverting it afterwards. This can
be avoided when it is known that the file descriptor refers to neither
a pipe nor a socket, but a conservative implementation must always
apply the mask. This incurs the cost of two additional system calls.
In the case of sockets, the existing MSG_NOSIGNAL flag can be used with
send.

For asynchronous I/O performed using io_uring, currently the only
option (apart from MSG_NOSIGNAL for sockets) is to mask SIGPIPE
entirely in the call to io_uring_enter. Thankfully io_uring_enter takes
a signal mask, so only a single syscall is needed. However, copying the
signal mask on every call incurs a non-zero performance penalty.
Furthermore, this mask applies to all completions, meaning that if the
non-signaling behaviour is desired only for some subset of operations,
the desired signals must be raised manually from user-mode depending on
the completed operation.

Add RWF_NOSIGNAL flag for pwritev2. This flag prevents the SIGPIPE
signal from being raised when writing on disconnected pipes or sockets.
The flag is handled directly by the pipe filesystem and converted to
the existing MSG_NOSIGNAL flag for sockets.
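
For illustration, a library write path then reduces to a single system
call (userspace sketch, assuming the new flag is exposed through the
uapi headers):

static ssize_t write_nosignal(int fd, const void *buf, size_t len)
{
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };

        /* offset -1: use and update the current file position; a
         * disconnected pipe/socket now yields EPIPE without SIGPIPE */
        return pwritev2(fd, &iov, 1, -1, RWF_NOSIGNAL);
}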
Signed-off-by: Lauri Vasama Link: https://lore.kernel.org/20250827133901.1820771-1-git@vasama.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 780e9c774c54..34693cae15a2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -356,6 +356,7 @@ struct readahead_control; #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC #define IOCB_DONTCACHE (__force int) RWF_DONTCACHE +#define IOCB_NOSIGNAL (__force int) RWF_NOSIGNAL /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) -- cgit v1.2.3 From 72cdc67e7fa74931b055df3a76852bab551f1a04 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 28 Aug 2025 09:20:16 +0800 Subject: pppoe: remove rwlock usage Like ppp_generic.c, convert the PPPoE socket hash table to use RCU for lookups and a spinlock for updates. This removes rwlock usage and allows lockless readers on the fast path. - Mark hash table and list pointers as __rcu. - Use spin_lock() to protect writers. - Readers use rcu_dereference() under rcu_read_lock(). All known callers of get_item() already hold the RCU read lock, so no additional locking is needed. - get_item() now uses refcount_inc_not_zero() instead of sock_hold() to safely take a reference. This prevents crashes if a socket is already in the process of being freed (sk_refcnt == 0). - Set SOCK_RCU_FREE to defer socket freeing until after an RCU grace period. - Move skb_queue_purge() into sk_destruct callback to ensure purge happens after an RCU grace period. Signed-off-by: Qingfang Deng Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250828012018.15922-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/if_pppox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index ff3beda1312c..db45d6f1c4f4 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -43,7 +43,7 @@ struct pppox_sock { /* struct sock must be the first member of pppox_sock */ struct sock sk; struct ppp_channel chan; - struct pppox_sock *next; /* for hash table */ + struct pppox_sock __rcu *next; /* for hash table */ union { struct pppoe_opt pppoe; struct pptp_opt pptp; -- cgit v1.2.3 From 9529320ad64e614cfaf96e6b8e3d8c0a1245160c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Aug 2025 10:27:37 +0000 Subject: inet_diag: change inet_diag_bc_sk() first argument We want to have access to the inet_diag_dump_data structure in the following patch. This patch removes duplication in callers. 
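
The call-site change then looks roughly like this (a sketch, assuming
the dumper already stashed its state in cb->data):

struct inet_diag_dump_data *cb_data = cb->data;

/* was: if (bc && !inet_diag_bc_sk(bc, sk)) */
if (!inet_diag_bc_sk(cb_data, sk))
        return 0;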
Signed-off-by: Eric Dumazet
Reviewed-by: Kuniyuki Iwashima
Link: https://patch.msgid.link/20250828102738.2065992-5-edumazet@google.com
Signed-off-by: Jakub Kicinski
---
 include/linux/inet_diag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 30bf8f7ea62b..86a0641ec36e 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -46,7 +46,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		      const struct inet_diag_req_v2 *req,
 		      u16 nlmsg_flags, bool net_admin);
-int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
+int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk);
 void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
-- cgit v1.2.3


From 95fa78830e5b2eb2041174c7f9549c746e003dd6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 28 Aug 2025 10:27:38 +0000
Subject: inet_diag: avoid cache line misses in inet_diag_bc_sk()

inet_diag_bc_sk() pulls five cache lines per socket, while most filters
only need the first two.

Add three booleans to struct inet_diag_dump_data that are selectively
set if a filter needs specific socket fields.

- mark_needed		/* INET_DIAG_BC_MARK_COND present. */
- cgroup_needed		/* INET_DIAG_BC_CGROUP_COND present. */
- userlocks_needed	/* INET_DIAG_BC_AUTO present. */

This removes millions of cache line misses per ss invocation when
simple filters are specified on busy servers.

offsetof(struct sock, sk_userlocks) = 0xf3
offsetof(struct sock, sk_mark) = 0x20c
offsetof(struct sock, sk_cgrp_data) = 0x298

Signed-off-by: Eric Dumazet
Reviewed-by: Kuniyuki Iwashima
Link: https://patch.msgid.link/20250828102738.2065992-6-edumazet@google.com
Signed-off-by: Jakub Kicinski
---
 include/linux/inet_diag.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 86a0641ec36e..704fd415c2b4 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -38,6 +38,11 @@ struct inet_diag_dump_data {
 #define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
 	struct bpf_sk_storage_diag *bpf_stg_diag;
+	bool mark_needed;	/* INET_DIAG_BC_MARK_COND present. */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	bool cgroup_needed;	/* INET_DIAG_BC_CGROUP_COND present. */
+#endif
+	bool userlocks_needed;	/* INET_DIAG_BC_AUTO present. */
 };

 struct inet_connection_sock;
-- cgit v1.2.3


From a59076f2669ec23a122549e1f4114e8d4255b632 Mon Sep 17 00:00:00 2001
From: Casey Schaufler
Date: Sat, 16 Aug 2025 10:28:57 -0700
Subject: lsm: security_lsmblob_to_secctx module selection

Add a parameter lsmid to security_lsmblob_to_secctx() to identify which
of the security modules that may be active should provide the security
context. If the value of lsmid is LSM_ID_UNDEF the first LSM providing
a hook is used. security_secid_to_secctx() is unchanged, and will
always report the first LSM providing a hook.
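
For illustration, a caller can now either take the first active LSM or
pin a specific module (a sketch; LSM_ID_SELINUX stands in for any
module id and error handling is elided):

struct lsm_context ctx;

/* first LSM providing a hook, i.e. the old behavior */
if (security_lsmprop_to_secctx(&prop, &ctx, LSM_ID_UNDEF) >= 0)
        security_release_secctx(&ctx);

/* or request one specific module's context */
if (security_lsmprop_to_secctx(&prop, &ctx, LSM_ID_SELINUX) >= 0)
        security_release_secctx(&ctx);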
Signed-off-by: Casey Schaufler
[PM: subj tweak]
Signed-off-by: Paul Moore
---
 include/linux/security.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 521bcb5b9717..6d1ed6e7387b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -567,7 +567,8 @@ int security_getprocattr(struct task_struct *p, int lsmid, const char *name,
 int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
 int security_ismaclabel(const char *name);
 int security_secid_to_secctx(u32 secid, struct lsm_context *cp);
-int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp);
+int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp,
+			       int lsmid);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(struct lsm_context *cp);
 void security_inode_invalidate_secctx(struct inode *inode);
@@ -1551,7 +1552,8 @@ static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp)
 }

 static inline int security_lsmprop_to_secctx(struct lsm_prop *prop,
-					     struct lsm_context *cp)
+					     struct lsm_context *cp,
+					     int lsmid)
 {
 	return -EOPNOTSUPP;
 }
-- cgit v1.2.3


From eb59d494eebd4c5414728a35cdea6a0ba78ff26e Mon Sep 17 00:00:00 2001
From: Casey Schaufler
Date: Sat, 16 Aug 2025 10:28:58 -0700
Subject: audit: add record for multiple task security contexts

Replace the single skb pointer in an audit_buffer with a list of skb
pointers. Add the audit_stamp information to the audit_buffer as
there's no guarantee that there will be an audit_context containing the
stamp associated with the event. At audit_log_end() time, create the
auxiliary records that have been added to the list. Functions are
created to manage the skb list in the audit_buffer.

Create a new audit record AUDIT_MAC_TASK_CONTEXTS. An example of the
MAC_TASK_CONTEXTS record is:

  type=MAC_TASK_CONTEXTS
  msg=audit(1600880931.832:113)
  subj_apparmor=unconfined
  subj_smack=_

When an audit event includes an AUDIT_MAC_TASK_CONTEXTS record the
"subj=" field in other records in the event will be "subj=?". An
AUDIT_MAC_TASK_CONTEXTS record is supplied when the system has multiple
security modules that may make access decisions based on a subject
security context.

Refactor audit_log_task_context(), creating a new audit_log_subj_ctx().
This is used in netlabel auditing to provide multiple subject security
contexts as necessary.
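
A caller-side sketch of the refactored helper (illustrative; the
subject prop lookup helper is assumed and error handling is elided):

struct lsm_prop prop;

security_current_getlsmprop_subj(&prop);
/* with a single subject-context LSM this logs subj= inline; with
 * several it logs subj=? and queues a MAC_TASK_CONTEXTS auxiliary
 * record that is emitted at audit_log_end() */
audit_log_subj_ctx(ab, &prop);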
Suggested-by: Paul Moore
Signed-off-by: Casey Schaufler
[PM: subj tweak, audit example readability indents]
Signed-off-by: Paul Moore
---
 include/linux/audit.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index e3f06eba9c6e..a1f068bcb3a0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -37,6 +37,8 @@ struct audit_watch;
 struct audit_tree;
 struct sk_buff;
 struct kern_ipc_perm;
+struct lsm_id;
+struct lsm_prop;

 struct audit_krule {
 	u32			pflags;
@@ -147,6 +149,9 @@ extern unsigned compat_signal_class[];
 #define AUDIT_TTY_ENABLE	BIT(0)
 #define AUDIT_TTY_LOG_PASSWD	BIT(1)

+/* bit values for audit_cfg_lsm */
+#define AUDIT_CFG_LSM_SECCTX_SUBJECT	BIT(0)
+
 struct filename;

 #define AUDIT_OFF	0
@@ -185,6 +190,7 @@ extern void audit_log_path_denied(int type,
 				  const char *operation);
 extern void audit_log_lost(const char *message);

+extern int audit_log_subj_ctx(struct audit_buffer *ab, struct lsm_prop *prop);
 extern int audit_log_task_context(struct audit_buffer *ab);
 extern void audit_log_task_info(struct audit_buffer *ab);

@@ -210,6 +216,8 @@ extern u32 audit_enabled;

 extern int audit_signal_info(int sig, struct task_struct *t);

+extern void audit_cfg_lsm(const struct lsm_id *lsmid, int flags);
+
 #else /* CONFIG_AUDIT */
 static inline __printf(4, 5)
 void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type,
@@ -245,6 +253,11 @@ static inline void audit_log_key(struct audit_buffer *ab, char *key)
 { }
 static inline void audit_log_path_denied(int type, const char *operation)
 { }
+static inline int audit_log_subj_ctx(struct audit_buffer *ab,
+				     struct lsm_prop *prop)
+{
+	return 0;
+}
 static inline int audit_log_task_context(struct audit_buffer *ab)
 {
 	return 0;
@@ -269,6 +282,9 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
 	return 0;
 }

+static inline void audit_cfg_lsm(const struct lsm_id *lsmid, int flags)
+{ }
+
 #endif /* CONFIG_AUDIT */

 #ifdef CONFIG_AUDIT_COMPAT_GENERIC
-- cgit v1.2.3


From 0ffbc876d03c80b83d70aeefac7bbb94a9f4e135 Mon Sep 17 00:00:00 2001
From: Casey Schaufler
Date: Sat, 16 Aug 2025 10:28:59 -0700
Subject: audit: add record for multiple object contexts

Create a new audit record AUDIT_MAC_OBJ_CONTEXTS. An example of the
MAC_OBJ_CONTEXTS record is:

  type=MAC_OBJ_CONTEXTS
  msg=audit(1601152467.009:1050):
  obj_selinux=unconfined_u:object_r:user_home_t:s0

When an audit event includes an AUDIT_MAC_OBJ_CONTEXTS record the
"obj=" field in other records in the event will be "obj=?". An
AUDIT_MAC_OBJ_CONTEXTS record is supplied when the system has multiple
security modules that may make access decisions based on an object
security context.
Signed-off-by: Casey Schaufler [PM: subj tweak, audit example readability indents] Signed-off-by: Paul Moore --- include/linux/audit.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index a1f068bcb3a0..536f8ee8da81 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -151,6 +151,7 @@ extern unsigned compat_signal_class[]; /* bit values for audit_cfg_lsm */ #define AUDIT_CFG_LSM_SECCTX_SUBJECT BIT(0) +#define AUDIT_CFG_LSM_SECCTX_OBJECT BIT(1) struct filename; @@ -191,6 +192,7 @@ extern void audit_log_path_denied(int type, extern void audit_log_lost(const char *message); extern int audit_log_subj_ctx(struct audit_buffer *ab, struct lsm_prop *prop); +extern int audit_log_obj_ctx(struct audit_buffer *ab, struct lsm_prop *prop); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); @@ -258,6 +260,11 @@ static inline int audit_log_subj_ctx(struct audit_buffer *ab, { return 0; } +static inline int audit_log_obj_ctx(struct audit_buffer *ab, + struct lsm_prop *prop) +{ + return 0; +} static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; -- cgit v1.2.3 From 1d8fdabe19267338f29b58f968499e5b55e6a3b6 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 29 Aug 2025 12:25:43 +0100 Subject: iio: frequency: adf4350: Fix ADF4350_REG3_12BIT_CLKDIV_MODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clk div bits (2 bits wide) do not start in bit 16 but in bit 15. Fix it accordingly. Fixes: e31166f0fd48 ("iio: frequency: New driver for Analog Devices ADF4350/ADF4351 Wideband Synthesizers") Signed-off-by: Michael Hennerich Signed-off-by: Nuno Sá Link: https://patch.msgid.link/20250829-adf4350-fix-v2-2-0bf543ba797d@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/frequency/adf4350.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/frequency/adf4350.h b/include/linux/iio/frequency/adf4350.h index de45cf2ee1e4..ce2086f97e3f 100644 --- a/include/linux/iio/frequency/adf4350.h +++ b/include/linux/iio/frequency/adf4350.h @@ -51,7 +51,7 @@ /* REG3 Bit Definitions */ #define ADF4350_REG3_12BIT_CLKDIV(x) ((x) << 3) -#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 16) +#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 15) #define ADF4350_REG3_12BIT_CSR_EN (1 << 18) #define ADF4351_REG3_CHARGE_CANCELLATION_EN (1 << 21) #define ADF4351_REG3_ANTI_BACKLASH_3ns_EN (1 << 22) -- cgit v1.2.3 From 37b27bd5d6217b75d315f28b4399aad0a336f299 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Aug 2025 11:39:02 -0400 Subject: fs: add an icount_read helper Instead of doing direct access to ->i_count, add a helper to handle this. This will make it easier to convert i_count to a refcount later. 
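
Call sites then change mechanically, along these lines (illustrative,
not a hunk from this patch):

-	if (atomic_read(&inode->i_count) > 1)
+	if (icount_read(inode) > 1)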
Signed-off-by: Josef Bacik Link: https://lore.kernel.org/9bc62a84c6b9d6337781203f60837bd98fbc4a96.1756222464.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c34554d8c4fe..c4fd010cf5bf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2611,6 +2611,11 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } +static inline int icount_read(const struct inode *inode) +{ + return atomic_read(&inode->i_count); +} + /* * Returns true if the given inode itself only has dirty timestamps (its pages * may still be dirty) and isn't currently being allocated or freed. -- cgit v1.2.3 From e5bca063c150877c45b88ff134b6ef7a5eae8e7a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 30 Aug 2025 12:55:39 +0200 Subject: fs: remove vfs_ioctl export vfs_ioctl() is no longer called by anything outside of fs/ioctl.c, so remove the global symbol and export as it is not needed. Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/2025083038-carving-amuck-a4ae@gregkh Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 34693cae15a2..4daf9b30a641 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2053,8 +2053,6 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); -int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); - #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -- cgit v1.2.3 From edd3cb05c00a040dc72bed20b14b5ba865188bce Mon Sep 17 00:00:00 2001 From: Simon Schuster Date: Mon, 1 Sep 2025 15:09:51 +0200 Subject: copy_process: pass clone_flags as u64 across calltree With the introduction of clone3 in commit 7f192e3cd316 ("fork: add clone3") the effective bit width of clone_flags on all architectures was increased from 32-bit to 64-bit, with a new type of u64 for the flags. However, for most consumers of clone_flags the interface was not changed from the previous type of unsigned long. While this works fine as long as none of the new 64-bit flag bits (CLONE_CLEAR_SIGHAND and CLONE_INTO_CGROUP) are evaluated, this is still undesirable in terms of the principle of least surprise. Thus, this commit fixes all relevant interfaces of callees to sys_clone3/copy_process (excluding the architecture-specific copy_thread) to consistently pass clone_flags as u64, so that no truncation to 32-bit integers occurs on 32-bit architectures. 
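To make the truncation concrete: the two post-clone3 flags live above bit 31 (values from include/uapi/linux/sched.h), so an unsigned long parameter silently drops them on 32-bit targets. A sketch:

	/*
	 * Sketch: CLONE_CLEAR_SIGHAND is 0x100000000ULL and
	 * CLONE_INTO_CGROUP is 0x200000000ULL, i.e. bits 32 and 33.
	 */
	static void example_truncation(void)
	{
		u64 flags = CLONE_INTO_CGROUP;	/* bit 33 set */
		unsigned long narrow = flags;	/* 0 on a 32-bit arch */

		WARN_ON(!(flags & CLONE_INTO_CGROUP));	/* never fires */
		WARN_ON(narrow & CLONE_INTO_CGROUP);	/* never fires: bit lost */
	}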
Signed-off-by: Simon Schuster Link: https://lore.kernel.org/20250901-nios2-implement-clone3-v2-2-53fcf5577d57@siemens-energy.com Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 4 ++-- include/linux/cred.h | 2 +- include/linux/iocontext.h | 6 +++--- include/linux/ipc_namespace.h | 4 ++-- include/linux/lsm_hook_defs.h | 2 +- include/linux/mnt_namespace.h | 2 +- include/linux/nsproxy.h | 2 +- include/linux/pid_namespace.h | 4 ++-- include/linux/rseq.h | 4 ++-- include/linux/sched/task.h | 2 +- include/linux/security.h | 4 ++-- include/linux/sem.h | 4 ++-- include/linux/time_namespace.h | 4 ++-- include/linux/uprobes.h | 4 ++-- include/linux/user_events.h | 4 ++-- include/linux/utsname.h | 4 ++-- 16 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..56d9556a181a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -796,7 +796,7 @@ extern struct cgroup_namespace init_cgroup_ns; void free_cgroup_ns(struct cgroup_namespace *ns); -struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, +struct cgroup_namespace *copy_cgroup_ns(u64 flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns); @@ -818,7 +818,7 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns) static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } static inline struct cgroup_namespace * -copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, +copy_cgroup_ns(u64 flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns) { return old_ns; diff --git a/include/linux/cred.h b/include/linux/cred.h index a102a10f833f..89ae50ad2ace 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -148,7 +148,7 @@ struct cred { extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); -extern int copy_creds(struct task_struct *, unsigned long); +extern int copy_creds(struct task_struct *, u64); extern const struct cred *get_task_cred(struct task_struct *); extern struct cred *cred_alloc_blank(void); extern struct cred *prepare_creds(void); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 14f7eaf1b443..079d8773790c 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -118,8 +118,8 @@ struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); void exit_io_context(struct task_struct *task); -int __copy_io(unsigned long clone_flags, struct task_struct *tsk); -static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +int __copy_io(u64 clone_flags, struct task_struct *tsk); +static inline int copy_io(u64 clone_flags, struct task_struct *tsk) { if (!current->io_context) return 0; @@ -129,7 +129,7 @@ static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) struct io_context; static inline void put_io_context(struct io_context *ioc) { } static inline void exit_io_context(struct task_struct *task) { } -static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +static inline int copy_io(u64 clone_flags, struct task_struct *tsk) { return 0; } diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e8240cf2611a..4b399893e2b3 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -129,7 +129,7 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } 
#endif #if defined(CONFIG_IPC_NS) -extern struct ipc_namespace *copy_ipcs(unsigned long flags, +extern struct ipc_namespace *copy_ipcs(u64 flags, struct user_namespace *user_ns, struct ipc_namespace *ns); static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) @@ -151,7 +151,7 @@ static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns extern void put_ipc_ns(struct ipc_namespace *ns); #else -static inline struct ipc_namespace *copy_ipcs(unsigned long flags, +static inline struct ipc_namespace *copy_ipcs(u64 flags, struct user_namespace *user_ns, struct ipc_namespace *ns) { if (flags & CLONE_NEWIPC) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index fd11fffdd3c3..adbe234a6f6c 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -211,7 +211,7 @@ LSM_HOOK(int, 0, file_open, struct file *file) LSM_HOOK(int, 0, file_post_open, struct file *file, int mask) LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, - unsigned long clone_flags) + u64 clone_flags) LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) LSM_HOOK(int, 0, cred_alloc_blank, struct cred *cred, gfp_t gfp) LSM_HOOK(void, LSM_RET_VOID, cred_free, struct cred *cred) diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 70b366b64816..ff290c87b2e7 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -11,7 +11,7 @@ struct fs_struct; struct user_namespace; struct ns_common; -extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, +extern struct mnt_namespace *copy_mnt_ns(u64, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T)) diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index dab6a1734a22..82533e899ff4 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -103,7 +103,7 @@ static inline struct cred *nsset_cred(struct nsset *set) * */ -int copy_namespaces(unsigned long flags, struct task_struct *tsk); +int copy_namespaces(u64 flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); int exec_task_namespaces(void); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..0620a3e08e83 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -78,7 +78,7 @@ static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) } #endif -extern struct pid_namespace *copy_pid_ns(unsigned long flags, +extern struct pid_namespace *copy_pid_ns(u64 flags, struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); @@ -97,7 +97,7 @@ static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) return 0; } -static inline struct pid_namespace *copy_pid_ns(unsigned long flags, +static inline struct pid_namespace *copy_pid_ns(u64 flags, struct user_namespace *user_ns, struct pid_namespace *ns) { if (flags & CLONE_NEWPID) diff --git a/include/linux/rseq.h b/include/linux/rseq.h index bc8af3eb5598..a96fd345aa38 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -65,7 +65,7 @@ static inline void rseq_migrate(struct 
task_struct *t) * If parent process has a registered restartable sequences area, the * child inherits. Unregister rseq for a clone with CLONE_VM set. */ -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { if (clone_flags & CLONE_VM) { t->rseq = NULL; @@ -107,7 +107,7 @@ static inline void rseq_preempt(struct task_struct *t) static inline void rseq_migrate(struct task_struct *t) { } -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { } static inline void rseq_execve(struct task_struct *t) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ea41795a352b..34d6a0e108c3 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -63,7 +63,7 @@ extern int lockdep_tasklist_lock_is_held(void); extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern int sched_fork(u64 clone_flags, struct task_struct *p); extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void sched_cancel_fork(struct task_struct *p); extern void sched_post_fork(struct task_struct *p); diff --git a/include/linux/security.h b/include/linux/security.h index 521bcb5b9717..9a1d4a6c8673 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -489,7 +489,7 @@ int security_file_receive(struct file *file); int security_file_open(struct file *file); int security_file_post_open(struct file *file, int mask); int security_file_truncate(struct file *file); -int security_task_alloc(struct task_struct *task, unsigned long clone_flags); +int security_task_alloc(struct task_struct *task, u64 clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); void security_cred_free(struct cred *cred); @@ -1215,7 +1215,7 @@ static inline int security_file_truncate(struct file *file) } static inline int security_task_alloc(struct task_struct *task, - unsigned long clone_flags) + u64 clone_flags) { return 0; } diff --git a/include/linux/sem.h b/include/linux/sem.h index c4deefe42aeb..275269ce2ec8 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -9,12 +9,12 @@ struct task_struct; #ifdef CONFIG_SYSVIPC -extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk); +extern int copy_semundo(u64 clone_flags, struct task_struct *tsk); extern void exit_sem(struct task_struct *tsk); #else -static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) +static inline int copy_semundo(u64 clone_flags, struct task_struct *tsk) { return 0; } diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index bb2c52f4fc94..b6e36525e0be 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -43,7 +43,7 @@ static inline struct time_namespace *get_time_ns(struct time_namespace *ns) return ns; } -struct time_namespace *copy_time_ns(unsigned long flags, +struct time_namespace *copy_time_ns(u64 flags, struct user_namespace *user_ns, struct time_namespace *old_ns); void free_time_ns(struct time_namespace *ns); @@ -129,7 +129,7 @@ static inline void put_time_ns(struct time_namespace *ns) } static inline -struct time_namespace *copy_time_ns(unsigned long flags, +struct time_namespace 
*copy_time_ns(u64 flags, struct user_namespace *user_ns, struct time_namespace *old_ns) { diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 516217c39094..915303a82d84 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -205,7 +205,7 @@ extern void uprobe_start_dup_mmap(void); extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); -extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); +extern void uprobe_copy_process(struct task_struct *t, u64 flags); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); @@ -281,7 +281,7 @@ static inline bool uprobe_deny_signal(void) static inline void uprobe_free_utask(struct task_struct *t) { } -static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags) +static inline void uprobe_copy_process(struct task_struct *t, u64 flags) { } static inline void uprobe_clear_state(struct mm_struct *mm) diff --git a/include/linux/user_events.h b/include/linux/user_events.h index 8afa8c3a0973..57d1ff006090 100644 --- a/include/linux/user_events.h +++ b/include/linux/user_events.h @@ -33,7 +33,7 @@ extern void user_event_mm_dup(struct task_struct *t, extern void user_event_mm_remove(struct task_struct *t); static inline void user_events_fork(struct task_struct *t, - unsigned long clone_flags) + u64 clone_flags) { struct user_event_mm *old_mm; @@ -68,7 +68,7 @@ static inline void user_events_exit(struct task_struct *t) } #else static inline void user_events_fork(struct task_struct *t, - unsigned long clone_flags) + u64 clone_flags) { } diff --git a/include/linux/utsname.h b/include/linux/utsname.h index bf7613ba412b..ba34ec0e2f95 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -35,7 +35,7 @@ static inline void get_uts_ns(struct uts_namespace *ns) refcount_inc(&ns->ns.count); } -extern struct uts_namespace *copy_utsname(unsigned long flags, +extern struct uts_namespace *copy_utsname(u64 flags, struct user_namespace *user_ns, struct uts_namespace *old_ns); extern void free_uts_ns(struct uts_namespace *ns); @@ -55,7 +55,7 @@ static inline void put_uts_ns(struct uts_namespace *ns) { } -static inline struct uts_namespace *copy_utsname(unsigned long flags, +static inline struct uts_namespace *copy_utsname(u64 flags, struct user_namespace *user_ns, struct uts_namespace *old_ns) { if (flags & CLONE_NEWUTS) -- cgit v1.2.3 From d9a3e9929452780df16f3414f0d59b5f69d058cf Mon Sep 17 00:00:00 2001 From: Thomas Andreatta Date: Wed, 27 Aug 2025 17:24:43 +0200 Subject: dmaengine: sh: setup_xfer error handling This patch modifies the type of setup_xfer from void to int and handles errors since the function can fail. `setup_xfer` now returns any error from `dmae_set_dmars` or `dmae_set_chcr`, while `shdma_tx_submit` handles the result, removing the chunks from the queue and marking PM as idle in case of an error.
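For illustration, a sketch of the driver-side shape this implies. The config-lookup details are simplified from the in-tree driver, and the dmae_set_dmars()/dmae_set_chcr() signatures are assumed here to return an error code:

	/*
	 * Sketch: setup_xfer propagates hardware-programming failures
	 * instead of returning void, so shdma_tx_submit() can unqueue
	 * the chunks and mark the device PM-idle on error.
	 */
	static int sh_dmae_setup_xfer(struct shdma_chan *schan, int slave_id)
	{
		struct sh_dmae_chan *sh_chan =
			container_of(schan, struct sh_dmae_chan, shdma_chan);
		int ret = 0;

		if (slave_id >= 0) {	/* slave (peripheral) mode */
			ret = dmae_set_dmars(sh_chan, sh_chan->config->mid_rid);
			if (!ret)
				ret = dmae_set_chcr(sh_chan, sh_chan->config->chcr);
		}
		return ret;
	}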
Signed-off-by: Thomas Andreatta Link: https://lore.kernel.org/r/20250827152442.90962-1-thomas.andreatta2000@gmail.com Signed-off-by: Vinod Koul --- include/linux/shdma-base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index 6dfd05ef5c2d..03ba4dab2ef7 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -96,7 +96,7 @@ struct shdma_ops { int (*desc_setup)(struct shdma_chan *, struct shdma_desc *, dma_addr_t, dma_addr_t, size_t *); int (*set_slave)(struct shdma_chan *, int, dma_addr_t, bool); - void (*setup_xfer)(struct shdma_chan *, int); + int (*setup_xfer)(struct shdma_chan *, int); void (*start_xfer)(struct shdma_chan *, struct shdma_desc *); struct shdma_desc *(*embedded_desc)(void *, int); bool (*chan_irq)(struct shdma_chan *, int); -- cgit v1.2.3 From 7ea95d55e63176899eb96f7aaa34a5646f501b2c Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 26 Aug 2025 11:07:38 -0500 Subject: dmaengine: Fix dma_async_tx_descriptor->tx_submit documentation Commit 790fb9956eea ("linux/dmaengine.h: fix a few kernel-doc warnings") inserted new documentation for @desc_free in the middle of @tx_submit's description. Put @tx_submit's description back together, matching the indentation style of the rest of the documentation for dma_async_tx_descriptor. Fixes: 790fb9956eea ("linux/dmaengine.h: fix a few kernel-doc warnings") Reviewed-by: Dave Jiang Signed-off-by: Nathan Lynch Link: https://lore.kernel.org/r/20250826-dma_async_tx_desc-tx_submit-doc-fix-v1-1-18a4b51697db@amd.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6de7c05d6bd8..99efe2b9b4ea 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -594,9 +594,9 @@ struct dma_descriptor_metadata_ops { * @phys: physical address of the descriptor * @chan: target channel for this operation * @tx_submit: accept the descriptor, assign ordered cookie and mark the + * descriptor pending. To be pushed on .issue_pending() call * @desc_free: driver's callback function to free a resusable descriptor * after completion - * descriptor pending. To be pushed on .issue_pending() call * @callback: routine to call after this operation is complete * @callback_result: error result from a DMA transaction * @callback_param: general parameter to pass to the callback routine -- cgit v1.2.3 From c9f62564252c21d739a5003e9b2d6ad0828aa7bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 30 Aug 2025 19:01:11 +0200 Subject: mtd: rawnand: s3c2410: Drop driver (no actual S3C64xx user) The s3c2410 NAND driver still supports S3C64xx platform, which in general is supported in the kernel. There are however no references of "s3c6400-nand" platform device ID or "s3c24xx-nand" driver, thus this driver cannot be instantiated for S3C64xx platform and is basically unused. 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Miquel Raynal --- include/linux/platform_data/mtd-nand-s3c2410.h | 70 -------------------------- 1 file changed, 70 deletions(-) delete mode 100644 include/linux/platform_data/mtd-nand-s3c2410.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h deleted file mode 100644 index 25390fc3e795..000000000000 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 - NAND device controller platform_device info -*/ - -#ifndef __MTD_NAND_S3C2410_H -#define __MTD_NAND_S3C2410_H - -#include - -/** - * struct s3c2410_nand_set - define a set of one or more nand chips - * @flash_bbt: Openmoko u-boot can create a Bad Block Table - * Setting this flag will allow the kernel to - * look for it at boot time and also skip the NAND - * scan. - * @options: Default value to set into 'struct nand_chip' options. - * @nr_chips: Number of chips in this set - * @nr_partitions: Number of partitions pointed to by @partitions - * @name: Name of set (optional) - * @nr_map: Map for low-layer logical to physical chip numbers (option) - * @partitions: The mtd partition list - * - * define a set of one or more nand chips registered with an unique mtd. Also - * allows to pass flag to the underlying NAND layer. 'disable_ecc' will trigger - * a warning at boot time. - */ -struct s3c2410_nand_set { - unsigned int flash_bbt:1; - - unsigned int options; - int nr_chips; - int nr_partitions; - char *name; - int *nr_map; - struct mtd_partition *partitions; - struct device_node *of_node; -}; - -struct s3c2410_platform_nand { - /* timing information for controller, all times in nanoseconds */ - - int tacls; /* time for active CLE/ALE to nWE/nOE */ - int twrph0; /* active time for nWE/nOE */ - int twrph1; /* time for release CLE/ALE from nWE/nOE inactive */ - - unsigned int ignore_unset_ecc:1; - - enum nand_ecc_engine_type engine_type; - - int nr_sets; - struct s3c2410_nand_set *sets; - - void (*select_chip)(struct s3c2410_nand_set *, - int chip); -}; - -/** - * s3c_nand_set_platdata() - register NAND platform data. - * @nand: The NAND platform data to register with s3c_device_nand. - * - * This function copies the given NAND platform data, @nand and registers - * it with the s3c_device_nand. This allows @nand to be __initdata. -*/ -extern void s3c_nand_set_platdata(struct s3c2410_platform_nand *nand); - -#endif /*__MTD_NAND_S3C2410_H */ -- cgit v1.2.3 From 7df87820122acd3204565109f636a1367912655a Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 5 Aug 2025 15:45:08 +1000 Subject: pidns: move is-ancestor logic to helper This check will be needed in later patches, and there's no point open-coding it each time. 
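For reference, the check being centralized is the usual level-bounded parent walk over the pid namespace tree; a sketch of the likely shape (the in-tree helper may differ in detail):

	/* Sketch: walk 'child' up to 'ancestor's depth, then compare. */
	static bool pidns_is_ancestor_sketch(struct pid_namespace *child,
					     struct pid_namespace *ancestor)
	{
		struct pid_namespace *ns = child;

		if (child->level < ancestor->level)
			return false;
		while (ns->level > ancestor->level)
			ns = ns->parent;
		return ns == ancestor;
	}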
Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250805-procfs-pidns-api-v4-1-705f984940e7@cyphar.com Signed-off-by: Christian Brauner --- include/linux/pid_namespace.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..17fdc059f8da 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -84,6 +84,9 @@ extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); +extern bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor); + #else /* !CONFIG_PID_NS */ #include @@ -118,6 +121,12 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) { return 0; } + +static inline bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor) +{ + return false; +} #endif /* CONFIG_PID_NS */ extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); -- cgit v1.2.3 From cb86408b1fc2e3f6fe45ebe8509a5404060e01e0 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 1 Aug 2025 19:05:23 +0200 Subject: list: add list_last_entry_or_null() Add an equivalent of list_first_entry_or_null() to obtain the last element of a list. Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250801-drm-bridge-alloc-getput-drm_bridge_get_next_bridge-v2-1-888912b0be13@bootlin.com Signed-off-by: Luca Ceresoli --- include/linux/list.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..7f7657e41620 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -636,6 +636,20 @@ static inline void list_splice_tail_init(struct list_head *list, pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ }) +/** + * list_last_entry_or_null - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the list is empty, it returns NULL. + */ +#define list_last_entry_or_null(ptr, type, member) ({ \ + struct list_head *head__ = (ptr); \ + struct list_head *pos__ = READ_ONCE(head__->prev); \ + pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ +}) + /** * list_next_entry - get the next element in list * @pos: the type * to cursor -- cgit v1.2.3 From 72ca981dba5e98c2b1c2956016cc4be934d9fbea Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 31 Aug 2025 14:52:37 +0800 Subject: firmware: arm_scmi: Fix function name typo in scmi_perf_proto_ops struct The performance protocol ops table incorrectly referenced power_scale_mw_get instead of the correct power_scale_get. Fix the typo to use the proper function. 
Signed-off-by: Peng Fan Message-Id: <20250831-scmi-cpufreq-v1-1-493031cf6e9b@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 688466a0e816..aafaac1496b0 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -153,7 +153,7 @@ struct scmi_perf_domain_info { * for a given device * @fast_switch_rate_limit: gets the minimum time (us) required between * successive fast_switching requests - * @power_scale_mw_get: indicates if the power values provided are in milliWatts + * @power_scale_get: indicates if the power values provided are in milliWatts * or in some other (abstract) scale */ struct scmi_perf_proto_ops { -- cgit v1.2.3 From 61f132ca8c46ffee368a951e516d19d4ae767ea8 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 29 Aug 2025 13:06:04 +0800 Subject: ptp: add helpers to get the phc_index by of_node or dev Some Ethernet controllers do not have an integrated PTP timer function. Instead, the PTP timer is a separated device and provides PTP hardware clock to the Ethernet controller to use. Therefore, the Ethernet controller driver needs to obtain the PTP clock's phc_index in its ethtool_ops::get_ts_info(). Currently, most drivers implement this in the following ways. 1. The PTP device driver adds a custom API and exports it to the Ethernet controller driver. 2. The PTP device driver adds private data to its device structure. So the private data structure needs to be exposed to the Ethernet controller driver. When registering the ptp clock, ptp_clock_register() always saves the ptp_clock pointer to the private data of ptp_clock::dev. Therefore, as long as ptp_clock::dev is obtained, the phc_index can be obtained. So the following generic APIs can be added to the ptp driver to obtain the phc_index. 1. ptp_clock_index_by_dev(): Obtain the phc_index by the device pointer of the PTP device. 2.ptp_clock_index_by_of_node(): Obtain the phc_index by the of_node pointer of the PTP device. Also, we can add another API like ptp_clock_index_by_fwnode() to get the phc_index by fwnode of PTP device. However, this API is not used in this patch set, so it is better to add it when needed. Suggested-by: Vladimir Oltean Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20250829050615.1247468-4-wei.fang@nxp.com Signed-off-by: Paolo Abeni --- include/linux/ptp_clock_kernel.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 3d089bd4d5e9..7dd7951b23d5 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -360,6 +360,24 @@ extern void ptp_clock_event(struct ptp_clock *ptp, extern int ptp_clock_index(struct ptp_clock *ptp); +/** + * ptp_clock_index_by_of_node() - obtain the device index of + * a PTP clock based on the PTP device of_node + * + * @np: The device of_node pointer of the PTP device. + * Return: The PHC index on success or -1 on failure. + */ +int ptp_clock_index_by_of_node(struct device_node *np); + +/** + * ptp_clock_index_by_dev() - obtain the device index of + * a PTP clock based on the PTP device. + * + * @parent: The parent device (PTP device) pointer of the PTP clock. + * Return: The PHC index on success or -1 on failure. 
+ */ +int ptp_clock_index_by_dev(struct device *parent); + /** * ptp_find_pin() - obtain the pin index of a given auxiliary function * @@ -425,6 +443,10 @@ static inline void ptp_clock_event(struct ptp_clock *ptp, { } static inline int ptp_clock_index(struct ptp_clock *ptp) { return -1; } +static inline int ptp_clock_index_by_of_node(struct device_node *np) +{ return -1; } +static inline int ptp_clock_index_by_dev(struct device *parent) +{ return -1; } static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } -- cgit v1.2.3 From e551fa3159e3050c26ff010c3b595b45d7eb071a Mon Sep 17 00:00:00 2001 From: Qunqin Zhao Date: Sat, 5 Jul 2025 15:20:42 +0800 Subject: mfd: Add support for Loongson Security Engine chip controller Loongson Security Engine chip supports RNG, SM2, SM3 and SM4 accelerator engines. This is the base driver for other specific engine drivers. Co-developed-by: Yinggang Gu Signed-off-by: Yinggang Gu Signed-off-by: Qunqin Zhao Reviewed-by: Huacai Chen Link: https://lore.kernel.org/r/20250705072045.1067-2-zhaoqunqin@loongson.cn Signed-off-by: Lee Jones --- include/linux/mfd/loongson-se.h | 53 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/mfd/loongson-se.h (limited to 'include/linux') diff --git a/include/linux/mfd/loongson-se.h b/include/linux/mfd/loongson-se.h new file mode 100644 index 000000000000..07afa0c2524d --- /dev/null +++ b/include/linux/mfd/loongson-se.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (C) 2025 Loongson Technology Corporation Limited */ + +#ifndef __MFD_LOONGSON_SE_H__ +#define __MFD_LOONGSON_SE_H__ + +#define LOONGSON_ENGINE_CMD_TIMEOUT_US 10000 +#define SE_SEND_CMD_REG 0x0 +#define SE_SEND_CMD_REG_LEN 0x8 +/* Controller command ID */ +#define SE_CMD_START 0x0 +#define SE_CMD_SET_DMA 0x3 +#define SE_CMD_SET_ENGINE_CMDBUF 0x4 + +#define SE_S2LINT_STAT 0x88 +#define SE_S2LINT_EN 0x8c +#define SE_S2LINT_CL 0x94 +#define SE_L2SINT_STAT 0x98 +#define SE_L2SINT_SET 0xa0 + +#define SE_INT_ALL 0xffffffff +#define SE_INT_CONTROLLER BIT(0) + +#define SE_ENGINE_MAX 16 +#define SE_ENGINE_RNG 1 +#define SE_CMD_RNG 0x100 + +#define SE_ENGINE_TPM 5 +#define SE_CMD_TPM 0x500 + +#define SE_ENGINE_CMD_SIZE 32 + +struct loongson_se_engine { + struct loongson_se *se; + int id; + + /* Command buffer */ + void *command; + void *command_ret; + + void *data_buffer; + uint buffer_size; + /* Data buffer offset to DMA base */ + uint buffer_off; + + struct completion completion; + +}; + +struct loongson_se_engine *loongson_se_init_engine(struct device *dev, int id); +int loongson_se_send_engine_cmd(struct loongson_se_engine *engine); + +#endif -- cgit v1.2.3 From 65128868bb3b0621d2d8e71f19852675a064b373 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:04 -0700 Subject: mm/memory_hotplug: Update comment for hotplug memory callback priorities Add clarification to comment for memory hotplug callback ordering as the current comment does not provide clear language on which callback happens first. 
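For context, the priorities clarified here are consumed via hotplug_memory_notifier(); a sketch of a registration, with the callback body elided to the uninteresting minimum:

	#include <linux/memory.h>

	/*
	 * Sketch: with the clarified semantics, a larger priority value
	 * means the callback runs earlier in the chain.
	 */
	static int example_mem_callback(struct notifier_block *nb,
					unsigned long action, void *arg)
	{
		return NOTIFY_OK;
	}

	static int __init example_register(void)
	{
		/* Priority 0 runs after higher-numbered callbacks such as
		 * SLAB_CALLBACK_PRI (1). */
		return hotplug_memory_notifier(example_mem_callback,
					       DEFAULT_CALLBACK_PRI);
	}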
Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d..1305102688d0 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,8 +115,8 @@ struct notifier_block; struct mem_section; /* - * Priorities for the hotplug memory callback routines (stored in decreasing - * order in the callback chain) + * Priorities for the hotplug memory callback routines. Invoked from + * high to low. Higher priorities correspond to higher numbers. */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -- cgit v1.2.3 From b57fc652ca24ada3b0c888327f9944ed21559286 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:05 -0700 Subject: drivers/base/node: Add a helper function node_update_perf_attrs() Add helper function node_update_perf_attrs() to allow update of node access coordinates computed by an external agent such as CXL. The helper allows updating the coordinates after the attributes have been created by HMAT. Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/node.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/node.h b/include/linux/node.h index 2c7529335b21..866e3323f1fd 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,6 +85,8 @@ struct node_cache_attrs { void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, enum access_coordinate_class access); +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid, enum access_coordinate_class access) { } + +static inline void node_update_perf_attrs(unsigned int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ +} #endif struct node { -- cgit v1.2.3 From 2e454fb8056df6da4bba7d89a57bf60e217463c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:06 -0700 Subject: cxl, acpi/hmat: Update CXL access coordinates directly instead of through HMAT The current implementation of CXL memory hotplug notifier gets called before the HMAT memory hotplug notifier. The CXL driver calculates the access coordinates (bandwidth and latency values) for the CXL end to end path (i.e. CPU to endpoint). When the CXL region is onlined, the CXL memory hotplug notifier writes the access coordinates to the HMAT target structs. Then the HMAT memory hotplug notifier is called and it creates the access coordinates for the node sysfs attributes. During testing on an Intel platform, it was found that although the newly calculated coordinates were pushed to sysfs, the sysfs attributes for the access coordinates showed up with the wrong initiator. The system has 4 nodes (0, 1, 2, 3) where node 0 and 1 are CPU nodes and node 2 and 3 are CXL nodes.
The expectation is that node 2 would show up as a target to node 0: /sys/devices/system/node/node2/access0/initiators/node0 However, it was observed that node 2 showed up as a target under node 1: /sys/devices/system/node/node2/access0/initiators/node1 The original intent of the 'ext_updated' flag in HMAT handling code was to stop the HMAT memory hotplug callback from clobbering the access coordinates after CXL has injected its calculated coordinates and replaced the generic target access coordinates provided by the HMAT table in the HMAT target structs. However, the flag is hacky at best and blocks the updates from other CXL regions that are onlined in the same node later on. Remove the 'ext_updated' flag usage and just update the access coordinates for the nodes directly without touching HMAT target data. The hotplug memory callback ordering is changed. Instead of changing CXL, move HMAT back so there's room for the levels rather than have CXL share the same level as SLAB_CALLBACK_PRI. The change results in the CXL callback being executed after the HMAT callback. The HMAT callback will create the node sysfs attributes for access coordinates. The CXL callback will write the access coordinates to the now-created node sysfs attributes directly and will not pollute the HMAT target values. A nodemask is introduced to keep track of whether a node has been updated and to prevent further updates. Fixes: 067353a46d8c ("cxl/region: Add memory hotplug notifier for cxl region") Cc: stable@vger.kernel.org Tested-by: Marc Herbert Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-4-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 1305102688d0..0b755d1ef1ec 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -120,8 +120,8 @@ struct mem_section; */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -#define HMAT_CALLBACK_PRI 2 #define CXL_CALLBACK_PRI 5 +#define HMAT_CALLBACK_PRI 6 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 -- cgit v1.2.3 From e99ecbc4c89adf551cccbbc00b5cb08c50969af6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:07 -0700 Subject: acpi/hmat: Remove now unused hmat_update_target_coordinates() Remove dead code since CXL no longer calls hmat_update_target_coordinates().
Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-5-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/acpi.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1c5bb1e887cd..5ff5d99f6ead 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1595,18 +1595,6 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } -#ifdef CONFIG_ACPI_HMAT -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access); -#else -static inline int hmat_update_target_coordinates(int nid, - struct access_coordinate *coord, - enum access_coordinate_class access) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else -- cgit v1.2.3 From 4beb44a2d62dddfe450f310aa1a950901731cb3a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:33 +0100 Subject: net: phy: add phy_interface_weight() Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uslwn-00000001SOx-0a7H@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c2b8b6e7187..bb45787d8684 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf) return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX); } +static inline unsigned int phy_interface_weight(const unsigned long *intf) +{ + return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX); +} + static inline void phy_interface_and(unsigned long *dst, const unsigned long *a, const unsigned long *b) { -- cgit v1.2.3 From 21368fcbb124d51b5d8bd8fa0a286a23c34a0888 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Mon, 25 Aug 2025 10:28:21 +0200 Subject: bitmap: introduce hardware-specific bitfield operations Hardware of various vendors, but very notably Rockchip, often uses 32-bit registers where the upper 16-bit half of the register is a write-enable mask for the lower half. This type of hardware setup allows for more granular concurrent register write access. Over the years, many drivers have hand-rolled their own version of this macro, usually without any checks, often called something like HIWORD_UPDATE or FIELD_PREP_HIWORD, commonly with slightly different semantics between them. Clearly there is a demand for such a macro, and thus the demand should be satisfied in a common header file. As this is a convention that spans across multiple vendors, and similar conventions may also have cross-vendor adoption, it's best if it lives in a vendor-agnostic header file that can be expanded over time. Add hw_bitfield.h with two macros: FIELD_PREP_WM16, and FIELD_PREP_WM16_CONST. The latter is a version that can be used in initializers, like FIELD_PREP_CONST. 
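To make the usage concrete before the implementation below, a sketch against an invented register layout; the register name, offset, and mask are illustrative only:

	#include <linux/hw_bitfield.h>
	#include <linux/io.h>

	#define EXAMPLE_GRF_REG		0x0040		/* hypothetical register */
	#define EXAMPLE_MODE_MASK	GENMASK(3, 2)	/* field in the low half */

	/*
	 * Sketch: one 32-bit write updates only bits [3:2];
	 * FIELD_PREP_WM16() also raises the matching write-enable bits
	 * [19:18] in the high half, so other fields are untouched.
	 */
	static void example_set_mode(void __iomem *base, u32 mode)
	{
		writel(FIELD_PREP_WM16(EXAMPLE_MODE_MASK, mode),
		       base + EXAMPLE_GRF_REG);
	}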
Suggested-by: Yury Norov (NVIDIA) Signed-off-by: Nicolas Frattaroli Acked-by: Jakub Kicinski Acked-by: Heiko Stuebner Signed-off-by: Yury Norov (NVIDIA) --- include/linux/hw_bitfield.h | 62 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/linux/hw_bitfield.h (limited to 'include/linux') diff --git a/include/linux/hw_bitfield.h b/include/linux/hw_bitfield.h new file mode 100644 index 000000000000..df202e167ce4 --- /dev/null +++ b/include/linux/hw_bitfield.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025, Collabora Ltd. + */ + +#ifndef _LINUX_HW_BITFIELD_H +#define _LINUX_HW_BITFIELD_H + +#include +#include +#include + +/** + * FIELD_PREP_WM16() - prepare a bitfield element with a mask in the upper half + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP_WM16() masks and shifts up the value, as well as bitwise ORs the + * result with the mask shifted up by 16. + * + * This is useful for a common design of hardware registers where the upper + * 16-bit half of a 32-bit register is used as a write-enable mask. In such a + * register, a bit in the lower half is only updated if the corresponding bit + * in the upper half is high. + */ +#define FIELD_PREP_WM16(_mask, _val) \ + ({ \ + typeof(_val) __val = _val; \ + typeof(_mask) __mask = _mask; \ + __BF_FIELD_CHECK(__mask, ((u16)0U), __val, \ + "HWORD_UPDATE: "); \ + (((typeof(__mask))(__val) << __bf_shf(__mask)) & (__mask)) | \ + ((__mask) << 16); \ + }) + +/** + * FIELD_PREP_WM16_CONST() - prepare a constant bitfield element with a mask in + * the upper half + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP_WM16_CONST() masks and shifts up the value, as well as bitwise ORs + * the result with the mask shifted up by 16. + * + * This is useful for a common design of hardware registers where the upper + * 16-bit half of a 32-bit register is used as a write-enable mask. In such a + * register, a bit in the lower half is only updated if the corresponding bit + * in the upper half is high. + * + * Unlike FIELD_PREP_WM16(), this is a constant expression and can therefore + * be used in initializers. Error checking is less comfortable for this + * version. + */ +#define FIELD_PREP_WM16_CONST(_mask, _val) \ + ( \ + FIELD_PREP_CONST(_mask, _val) | \ + (BUILD_BUG_ON_ZERO(const_true((u64)(_mask) > U16_MAX)) + \ + ((_mask) << 16)) \ + ) + + +#endif /* _LINUX_HW_BITFIELD_H */ -- cgit v1.2.3 From df3a7762ee24ba6a33d4215244e329ca300f4819 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 2 Sep 2025 10:06:56 -0600 Subject: io_uring/uring_cmd: add io_uring_cmd_tw_t type alias Introduce a function pointer type alias io_uring_cmd_tw_t for the uring_cmd task work callback. This avoids repeating the signature in several places. Also name both arguments to the callback to clarify what they represent. 
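For illustration, a sketch of a consumer after the change; the driver function names are hypothetical, and only interfaces declared in this header are used:

	/* Sketch: the callback now names both parameters and matches the
	 * io_uring_cmd_tw_t typedef directly. */
	static void example_cmd_complete_tw(struct io_uring_cmd *cmd,
					    unsigned issue_flags)
	{
		io_uring_cmd_done(cmd, 0, 0, issue_flags);
	}

	/* Queued from completion context, e.g. an IRQ handler: */
	static void example_on_completion(struct io_uring_cmd *ioucmd)
	{
		io_uring_cmd_complete_in_task(ioucmd, example_cmd_complete_tw);
	}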
Signed-off-by: Caleb Sander Mateos Reviewed-by: Keith Busch Link: https://lore.kernel.org/r/20250902160657.1726828-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 4bd3a7339243..7211157edfe9 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -11,11 +11,14 @@ /* io_uring_cmd is being issued again */ #define IORING_URING_CMD_REISSUE (1U << 31) +typedef void (*io_uring_cmd_tw_t)(struct io_uring_cmd *cmd, + unsigned issue_flags); + struct io_uring_cmd { struct file *file; const struct io_uring_sqe *sqe; /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); + io_uring_cmd_tw_t task_work_cb; u32 cmd_op; u32 flags; u8 pdu[32]; /* available inline for free use */ @@ -57,7 +60,7 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2, unsigned issue_flags); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned), + io_uring_cmd_tw_t task_work_cb, unsigned flags); /* @@ -106,7 +109,7 @@ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned), + io_uring_cmd_tw_t task_work_cb, unsigned flags) { } @@ -143,13 +146,13 @@ static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd, /* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) + io_uring_cmd_tw_t task_work_cb) { __io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE); } static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) + io_uring_cmd_tw_t task_work_cb) { __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0); } -- cgit v1.2.3 From 04a3134f88a4bd03001a3093144819523cfca99e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 2 Sep 2025 22:45:24 -0700 Subject: net/mlx5: Add PSP capabilities structures and bits Add mlx5_ifc PSP related capabilities structures and HW definitions needed for PSP support in mlx5.
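For illustration, a sketch of how a driver probe path might gate PSP offload on the new bits; the helper name and the exact gating policy are hypothetical:

	/* Sketch: MLX5_CAP_GEN() reads the new 'psp' bit in cmd_hca_cap;
	 * MLX5_CAP_PSP() reads fields of mlx5_ifc_psp_cap_bits. */
	static bool example_psp_usable(struct mlx5_core_dev *mdev)
	{
		if (!MLX5_CAP_GEN(mdev, psp))
			return false;

		return MLX5_CAP_PSP(mdev, psp_crypto_offload) &&
		       MLX5_CAP_PSP(mdev, psp_crypto_esp_aes_gcm_128_encrypt) &&
		       MLX5_CAP_PSP(mdev, psp_crypto_esp_aes_gcm_128_decrypt);
	}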
Link: https://lore.kernel.org/netdev/20250828162953.2707727-1-daniel.zahka@gmail.com/ Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++ include/linux/mlx5/mlx5_ifc.h | 95 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9d2467f982ad..72a83666e67f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1248,6 +1248,7 @@ enum mlx5_cap_type { MLX5_CAP_IPSEC, MLX5_CAP_CRYPTO = 0x1a, MLX5_CAP_SHAMPO = 0x1d, + MLX5_CAP_PSP = 0x1e, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, @@ -1487,6 +1488,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_SHAMPO(mdev, cap) \ MLX5_GET(shampo_cap, mdev->caps.hca[MLX5_CAP_SHAMPO]->cur, cap) +#define MLX5_CAP_PSP(mdev, cap)\ + MLX5_GET(psp_cap, (mdev)->caps.hca[MLX5_CAP_PSP]->cur, cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 44d497272162..e9f14a0c7f4f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -314,6 +314,8 @@ enum { MLX5_CMD_OP_CREATE_UMEM = 0xa08, MLX5_CMD_OP_DESTROY_UMEM = 0xa0a, MLX5_CMD_OP_SYNC_STEERING = 0xb00, + MLX5_CMD_OP_PSP_GEN_SPI = 0xb10, + MLX5_CMD_OP_PSP_ROTATE_KEY = 0xb11, MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, @@ -489,12 +491,14 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; - u8 reserved_at_60[0x2]; + u8 reformat_l2_to_l3_psp_tunnel[0x1]; + u8 reformat_l3_psp_tunnel_to_l2[0x1]; u8 reformat_insert[0x1]; u8 reformat_remove[0x1]; u8 macsec_encrypt[0x1]; u8 macsec_decrypt[0x1]; - u8 reserved_at_66[0x2]; + u8 psp_encrypt[0x1]; + u8 psp_decrypt[0x1]; u8 reformat_add_macsec[0x1]; u8 reformat_remove_macsec[0x1]; u8 reparse[0x1]; @@ -703,7 +707,7 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; - u8 reserved_at_1a0[0x8]; + u8 psp_syndrome[0x8]; u8 macsec_syndrome[0x8]; u8 ipsec_syndrome[0x8]; u8 ipsec_next_header[0x8]; @@ -1511,6 +1515,21 @@ struct mlx5_ifc_macsec_cap_bits { u8 reserved_at_40[0x7c0]; }; +struct mlx5_ifc_psp_cap_bits { + u8 reserved_at_0[0x1]; + u8 psp_crypto_offload[0x1]; + u8 reserved_at_2[0x1]; + u8 psp_crypto_esp_aes_gcm_256_encrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_128_encrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_256_decrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_128_decrypt[0x1]; + u8 reserved_at_7[0x4]; + u8 log_max_num_of_psp_spi[0x5]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x7e0]; +}; + enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, @@ -1876,7 +1895,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x7]; u8 mkey_pcie_tph[0x1]; - u8 reserved_at_2a8[0x3]; + u8 reserved_at_2a8[0x2]; + + u8 psp[0x1]; u8 shampo[0x1]; u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; @@ -3803,6 +3824,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_macsec_cap_bits macsec_cap; struct mlx5_ifc_crypto_cap_bits crypto_cap; struct mlx5_ifc_ipsec_cap_bits ipsec_cap; + struct mlx5_ifc_psp_cap_bits psp_cap; u8 reserved_at_0[0x8000]; }; @@ -3832,6 +3854,7 @@ enum { enum { MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC = 0x0, MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC = 0x1, + MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_PSP = 0x2, }; struct mlx5_ifc_vlan_bits { @@ -7159,6 +7182,8 @@ enum mlx5_reformat_ctx_type { 
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc, + MLX5_REFORMAT_TYPE_ADD_PSP_TUNNEL = 0xd, + MLX5_REFORMAT_TYPE_DEL_PSP_TUNNEL = 0xe, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, @@ -7285,6 +7310,7 @@ enum { MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME = 0x5D, MLX5_ACTION_IN_FIELD_OUT_EMD_47_32 = 0x6F, MLX5_ACTION_IN_FIELD_OUT_EMD_31_0 = 0x70, + MLX5_ACTION_IN_FIELD_PSP_SYNDROME = 0x71, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { @@ -13079,6 +13105,7 @@ enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS = 0x1, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC = 0x2, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC = 0x4, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_PSP = 0x6, }; struct mlx5_ifc_tls_static_params_bits { @@ -13496,4 +13523,64 @@ enum mlx5e_pcie_cong_event_mod_field { MLX5_PCIE_CONG_EVENT_MOD_THRESH = BIT(2), }; +struct mlx5_ifc_psp_rotate_key_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_psp_rotate_key_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +enum mlx5_psp_gen_spi_in_key_size { + MLX5_PSP_GEN_SPI_IN_KEY_SIZE_128 = 0x0, + MLX5_PSP_GEN_SPI_IN_KEY_SIZE_256 = 0x1, +}; + +struct mlx5_ifc_key_spi_bits { + u8 spi[0x20]; + + u8 reserved_at_20[0x60]; + + u8 key[8][0x20]; +}; + +struct mlx5_ifc_psp_gen_spi_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 key_size[0x2]; + u8 reserved_at_62[0xe]; + u8 num_of_spi[0x10]; +}; + +struct mlx5_ifc_psp_gen_spi_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x10]; + u8 num_of_spi[0x10]; + + u8 reserved_at_60[0x20]; + + struct mlx5_ifc_key_spi_bits key_spi[]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From ddeb66d2cb10f03a43d97a0ff2c3869d1951c87d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 26 Aug 2025 11:54:36 +0200 Subject: gpio: nomadik: don't print out global GPIO numbers in debugfs callbacks In order to further limit the number of references to the GPIO base number stored in struct gpio_chip, replace the global GPIO numbers in the output of debugfs callbacks by hardware offsets. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250826-gpio-dbg-show-base-v1-2-7f27cd7f2256@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/gpio-nomadik.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h index b5a84864650d..7ba53b499e16 100644 --- a/include/linux/gpio/gpio-nomadik.h +++ b/include/linux/gpio/gpio-nomadik.h @@ -261,8 +261,7 @@ struct platform_device; * true. 
*/ void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, - struct gpio_chip *chip, unsigned int offset, - unsigned int gpio); + struct gpio_chip *chip, unsigned int offset); #else -- cgit v1.2.3 From 661f951e371cc134ea31c84238dbdc9a898b8403 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Aug 2025 12:02:44 +0000 Subject: sched/fair: Get rid of sched_domains_curr_level hack for tl->cpumask() Leon [1] and Vinicius [2] noted a topology_span_sane() warning during their testing starting from v6.16-rc1. Debug that followed pointed to the tl->mask() for the NODE domain being incorrectly resolved to that of the highest NUMA domain. tl->mask() for NODE is set to the sd_numa_mask() which depends on the global "sched_domains_curr_level" hack. "sched_domains_curr_level" is set to the "tl->numa_level" during tl traversal in build_sched_domains() calling sd_init() but was not reset before topology_span_sane(). Since "tl->numa_level" still reflected the old value from build_sched_domains(), topology_span_sane() for the NODE domain trips when the span of the last NUMA domain overlaps. Instead of replicating the "sched_domains_curr_level" hack, get rid of it entirely and instead, pass the entire "sched_domain_topology_level" object to tl->cpumask() function to prevent such mishap in the future. sd_numa_mask() now directly references "tl->numa_level" instead of relying on the global "sched_domains_curr_level" hack to index into sched_domains_numa_masks[]. The original warning was reproducible on the following NUMA topology reported by Leon: $ sudo numactl -H available: 5 nodes (0-4) node 0 cpus: 0 1 node 0 size: 2927 MB node 0 free: 1603 MB node 1 cpus: 2 3 node 1 size: 3023 MB node 1 free: 3008 MB node 2 cpus: 4 5 node 2 size: 3023 MB node 2 free: 3007 MB node 3 cpus: 6 7 node 3 size: 3023 MB node 3 free: 3002 MB node 4 cpus: 8 9 node 4 size: 3022 MB node 4 free: 2718 MB node distances: node 0 1 2 3 4 0: 10 39 38 37 36 1: 39 10 38 37 36 2: 38 38 10 37 36 3: 37 37 37 10 36 4: 36 36 36 36 10 The above topology can be mimicked using the following QEMU cmd that was used to reproduce the warning and test the fix: sudo qemu-system-x86_64 -enable-kvm -cpu host \ -m 20G -smp cpus=10,sockets=10 -machine q35 \ -object memory-backend-ram,size=4G,id=m0 \ -object memory-backend-ram,size=4G,id=m1 \ -object memory-backend-ram,size=4G,id=m2 \ -object memory-backend-ram,size=4G,id=m3 \ -object memory-backend-ram,size=4G,id=m4 \ -numa node,cpus=0-1,memdev=m0,nodeid=0 \ -numa node,cpus=2-3,memdev=m1,nodeid=1 \ -numa node,cpus=4-5,memdev=m2,nodeid=2 \ -numa node,cpus=6-7,memdev=m3,nodeid=3 \ -numa node,cpus=8-9,memdev=m4,nodeid=4 \ -numa dist,src=0,dst=1,val=39 \ -numa dist,src=0,dst=2,val=38 \ -numa dist,src=0,dst=3,val=37 \ -numa dist,src=0,dst=4,val=36 \ -numa dist,src=1,dst=0,val=39 \ -numa dist,src=1,dst=2,val=38 \ -numa dist,src=1,dst=3,val=37 \ -numa dist,src=1,dst=4,val=36 \ -numa dist,src=2,dst=0,val=38 \ -numa dist,src=2,dst=1,val=38 \ -numa dist,src=2,dst=3,val=37 \ -numa dist,src=2,dst=4,val=36 \ -numa dist,src=3,dst=0,val=37 \ -numa dist,src=3,dst=1,val=37 \ -numa dist,src=3,dst=2,val=37 \ -numa dist,src=3,dst=4,val=36 \ -numa dist,src=4,dst=0,val=36 \ -numa dist,src=4,dst=1,val=36 \ -numa dist,src=4,dst=2,val=36 \ -numa dist,src=4,dst=3,val=36 \ ... 
[ prateek: Moved common functions to include/linux/sched/topology.h, reuse the common bits for s390 and ppc, commit message ] Closes: https://lore.kernel.org/lkml/20250610110701.GA256154@unreal/ [1] Fixes: ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap") # ce29a7da84cd, f55dac1dafb3 Signed-off-by: Peter Zijlstra (Intel) Reported-by: Leon Romanovsky Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Shrikanth Hegde Tested-by: Valentin Schneider # x86 Tested-by: Shrikanth Hegde # powerpc Link: https://lore.kernel.org/lkml/a3de98387abad28592e6ab591f3ff6107fe01dc1.1755893468.git.tim.c.chen@linux.intel.com/ [2] --- include/linux/sched/topology.h | 28 +++++++++++++++++++++++++++- include/linux/topology.h | 2 +- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 5263746b63e8..a3a24e115d44 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -30,11 +30,19 @@ struct sd_flag_debug { }; extern const struct sd_flag_debug sd_flag_debug[]; +struct sched_domain_topology_level; + #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; } + +static inline const +struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_smt_mask(cpu); +} #endif #ifdef CONFIG_SCHED_CLUSTER @@ -42,6 +50,12 @@ static inline int cpu_cluster_flags(void) { return SD_CLUSTER | SD_SHARE_LLC; } + +static inline const +struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_clustergroup_mask(cpu); +} #endif #ifdef CONFIG_SCHED_MC @@ -49,8 +63,20 @@ static inline int cpu_core_flags(void) { return SD_SHARE_LLC; } + +static inline const +struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_coregroup_mask(cpu); +} #endif +static inline const +struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_node_mask(cpu); +} + #ifdef CONFIG_NUMA static inline int cpu_numa_flags(void) { @@ -172,7 +198,7 @@ bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); bool cpus_share_resources(int this_cpu, int that_cpu); -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl, int cpu); typedef int (*sched_domain_flags_f)(void); struct sd_data { diff --git a/include/linux/topology.h b/include/linux/topology.h index 33b7fda97d39..6575af39fd10 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -260,7 +260,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu) #endif -static inline const struct cpumask *cpu_cpu_mask(int cpu) +static inline const struct cpumask *cpu_node_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); } -- cgit v1.2.3 From 91c614f09abf1d45aac6b475d82a36c704b527ee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Aug 2025 10:55:55 +0200 Subject: sched: Move SDTL_INIT() functions out-of-line Since all these functions are address-taken in SDTL_INIT() and called indirectly, it doesn't really make sense for them to be inline.
Suggested-by: Christophe Leroy
Signed-off-by: Peter Zijlstra (Intel)
---
 include/linux/sched/topology.h | 49 ++++++------------------------------------
 1 file changed, 7 insertions(+), 42 deletions(-)
(limited to 'include/linux')
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index a3a24e115d44..bbcfdf12aa6e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -33,56 +33,21 @@ extern const struct sd_flag_debug sd_flag_debug[];
 struct sched_domain_topology_level;
 
 #ifdef CONFIG_SCHED_SMT
-static inline int cpu_smt_flags(void)
-{
-	return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
-}
-
-static inline const
-struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_smt_mask(cpu);
-}
+extern int cpu_smt_flags(void);
+extern const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu);
 #endif
 
 #ifdef CONFIG_SCHED_CLUSTER
-static inline int cpu_cluster_flags(void)
-{
-	return SD_CLUSTER | SD_SHARE_LLC;
-}
-
-static inline const
-struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_clustergroup_mask(cpu);
-}
+extern int cpu_cluster_flags(void);
+extern const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu);
 #endif
 
 #ifdef CONFIG_SCHED_MC
-static inline int cpu_core_flags(void)
-{
-	return SD_SHARE_LLC;
-}
-
-static inline const
-struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_coregroup_mask(cpu);
-}
+extern int cpu_core_flags(void);
+extern const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu);
 #endif
 
-static inline const
-struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_node_mask(cpu);
-}
-
-#ifdef CONFIG_NUMA
-static inline int cpu_numa_flags(void)
-{
-	return SD_NUMA;
-}
-#endif
+extern const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu);
 
 extern int arch_asym_cpu_priority(int cpu);
-- cgit v1.2.3 From 2cd571245b43492867bf1b4252485f3e6647b643 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 29 Aug 2025 16:11:16 +0800 Subject: sched/fair: Add related data structure for task based throttle

Add related data structures for this new throttle functionality.

Tested-by: K Prateek Nayak
Signed-off-by: Valentin Schneider
Signed-off-by: Aaron Lu
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Chengming Zhou
Tested-by: Valentin Schneider
Tested-by: Matteo Martelli
Link: https://lore.kernel.org/r/20250829081120.806-2-ziqianlu@bytedance.com
---
 include/linux/sched.h | 5 +++++
 1 file changed, 5 insertions(+)
(limited to 'include/linux')
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f8188b833350..644a01bdae70 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -883,6 +883,11 @@ struct task_struct {
 
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group *sched_task_group;
+#ifdef CONFIG_CFS_BANDWIDTH
+	struct callback_head sched_throttle_work;
+	struct list_head throttle_node;
+	bool throttled;
+#endif
 #endif
-- cgit v1.2.3 From 74e2ef72bd4b25ce21c8f309d4f5b91b5df9ff5b Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Thu, 24 Jul 2025 15:41:36 +0530 Subject: wifi: brcmfmac: fix 43752 SDIO FWVID incorrectly labelled as Cypress (CYW)

Cypress (Infineon) is not the vendor for this 43752 SDIO WLAN chip, and so has not officially released any firmware binary for it. It is incorrect to maintain this WLAN chip with firmware vendor ID as "CYW".
So relabel the chip's firmware Vendor ID as "WCC" as suggested by the maintainer.

Fixes: d2587c57ffd8 ("brcmfmac: add 43752 SDIO ids and initialization")
Fixes: f74f1ec22dc2 ("wifi: brcmfmac: add support for Cypress firmware api")
Signed-off-by: Gokul Sivakumar
Acked-by: Arend van Spriel
Link: https://patch.msgid.link/20250724101136.6691-1-gokulkumar.sivakumar@infineon.com
Signed-off-by: Johannes Berg
---
 include/linux/mmc/sdio_ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'include/linux')
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index fe3d6d98f8da..673cbdf43453 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -77,7 +77,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_43439 0xa9af
 #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf
 #define SDIO_DEVICE_ID_BROADCOM_43751 0xaae7
-#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752 0xaae8
+#define SDIO_DEVICE_ID_BROADCOM_43752 0xaae8
 
 #define SDIO_VENDOR_ID_CYPRESS 0x04b4
 #define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43439 0xbd3d
-- cgit v1.2.3 From 762af5a2aa0ad18da1316666dae30d369268d44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 25 Aug 2025 15:26:35 +0200 Subject: vdso/vsyscall: Avoid slow division loop in auxiliary clock update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

The call to __iter_div_u64_rem() in vdso_time_update_aux() is a wrapper around subtraction. It cannot be used to divide large numbers, as that introduces long, computationally expensive delays. A regular u64 division is also not possible in the timekeeper update path as it can be too slow.

Instead of splitting the ktime_t offset into second and subsecond components during the timekeeper update fast-path, do it together with the adjustment of tk->offs_aux in the slow-path. This is equivalent to the handling of offs_boot and monotonic_to_boot.

Reuse the storage of monotonic_to_boot for the new field, as it is not used by auxiliary timekeepers.

Fixes: 380b84e168e5 ("vdso/vsyscall: Update auxiliary clock data in the datapage")
Reported-by: Miroslav Lichvar
Signed-off-by: Thomas Weißschuh
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/all/20250825-vdso-auxclock-division-v1-1-a1d32a16a313@linutronix.de
Closes: https://lore.kernel.org/lkml/aKwsNNWsHJg8IKzj@localhost/
---
 include/linux/timekeeper_internal.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
(limited to 'include/linux')
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index c27aac67cb3f..b8ae89ea28ab 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -76,6 +76,7 @@ struct tk_read_base {
 * @cs_was_changed_seq: The sequence number of clocksource change events
 * @clock_valid: Indicator for valid clock
 * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @monotonic_to_aux: CLOCK_MONOTONIC to CLOCK_AUX offset
 * @cycle_interval: Number of clock cycles in one NTP interval
 * @xtime_interval: Number of clock shifted nano seconds in one NTP
 * interval.
@@ -117,6 +118,9 @@ struct tk_read_base {
 * @offs_aux is used by the auxiliary timekeepers which do not utilize any
 * of the regular timekeeper offset fields.
 *
+ * @monotonic_to_aux is a timespec64 representation of @offs_aux to
+ * accelerate the VDSO update for CLOCK_AUX.
+ *
 * The cacheline ordering of the structure is optimized for in kernel usage of
 * the ktime_get() and ktime_get_ts64() family of time accessors.
Struct * timekeeper is prepended in the core timekeeping code with a sequence count, @@ -159,7 +163,10 @@ struct timekeeper { u8 cs_was_changed_seq; u8 clock_valid; - struct timespec64 monotonic_to_boot; + union { + struct timespec64 monotonic_to_boot; + struct timespec64 monotonic_to_aux; + }; u64 cycle_interval; u64 xtime_interval; -- cgit v1.2.3 From 0a26e5eb78fb1627beb8e3eb172737f8492d2799 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Aug 2025 15:34:23 -0500 Subject: jiffies: Remove obsolete SHIFTED_HZ comment b3c869d35b9b ("jiffies: Remove compile time assumptions about CLOCK_TICK_RATE") removed the last definition of SHIFTED_HZ but left behind comments about it. Remove the comments as well. Signed-off-by: Bjorn Helgaas Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250825203425.796034-1-helgaas@kernel.org --- include/linux/jiffies.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 91b20788273d..0d1927da8055 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -61,7 +61,7 @@ extern void register_refined_jiffies(long clock_tick_rate); -/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ +/* TICK_USEC is the time between ticks in usec */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) /* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ -- cgit v1.2.3 From a576a849d5f33356e0d8fd3eae4fbaf8869417e5 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 5 Aug 2025 15:34:43 +0200 Subject: of/irq: Convert of_msi_map_id() callers to of_msi_xlate() With the introduction of the of_msi_xlate() function, the OF layer provides an API to map a device ID and retrieve the MSI controller node the ID is mapped to with a single call. of_msi_map_id() is currently used to map a deviceID to a specific MSI controller node; of_msi_xlate() can be used for that purpose too, there is no need to keep the two functions. Convert of_msi_map_id() to of_msi_xlate() calls and update the of_msi_xlate() documentation to describe how the struct device_node pointer passed in should be set-up to either provide the MSI controller node target or receive its pointer upon mapping completion. 
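A sketch of the resulting conversion at a call site (variable names hypothetical; the signatures are those shown in the hunk below):

/* Before: map an ID against a known MSI controller node. */
u32 id = of_msi_map_id(dev, msi_np, id_in);

/* After: pass the node by reference; on return, *np is either the
 * controller that was passed in or the one the ID mapped to. */
struct device_node *np = msi_np;
u32 id = of_msi_xlate(dev, &np, id_in);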
Signed-off-by: Lorenzo Pieralisi Cc: Thomas Gleixner Cc: Rob Herring Cc: Marc Zyngier Acked-by: Thomas Gleixner Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250805133443.936955-1-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/of_irq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index a480063c9cb1..1db8543dfc8a 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -55,7 +55,6 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 bus_token); extern void of_msi_configure(struct device *dev, const struct device_node *np); extern u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in); -u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline void of_irq_init(const struct of_device_id *matches) { @@ -105,11 +104,6 @@ static inline u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, { return id_in; } -static inline u32 of_msi_map_id(struct device *dev, - struct device_node *msi_np, u32 id_in) -{ - return id_in; -} #endif #if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC) -- cgit v1.2.3 From 54dbd2a8e974b900b18639e75f62702a4334ddc0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Sep 2025 14:52:56 +0300 Subject: PCI/P2PDMA: Reduce scope of pci_has_p2pmem() pci_has_p2pmem() is not used outside of p2pdma.c, and there is no need to export it for use by modules. Signed-off-by: Leon Romanovsky Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Link: https://patch.msgid.link/d40f3f1decf54c9236bc38b48a6aae612a5c182f.1756900291.git.leon@kernel.org --- include/linux/pci-p2pdma.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 075c20b161d9..951f81a38f3a 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -21,7 +21,6 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset); int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose); -bool pci_has_p2pmem(struct pci_dev *pdev); struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients); void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size); void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size); @@ -45,10 +44,6 @@ static inline int pci_p2pdma_distance_many(struct pci_dev *provider, { return -1; } -static inline bool pci_has_p2pmem(struct pci_dev *pdev) -{ - return false; -} static inline struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients) { -- cgit v1.2.3 From dd386b0d5e61556927189cd7b59a628d22cb6851 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Mon, 1 Sep 2025 19:26:07 -0600 Subject: io_uring/uring_cmd: correct io_uring_cmd_done() ret type io_uring_cmd_done() takes the result code for the CQE as a ssize_t ret argument. However, the CQE res field is a s32 value, as is the argument to io_req_set_res(). To clarify that only s32 values can be faithfully represented without truncation, change io_uring_cmd_done()'s ret argument type to s32. 
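The narrowing this guards against can be illustrated with a short, purely hypothetical sketch:

/* A 64-bit ssize_t result that does not fit in the CQE's s32 res field
 * would previously be truncated silently at completion time: */
ssize_t ret = 0x100000000;		/* becomes 0 when narrowed to s32 */
io_uring_cmd_done(cmd, ret, 0, issue_flags);
/* With the s32 prototype, any narrowing now happens visibly at the
 * call boundary instead of inside the completion path. */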
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250902012609.1513123-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 7211157edfe9..c4d7874016bb 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -56,7 +56,7 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, * Note: the caller should never hard code @issue_flags and is only allowed * to pass the mask provided by the core io_uring code. */ -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2, +void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2, unsigned issue_flags); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, @@ -104,7 +104,7 @@ static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, { return -EOPNOTSUPP; } -static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, +static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 ret2, unsigned issue_flags) { } -- cgit v1.2.3 From 9f8608fce90fbcd2a98ceefad0bc762423927629 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Mon, 1 Sep 2025 19:33:27 -0600 Subject: io_uring/cmd: remove unused io_uring_cmd_iopoll_done() io_uring_cmd_iopoll_done()'s only caller was removed in commit 9ce6c9875f3e ("nvme: always punt polled uring_cmd end_io work to task_work"). So remove the unused function too. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250902013328.1517686-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index c4d7874016bb..50dd6a53cb5e 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -133,17 +133,6 @@ static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, } #endif -/* - * Polled completions must ensure they are coming from a poll queue, and - * hence are completed inside the usual poll handling loops. - */ -static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd, - ssize_t ret, ssize_t res2) -{ - lockdep_assert(in_task()); - io_uring_cmd_done(ioucmd, ret, res2, 0); -} - /* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, io_uring_cmd_tw_t task_work_cb) -- cgit v1.2.3 From 8c94db0ae97c72c253a615f990bd466b456e94f6 Mon Sep 17 00:00:00 2001 From: Svetlana Parfenova Date: Mon, 1 Sep 2025 20:53:50 +0700 Subject: binfmt_elf: preserve original ELF e_flags for core dumps Some architectures, such as RISC-V, use the ELF e_flags field to encode ABI-specific information (e.g., ISA extensions, fpu support). Debuggers like GDB rely on these flags in core dumps to correctly interpret optional register sets. If the flags are missing or incorrect, GDB may warn and ignore valid data, for example: warning: Unexpected size of section '.reg2/213' in core file. This can prevent access to fpu or other architecture-specific registers even when they were dumped. Save the e_flags field during ELF binary loading (in load_elf_binary()) into the mm_struct, and later retrieve it during core dump generation (in fill_note_info()). 
Kconfig option CONFIG_ARCH_HAS_ELF_CORE_EFLAGS is introduced for architectures that require this behaviour. Signed-off-by: Svetlana Parfenova Link: https://lore.kernel.org/r/20250901135350.619485-1-svetlana.parfenova@syntacore.com Signed-off-by: Kees Cook --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 08bc2442db93..04a2857f12f2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1102,6 +1102,11 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ +#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS + /* the ABI-related flags from the ELF header. Used for core dump */ + unsigned long saved_e_flags; +#endif + struct percpu_counter rss_stat[NR_MM_COUNTERS]; struct linux_binfmt *binfmt; -- cgit v1.2.3 From e0c47c6229c25b54440fe1f84a0ff533942290b1 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Fri, 25 Jul 2025 23:22:20 +1000 Subject: wifi: mac80211: support parsing S1G TIM PVB An S1G TIM PVB has 3 mandatory encoding modes, that being block bitmap, single AID and OBL alongside the ability for each encoding mode to be inverted. Introduce the ability to parse the 3 encoding formats. The implementation specification for the encoding formats can be found in IEEE80211-2024 9.4.2.5. Signed-off-by: Arien Judge Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250725132221.258217-3-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 265 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 256 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e5a2096e022e..d350263f23f3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -220,6 +220,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_MAX_AID_S1G 8191 #define IEEE80211_MAX_TIM_LEN 251 #define IEEE80211_MAX_MESH_PEERINGS 63 + +/* S1G encoding types */ +#define IEEE80211_S1G_TIM_ENC_MODE_BLOCK 0 +#define IEEE80211_S1G_TIM_ENC_MODE_SINGLE 1 +#define IEEE80211_S1G_TIM_ENC_MODE_OLB 2 + /* Maximum size for the MA-UNITDATA primitive, 802.11 standard section 6.2.1.1.2. @@ -4757,15 +4763,8 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) return 1024 * tu; } -/** - * ieee80211_check_tim - check if AID bit is set in TIM - * @tim: the TIM IE - * @tim_len: length of the TIM IE - * @aid: the AID to look for - * Return: whether or not traffic is indicated in the TIM for the given AID - */ -static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, - u8 tim_len, u16 aid) +static inline bool __ieee80211_check_tim(const struct ieee80211_tim_ie *tim, + u8 tim_len, u16 aid) { u8 mask; u8 index, indexn1, indexn2; @@ -4788,6 +4787,254 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, return !!(tim->virtual_map[index] & mask); } +struct s1g_tim_aid { + u16 aid; + u8 target_blk; /* Target block index */ + u8 target_subblk; /* Target subblock index */ + u8 target_subblk_bit; /* Target subblock bit */ +}; + +struct s1g_tim_enc_block { + u8 enc_mode; + bool inverse; + const u8 *ptr; + u8 len; + + /* + * For an OLB encoded block that spans multiple blocks, this + * is the offset into the span described by that encoded block. + */ + u8 olb_blk_offset; +}; + +/* + * Helper routines to quickly extract the length of an encoded block. 
Validation + * is also performed to ensure the length extracted lies within the TIM. + */ + +static inline int ieee80211_s1g_len_bitmap(const u8 *ptr, const u8 *end) +{ + u8 blkmap; + u8 n_subblks; + + if (ptr >= end) + return -EINVAL; + + blkmap = *ptr; + n_subblks = hweight8(blkmap); + + if (ptr + 1 + n_subblks > end) + return -EINVAL; + + return 1 + n_subblks; +} + +static inline int ieee80211_s1g_len_single(const u8 *ptr, const u8 *end) +{ + return (ptr + 1 > end) ? -EINVAL : 1; +} + +static inline int ieee80211_s1g_len_olb(const u8 *ptr, const u8 *end) +{ + if (ptr >= end) + return -EINVAL; + + return (ptr + 1 + *ptr > end) ? -EINVAL : 1 + *ptr; +} + +/* + * Enumerate all encoded blocks until we find the encoded block that describes + * our target AID. OLB is a special case as a single encoded block can describe + * multiple blocks as a single encoded block. + */ +static inline int ieee80211_s1g_find_target_block(struct s1g_tim_enc_block *enc, + const struct s1g_tim_aid *aid, + const u8 *ptr, const u8 *end) +{ + /* need at least block-control octet */ + while (ptr + 1 <= end) { + u8 ctrl = *ptr++; + u8 mode = ctrl & 0x03; + bool contains, inverse = ctrl & BIT(2); + u8 span, blk_off = ctrl >> 3; + int len; + + switch (mode) { + case IEEE80211_S1G_TIM_ENC_MODE_BLOCK: + len = ieee80211_s1g_len_bitmap(ptr, end); + contains = blk_off == aid->target_blk; + break; + case IEEE80211_S1G_TIM_ENC_MODE_SINGLE: + len = ieee80211_s1g_len_single(ptr, end); + contains = blk_off == aid->target_blk; + break; + case IEEE80211_S1G_TIM_ENC_MODE_OLB: + len = ieee80211_s1g_len_olb(ptr, end); + /* + * An OLB encoded block can describe more then one + * block, meaning an encoded OLB block can span more + * then a single block. + */ + if (len > 0) { + /* Minus one for the length octet */ + span = DIV_ROUND_UP(len - 1, 8); + /* + * Check if our target block lies within the + * block span described by this encoded block. + */ + contains = (aid->target_blk >= blk_off) && + (aid->target_blk < blk_off + span); + } + break; + default: + return -EOPNOTSUPP; + } + + if (len < 0) + return len; + + if (contains) { + enc->enc_mode = mode; + enc->inverse = inverse; + enc->ptr = ptr; + enc->len = (u8)len; + enc->olb_blk_offset = blk_off; + return 0; + } + + ptr += len; + } + + return -ENOENT; +} + +static inline bool ieee80211_s1g_parse_bitmap(struct s1g_tim_enc_block *enc, + struct s1g_tim_aid *aid) +{ + const u8 *ptr = enc->ptr; + u8 blkmap = *ptr++; + + /* + * If our block bitmap does not contain a set bit that corresponds + * to our AID, it could mean a variety of things depending on if + * the encoding mode is inverted or not. + * + * 1. If inverted, it means the entire subblock is present and hence + * our AID has been set. + * 2. If not inverted, it means our subblock is not present and hence + * it is all zero meaning our AID is not set. + */ + if (!(blkmap & BIT(aid->target_subblk))) + return enc->inverse; + + /* + * Increment ptr by the number of set subblocks that appear before our + * target subblock. If our target subblock is 0, do nothing as ptr + * already points to our target subblock. + */ + if (aid->target_subblk) + ptr += hweight8(blkmap & GENMASK(aid->target_subblk - 1, 0)); + + return !!(*ptr & BIT(aid->target_subblk_bit)) ^ enc->inverse; +} + +static inline bool ieee80211_s1g_parse_single(struct s1g_tim_enc_block *enc, + struct s1g_tim_aid *aid) +{ + /* + * Single AID mode describes, as the name suggests, a single AID + * within the block described by the encoded block. 
The octet + * contains the 6 LSBs of the AID described in the block. The other + * 2 bits are reserved. When inversed, every single AID described + * by the current block have buffered traffic except for the AID + * described in the single AID octet. + */ + return ((*enc->ptr & 0x3f) == (aid->aid & 0x3f)) ^ enc->inverse; +} + +static inline bool ieee80211_s1g_parse_olb(struct s1g_tim_enc_block *enc, + struct s1g_tim_aid *aid) +{ + const u8 *ptr = enc->ptr; + u8 blk_len = *ptr++; + /* + * Given an OLB encoded block that describes multiple blocks, + * calculate the offset into the span. Then calculate the + * subblock location normally. + */ + u16 span_offset = aid->target_blk - enc->olb_blk_offset; + u16 subblk_idx = span_offset * 8 + aid->target_subblk; + + if (subblk_idx >= blk_len) + return enc->inverse; + + return !!(ptr[subblk_idx] & BIT(aid->target_subblk_bit)) ^ enc->inverse; +} + +/* + * An S1G PVB has 3 non optional encoding types, each that can be inverted. + * An S1G PVB is constructed with zero or more encoded block subfields. Each + * encoded block represents a single "block" of AIDs (64), and each encoded + * block can contain one of the 3 encoding types alongside a single bit for + * whether the bits should be inverted. + * + * As the standard makes no guarantee about the ordering of encoded blocks, + * we must parse every encoded block in the worst case scenario given an + * AID that lies within the last block. + */ +static inline bool ieee80211_s1g_check_tim(const struct ieee80211_tim_ie *tim, + u8 tim_len, u16 aid) +{ + int err; + struct s1g_tim_aid target_aid; + struct s1g_tim_enc_block enc_blk; + + if (tim_len < 3) + return false; + + target_aid.aid = aid; + target_aid.target_blk = (aid >> 6) & 0x1f; + target_aid.target_subblk = (aid >> 3) & 0x7; + target_aid.target_subblk_bit = aid & 0x7; + + /* + * Find our AIDs target encoded block and fill &enc_blk with the + * encoded blocks information. If no entry is found or an error + * occurs return false. + */ + err = ieee80211_s1g_find_target_block(&enc_blk, &target_aid, + tim->virtual_map, + (const u8 *)tim + tim_len + 2); + if (err) + return false; + + switch (enc_blk.enc_mode) { + case IEEE80211_S1G_TIM_ENC_MODE_BLOCK: + return ieee80211_s1g_parse_bitmap(&enc_blk, &target_aid); + case IEEE80211_S1G_TIM_ENC_MODE_SINGLE: + return ieee80211_s1g_parse_single(&enc_blk, &target_aid); + case IEEE80211_S1G_TIM_ENC_MODE_OLB: + return ieee80211_s1g_parse_olb(&enc_blk, &target_aid); + default: + return false; + } +} + +/** + * ieee80211_check_tim - check if AID bit is set in TIM + * @tim: the TIM IE + * @tim_len: length of the TIM IE + * @aid: the AID to look for + * @s1g: whether the TIM is from an S1G PPDU + * Return: whether or not traffic is indicated in the TIM for the given AID + */ +static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, + u8 tim_len, u16 aid, bool s1g) +{ + return s1g ? 
ieee80211_s1g_check_tim(tim, tim_len, aid) : + __ieee80211_check_tim(tim, tim_len, aid); +} + /** * ieee80211_get_tdls_action - get TDLS action code * @skb: the skb containing the frame, length will not be checked -- cgit v1.2.3 From d8b269e009bbc471cb2735b5f737839495efce3b Mon Sep 17 00:00:00 2001 From: Chuyi Zhou Date: Thu, 4 Sep 2025 15:45:05 +0800 Subject: cgroup: Remove unused cgroup_subsys::post_attach cgroup_subsys::post_attach callback was introduced in commit 5cf1cacb49ae ("cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback") and only cpuset would use this callback to wait for the mm migration to complete at the end of __cgroup_procs_write(). Since the previous patch defer the flush operation until returning to userspace, no one use this callback now. Remove this callback from cgroup_subsys. Signed-off-by: Chuyi Zhou Acked-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 539c64eeef38..92ed6d18266d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -763,7 +763,6 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); - void (*post_attach)(void); int (*can_fork)(struct task_struct *task, struct css_set *cset); void (*cancel_fork)(struct task_struct *task, struct css_set *cset); -- cgit v1.2.3 From b28f9eba12a4967eff6e8a1c0512f86f1ac7fa68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 28 Jun 2025 11:37:30 -0400 Subject: change the calling conventions for vfs_parse_fs_string() Absolute majority of callers are passing the 4th argument equal to strlen() of the 3rd one. Drop the v_size argument, add vfs_parse_fs_qstr() for the cases that want independent length. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs_context.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 7773eb870039..97b514a79a49 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -134,8 +134,13 @@ extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_ty extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); -extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, - const char *value, size_t v_size); +extern int vfs_parse_fs_qstr(struct fs_context *fc, const char *key, + const struct qstr *value); +static inline int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value) +{ + return vfs_parse_fs_qstr(fc, key, value ? &QSTR(value) : NULL); +} int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); -- cgit v1.2.3 From 038c7dc66e2744e5df57163b8f957745ae10d23e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Sep 2025 20:46:40 -0700 Subject: compiler_types.h: Move __nocfi out of compiler-specific header Prepare for GCC KCFI support and move the __nocfi attribute from compiler-clang.h to compiler_types.h. This was already gated by CONFIG_CFI_CLANG, so this remains safe for non-KCFI GCC builds. 
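For reference, a minimal (hypothetical) use of the attribute being moved:

/* __nocfi suppresses kCFI checking inside one function, e.g. for an
 * indirect call into code that carries no kCFI type hash: */
static void __nocfi call_firmware_entry(void (*entry)(void))
{
	entry();	/* unchecked indirect call */
}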
Signed-off-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250904034656.3670313-1-kees@kernel.org --- include/linux/compiler-clang.h | 5 ----- include/linux/compiler_types.h | 4 +++- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa4ffe037bc7..7a4568e421dc 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -96,11 +96,6 @@ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#if __has_feature(kcfi) -/* Disable CFI checking inside a function. */ -#define __nocfi __attribute__((__no_sanitize__("kcfi"))) -#endif - /* * Turn individual warnings and errors on and off locally, depending * on version. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 16755431fc11..a910f9fa5341 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -432,7 +432,9 @@ struct ftrace_likely_data { # define __noscs #endif -#ifndef __nocfi +#if defined(CONFIG_CFI_CLANG) +# define __nocfi __attribute__((__no_sanitize__("kcfi"))) +#else # define __nocfi #endif -- cgit v1.2.3 From 0b815825b1b0bd6762ca028e9b6631b002efb7ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Sep 2025 20:46:45 -0700 Subject: x86/cfi: Remove __noinitretpoline and __noretpoline Commit 66f793099a63 ("x86/retpoline: Avoid retpolines for built-in __init functions") disabled retpolines in __init sections (__noinitretpoline) as a precaution against potential issues with retpolines in early boot, but it has not been a problem in practice (i.e. see Clang below). Commit 87358710c1fb ("x86/retpoline: Support retpoline builds with Clang") narrowed this to only GCC, as Clang doesn't have per-function control over retpoline emission. As such, Clang has been booting with retpolines in __init since retpoline support was introduced. Clang KCFI has been instrumenting __init since CFI was introduced. With the introduction of KCFI for GCC, KCFI instrumentation with retpolines disabled means that objtool does not construct .retpoline_sites section entries for the non-retpoline KCFI calls. At boot, the KCFI rehashing code, via __apply_fineibt(), misses all __init KCFI calls (since they are not retpolines), resulting in immediate hash mismatches: all preambles are rehashed (via .cfi_sites) and none of the __init call sites are rehashed. Remove __noinitretpoline since it provides no meaningful utility and creates problems with CFI. Additionally remove __noretpoline since it is now unused. Alternatively, cfi_rand_callers() could walk the .kcfi_traps section which is exactly the list of KCFI instrumentation sites. But it seems better to have as few differences in common instruction sequences between compilers as possible, so better to remove the special handling of retpolines in __init for GCC. 
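For reference, the removed annotation expanded (on GCC) to the following, per the hunk below; after this change, __init code is simply built with the same retpolines as everything else:

#define __noretpoline __attribute__((__indirect_branch__("keep")))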
Signed-off-by: Kees Cook
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20250904034656.3670313-6-kees@kernel.org
---
 include/linux/compiler-gcc.h | 4 ----
 include/linux/init.h | 8 --------
 2 files changed, 12 deletions(-)
(limited to 'include/linux')
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5d07c469b571..5de824a0b3d7 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -35,10 +35,6 @@
 	(typeof(ptr)) (__ptr + (off)); \
 })
 
-#ifdef CONFIG_MITIGATION_RETPOLINE
-#define __noretpoline __attribute__((__indirect_branch__("keep")))
-#endif
-
 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
diff --git a/include/linux/init.h b/include/linux/init.h
index a60d32d227ee..17c1bc712e23 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -7,13 +7,6 @@
 #include
 #include
 
-/* Built-in __init functions needn't be compiled with retpoline */
-#if defined(__noretpoline) && !defined(MODULE)
-#define __noinitretpoline __noretpoline
-#else
-#define __noinitretpoline
-#endif
-
 /* These macros are used to mark some functions or
 * initialized data (doesn't apply to uninitialized data)
 * as `initialization' functions. The kernel can take this
@@ -50,7 +43,6 @@
 /* These are for everybody (although not all archs will actually discard it in modules) */
 #define __init __section(".init.text") __cold __latent_entropy \
-		__noinitretpoline \
 		__no_kstack_erase
 #define __initdata __section(".init.data")
 #define __initconst __section(".init.rodata")
-- cgit v1.2.3 From 54728bd535fb3899ad51489dc1e05eb5bb53cb95 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 1 Sep 2025 15:27:43 +0200 Subject: bpf: Return an error pointer for skb metadata when CONFIG_NET=n

Kernel Test Robot reported a compiler warning - a null pointer may be passed to memmove in __bpf_dynptr_{read,write} when building without networking support. The warning is correct from a static analysis standpoint, but not actually reachable. Without CONFIG_NET, creating dynptrs to skb metadata is impossible since the constructor kfunc is missing.

Silence the false-positive diagnostic message by returning an error pointer from the bpf_skb_meta_pointer() stub when CONFIG_NET=n.

Fixes: 6877cd392bae ("bpf: Enable read/write access to skb metadata through a dynptr")
Closes: https://lore.kernel.org/oe-kbuild-all/202508212031.ir9b3B6Q-lkp@intel.com/
Reported-by: kernel test robot
Suggested-by: Alexei Starovoitov
Signed-off-by: Jakub Sitnicki
Signed-off-by: Martin KaFai Lau
Link: https://patch.msgid.link/20250901-dynptr-skb-meta-no-net-v2-1-ce607fcb6091@cloudflare.com
---
 include/linux/filter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'include/linux')
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9ed21b65e2e9..af6d9354662c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1822,7 +1822,7 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
 
 static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
 {
-	return NULL;
+	return ERR_PTR(-EOPNOTSUPP);
 }
 #endif /* CONFIG_NET */
-- cgit v1.2.3 From 74a0e72f03ffd01b5d88b411f02d9b9861fdb99e Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 21 Aug 2025 12:15:59 +0200 Subject: iommu/io-pgtable-dart: Add 4-level page table support

DARTs on t602x SoCs are of the t8110 variant but have an IAS of 42, which means optional support for an extra page table level.
Refactor the PTE management to support an arbitrary level count, and then calculate how many levels we need for any given configuration.

Signed-off-by: Hector Martin
Signed-off-by: Janne Grunau
Reviewed-by: Sven Peter
Reviewed-by: Neal Gompa
Link: https://lore.kernel.org/r/20250821-apple-dart-4levels-v2-2-e39af79daa37@jannau.net
Signed-off-by: Joerg Roedel
---
 include/linux/io-pgtable.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include/linux')
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 138fbd89b1e6..8a823c6f2b4a 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -180,6 +180,7 @@ struct io_pgtable_cfg {
 	struct {
 		u64 ttbr[4];
 		u32 n_ttbrs;
+		u32 n_levels;
 	} apple_dart_cfg;
 
 	struct {
-- cgit v1.2.3 From 8f77295525825086cb43675cd1a4f3716b119d7f Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 18 Aug 2025 10:28:05 +0530 Subject: ACPI: RISC-V: Add support for RIMT

RISC-V IO Mapping Table (RIMT) is a static ACPI table to communicate IOMMU information to the OS. The spec is available at [1]. The changes at a high level are:

a) Initialize data structures required for IOMMU/device configuration using the data from RIMT. Provide APIs required for device configuration.

b) Provide an API for IOMMU drivers to register the fwnode with RIMT data structures. This API will create a fwnode for the PCIe IOMMU.

[1] - https://github.com/riscv-non-isa/riscv-acpi-rimt

Signed-off-by: Sunil V L
Reviewed-by: Anup Patel
Link: https://lore.kernel.org/r/20250818045807.763922-2-sunilvl@ventanamicro.com
Signed-off-by: Joerg Roedel
---
 include/linux/acpi_rimt.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 include/linux/acpi_rimt.h
(limited to 'include/linux')
diff --git a/include/linux/acpi_rimt.h b/include/linux/acpi_rimt.h
new file mode 100644
index 000000000000..fad3adc4d899
--- /dev/null
+++ b/include/linux/acpi_rimt.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024-2025, Ventana Micro Systems Inc.
+ * Author: Sunil V L
+ */
+
+#ifndef _ACPI_RIMT_H
+#define _ACPI_RIMT_H
+
+#ifdef CONFIG_ACPI_RIMT
+int rimt_iommu_register(struct device *dev);
+#else
+static inline int rimt_iommu_register(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif
+
+#if defined(CONFIG_IOMMU_API) && defined(CONFIG_ACPI_RIMT)
+int rimt_iommu_configure_id(struct device *dev, const u32 *id_in);
+#else
+static inline int rimt_iommu_configure_id(struct device *dev, const u32 *id_in)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif /* _ACPI_RIMT_H */
-- cgit v1.2.3 From f6cfa602d2ba7e5ca9dc65ec4141521aca80bda2 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 5 Sep 2025 11:13:23 +0200 Subject: workqueue: replace use of system_unbound_wq with system_dfl_wq

Currently, if a user enqueues a work item using schedule_delayed_work(), the wq used is "system_wq" (per-cpu wq), while queue_delayed_work() uses WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work(), which uses system_wq, and queue_work(), which again makes use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the API. system_unbound_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. Add system_dfl_wq to encourage its use when unbound work should be used.
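As a usage sketch (the work item here is hypothetical), work with no locality requirement would now be queued as follows; the compatibility behaviour for the old name is described next:

static void my_work_fn(struct work_struct *work)
{
	/* may run on any CPU; no locality constraint */
}
static DECLARE_WORK(my_work, my_work_fn);

queue_work(system_dfl_wq, &my_work);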
queue_work() / queue_delayed_work() / mod_delayed_work() will now use the new unbound wq: if a user still uses the old wq, a warning will be printed along with a redirect to the new one. The old system_unbound_wq will be kept for a few release cycles.

Suggested-by: Tejun Heo
Signed-off-by: Marco Crivellari
Signed-off-by: Tejun Heo
---
 include/linux/workqueue.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'include/linux')
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 45d5dd470ff6..af860e8f8481 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -783,8 +783,8 @@ extern void __warn_flushing_systemwide_wq(void)
 	_wq == system_highpri_wq) || \
 	(__builtin_constant_p(_wq == system_long_wq) && \
 	_wq == system_long_wq) || \
-	(__builtin_constant_p(_wq == system_unbound_wq) && \
-	_wq == system_unbound_wq) || \
+	(__builtin_constant_p(_wq == system_dfl_wq) && \
+	_wq == system_dfl_wq) || \
 	(__builtin_constant_p(_wq == system_freezable_wq) && \
 	_wq == system_freezable_wq) || \
 	(__builtin_constant_p(_wq == system_power_efficient_wq) && \
-- cgit v1.2.3 From a2be943b46b4a7478ea8ddf9bb8e5251c59fceb7 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 5 Sep 2025 11:13:24 +0200 Subject: workqueue: replace use of system_wq with system_percpu_wq

Currently, if a user enqueues a work item using schedule_delayed_work(), the wq used is "system_wq" (per-cpu wq), while queue_delayed_work() uses WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work(), which uses system_wq, and queue_work(), which again makes use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the API. system_wq is a per-CPU workqueue, yet nothing in its name says anything about that CPU affinity constraint, which is very often not required by users. Make this clear by adding a system_percpu_wq.

queue_work() / queue_delayed_work() / mod_delayed_work() will now use the new per-cpu wq: if a user still sticks to the old name, a warning will be printed along with a redirect to the new one.

This patch adds the new system_percpu_wq except for the mm, fs and net subsystems, which are handled in separate patches. The old wq will be kept for a few release cycles.

Suggested-by: Tejun Heo
Signed-off-by: Marco Crivellari
Signed-off-by: Tejun Heo
---
 include/linux/workqueue.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)
(limited to 'include/linux')
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index af860e8f8481..b6834b7aee4b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -434,10 +434,10 @@ enum wq_consts {
 * short queue flush time. Don't queue works which can run for too
 * long.
 *
- * system_highpri_wq is similar to system_wq but for work items which
+ * system_highpri_wq is similar to system_percpu_wq but for work items which
 * require WQ_HIGHPRI.
 *
- * system_long_wq is similar to system_wq but may host long running
+ * system_long_wq is similar to system_percpu_wq but may host long running
 * works. Queue flushing might take relatively long.
 *
 * system_dfl_wq is unbound workqueue. Workers are not bound to
@@ -445,13 +445,13 @@ enum wq_consts {
 * executed immediately as long as max_active limit is not reached and
 * resources are available.
 *
- * system_freezable_wq is equivalent to system_wq except that it's
+ * system_freezable_wq is equivalent to system_percpu_wq except that it's
* * *_power_efficient_wq are inclined towards saving power and converted * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, * they are same as their non-power-efficient counterparts - e.g. - * system_power_efficient_wq is identical to system_wq if + * system_power_efficient_wq is identical to system_percpu_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. * * system_bh[_highpri]_wq are convenience interface to softirq. BH work items @@ -708,7 +708,7 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq, */ static inline bool schedule_work_on(int cpu, struct work_struct *work) { - return queue_work_on(cpu, system_wq, work); + return queue_work_on(cpu, system_percpu_wq, work); } /** @@ -727,7 +727,7 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work) */ static inline bool schedule_work(struct work_struct *work) { - return queue_work(system_wq, work); + return queue_work(system_percpu_wq, work); } /** @@ -770,15 +770,15 @@ extern void __warn_flushing_systemwide_wq(void) #define flush_scheduled_work() \ ({ \ __warn_flushing_systemwide_wq(); \ - __flush_workqueue(system_wq); \ + __flush_workqueue(system_percpu_wq); \ }) #define flush_workqueue(wq) \ ({ \ struct workqueue_struct *_wq = (wq); \ \ - if ((__builtin_constant_p(_wq == system_wq) && \ - _wq == system_wq) || \ + if ((__builtin_constant_p(_wq == system_percpu_wq) && \ + _wq == system_percpu_wq) || \ (__builtin_constant_p(_wq == system_highpri_wq) && \ _wq == system_highpri_wq) || \ (__builtin_constant_p(_wq == system_long_wq) && \ @@ -807,7 +807,7 @@ extern void __warn_flushing_systemwide_wq(void) static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work_on(cpu, system_wq, dwork, delay); + return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay); } /** @@ -821,7 +821,7 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, static inline bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work(system_wq, dwork, delay); + return queue_delayed_work(system_percpu_wq, dwork, delay); } #ifndef CONFIG_SMP -- cgit v1.2.3 From 97248d05b70edc674f2f2fa835fed33172686b1d Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Tue, 2 Sep 2025 15:33:23 +0800 Subject: cpufreq: Drop redundant freq_table parameter Since commit e0b3165ba521 ("cpufreq: add 'freq_table' in struct cpufreq_policy"), freq_table has been stored in struct cpufreq_policy instead of being maintained separately. However, several helpers in freq_table.c still take both policy and freq_table as parameters, even though policy->freq_table can always be used. This leads to redundant function arguments and increases the chance of inconsistencies. This patch removes the unnecessary freq_table argument from these functions and updates their callers to only pass policy. This makes the code simpler, more consistent, and avoids duplication. Signed-off-by: Zihuan Zhang Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250902073323.48330-1-zhangzihuan@kylinos.cn Signed-off-by: Rafael J. 
Wysocki --- include/linux/cpufreq.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 95f3807c8c55..40966512ea18 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -780,11 +780,10 @@ struct cpufreq_frequency_table { else -int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table); +int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy); + +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy); -int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, - struct cpufreq_frequency_table *table); int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, -- cgit v1.2.3 From 1544344563376b2a2ae2af5af1db00d6410c18e0 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 29 Aug 2025 15:28:44 +0800 Subject: rhashtable: Use __always_inline instead of inline Sometimes, the compiler is not clever enough to inline the rhashtable_lookup() for us, even if the "obj_cmpfn" and "key_len" in params is const. This can introduce more overhead. Therefore, use __always_inline for the rhashtable. Signed-off-by: Menglong Dong Signed-off-by: Herbert Xu --- include/linux/rhashtable.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 6c85b28ea30b..e740157f3cd7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -122,7 +122,7 @@ static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, return hash & (tbl->size - 1); } -static inline unsigned int rht_key_get_hash(struct rhashtable *ht, +static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, const void *key, const struct rhashtable_params params, unsigned int hash_rnd) { @@ -152,7 +152,7 @@ static inline unsigned int rht_key_get_hash(struct rhashtable *ht, return hash; } -static inline unsigned int rht_key_hashfn( +static __always_inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { @@ -161,7 +161,7 @@ static inline unsigned int rht_key_hashfn( return rht_bucket_index(tbl, hash); } -static inline unsigned int rht_head_hashfn( +static __always_inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { @@ -586,7 +586,7 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, } /* Internal function, do not use. */ -static inline struct rhash_head *__rhashtable_lookup( +static __always_inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -639,7 +639,7 @@ restart: * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup( +static __always_inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -662,7 +662,7 @@ static inline void *rhashtable_lookup( * * Returns the first entry on which the compare function returned true. 
*/ -static inline void *rhashtable_lookup_fast( +static __always_inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -689,7 +689,7 @@ static inline void *rhashtable_lookup_fast( * * Returns the list of entries that match the given key. */ -static inline struct rhlist_head *rhltable_lookup( +static __always_inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { @@ -702,7 +702,7 @@ static inline struct rhlist_head *rhltable_lookup( * function returns the existing element already in hashes if there is a clash, * otherwise it returns an error via ERR_PTR(). */ -static inline void *__rhashtable_insert_fast( +static __always_inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -825,7 +825,7 @@ out_unlock: * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhashtable_insert_fast( +static __always_inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -854,7 +854,7 @@ static inline int rhashtable_insert_fast( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhltable_insert_key( +static __always_inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { @@ -877,7 +877,7 @@ static inline int rhltable_insert_key( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhltable_insert( +static __always_inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -902,7 +902,7 @@ static inline int rhltable_insert( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhashtable_lookup_insert_fast( +static __always_inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -929,7 +929,7 @@ static inline int rhashtable_lookup_insert_fast( * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_fast( +static __always_inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -956,7 +956,7 @@ static inline void *rhashtable_lookup_get_insert_fast( * * Returns zero on success. */ -static inline int rhashtable_lookup_insert_key( +static __always_inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -982,7 +982,7 @@ static inline int rhashtable_lookup_insert_key( * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. 
 */
-static inline void *rhashtable_lookup_get_insert_key(
+static __always_inline void *rhashtable_lookup_get_insert_key(
 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
 	const struct rhashtable_params params)
 {
@@ -992,7 +992,7 @@ static inline void *rhashtable_lookup_get_insert_key(
 }
 
 /* Internal function, please use rhashtable_remove_fast() instead */
-static inline int __rhashtable_remove_fast_one(
+static __always_inline int __rhashtable_remove_fast_one(
 	struct rhashtable *ht, struct bucket_table *tbl,
 	struct rhash_head *obj, const struct rhashtable_params params,
 	bool rhlist)
@@ -1074,7 +1074,7 @@ unlocked:
 }
 
 /* Internal function, please use rhashtable_remove_fast() instead */
-static inline int __rhashtable_remove_fast(
+static __always_inline int __rhashtable_remove_fast(
 	struct rhashtable *ht, struct rhash_head *obj,
 	const struct rhashtable_params params, bool rhlist)
 {
@@ -1115,7 +1115,7 @@ static inline int __rhashtable_remove_fast(
 *
 * Returns zero on success, -ENOENT if the entry could not be found.
 */
-static inline int rhashtable_remove_fast(
+static __always_inline int rhashtable_remove_fast(
 	struct rhashtable *ht, struct rhash_head *obj,
 	const struct rhashtable_params params)
 {
@@ -1137,7 +1137,7 @@ static inline int rhashtable_remove_fast(
 *
 * Returns zero on success, -ENOENT if the entry could not be found.
 */
-static inline int rhltable_remove(
+static __always_inline int rhltable_remove(
 	struct rhltable *hlt, struct rhlist_head *list,
 	const struct rhashtable_params params)
 {
@@ -1145,7 +1145,7 @@ static inline int rhltable_remove(
 }
 
 /* Internal function, please use rhashtable_replace_fast() instead */
-static inline int __rhashtable_replace_fast(
+static __always_inline int __rhashtable_replace_fast(
 	struct rhashtable *ht, struct bucket_table *tbl,
 	struct rhash_head *obj_old, struct rhash_head *obj_new,
 	const struct rhashtable_params params)
@@ -1208,7 +1208,7 @@ unlocked:
 * Returns zero on success, -ENOENT if the entry could not be found,
 * -EINVAL if hash is not the same for the old and new objects.
 */
-static inline int rhashtable_replace_fast(
+static __always_inline int rhashtable_replace_fast(
 	struct rhashtable *ht, struct rhash_head *obj_old,
 	struct rhash_head *obj_new,
 	const struct rhashtable_params params)
-- cgit v1.2.3 From 886d6981208263b55a1eb8b39c5d00db1544b9bb Mon Sep 17 00:00:00 2001 From: Zhushuai Yin Date: Sat, 30 Aug 2025 18:27:57 +0800 Subject: crypto: hisilicon/zip - add hashjoin, gather, and UDMA data move features

The new version of the hisilicon zip driver supports the hash join and gather features, as well as the data move feature (UDMA), including data copying and memory initialization functions. These features are registered to the uacce subsystem.

Signed-off-by: Zhushuai Yin
Signed-off-by: Chenghai Huang
Signed-off-by: Herbert Xu
---
 include/linux/hisi_acc_qm.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include/linux')
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 0c4c84b8c3be..f2254ddc327c 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -125,6 +125,7 @@ enum qm_hw_ver {
 	QM_HW_V2 = 0x21,
 	QM_HW_V3 = 0x30,
 	QM_HW_V4 = 0x50,
+	QM_HW_V5 = 0x51,
 };
 
 enum qm_fun_type {
-- cgit v1.2.3 From c2ce2453413d429e302659abc5ace634e873f6f5 Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Thu, 28 Aug 2025 12:59:24 +0200 Subject: driver core/PM: Set power.no_callbacks along with power.no_pm Devices with power.no_pm set are not expected to need any power management at all, so modify device_set_pm_not_required() to set power.no_callbacks for them too in case runtime PM will be enabled for any of them (which in principle may be done for convenience if such a device participates in a dependency chain). Since device_set_pm_not_required() must be called before device_add() or it would not have any effect, it can update power.no_callbacks without locking, unlike pm_runtime_no_callbacks() that can be called after registering the target device. Signed-off-by: Rafael J. Wysocki Cc: stable Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/1950054.tdWV9SEqCh@rafael.j.wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0470d19da7f2..b031ff71a5bd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -851,6 +851,9 @@ static inline bool device_pm_not_required(struct device *dev) static inline void device_set_pm_not_required(struct device *dev) { dev->power.no_pm = true; +#ifdef CONFIG_PM + dev->power.no_callbacks = true; +#endif } static inline void dev_pm_syscore_device(struct device *dev, bool val) -- cgit v1.2.3 From 20f988320d2718ef28b1f0635acc88c12a216d29 Mon Sep 17 00:00:00 2001 From: "Rai, Amardeep" Date: Wed, 20 Aug 2025 17:38:19 +0300 Subject: usb: core: Add a function to get USB version independent periodic payload Add usb_endpoint_max_periodic_payload() to obtain maximum payload bytes in a service interval for isochronous and interrupt endpoints in a USB version independent way. Signed-off-by: Rai, Amardeep Signed-off-by: Mathias Nyman Co-developed-by: Sakari Ailus Signed-off-by: Sakari Ailus Reviewed-by: Hans de Goede Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250820143824.551777-5-sakari.ailus@linux.intel.com --- include/linux/usb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 9d662c6abb4d..e9cf2786d8bd 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2039,6 +2039,9 @@ static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) return usb_endpoint_maxp(&ep->desc); } +u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, + const struct usb_host_endpoint *ep); + /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { -- cgit v1.2.3 From d6725169a9bbcb5bd1dd14b2891b874614c59f52 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 20 Aug 2025 17:38:21 +0300 Subject: usb: core: Introduce usb_endpoint_is_hs_isoc_double() Introduce usb_endpoint_is_hs_isoc_double() tell whether an endpoint conforms to USB 2.0 Isochronous Double IN Bandwidth ECN. 
Signed-off-by: Sakari Ailus Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250820143824.551777-7-sakari.ailus@linux.intel.com --- include/linux/usb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e9cf2786d8bd..70ef00c42d22 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2042,6 +2042,9 @@ static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, const struct usb_host_endpoint *ep); +bool usb_endpoint_is_hs_isoc_double(struct usb_device *udev, + const struct usb_host_endpoint *ep); + /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { -- cgit v1.2.3 From 23743ba64709a9c137c1b928f8b8e00d846af9cc Mon Sep 17 00:00:00 2001 From: Calixte Pernot Date: Mon, 25 Aug 2025 14:56:09 +0200 Subject: vt: add support for smcup/rmcup escape codes Support "\e[?1049h" and "\e[?1049l" escape codes. This patch allows programs to enter and leave alternate screens. This feature is widely available in graphical terminal emulators and mostly used by fullscreen terminal-based user interfaces such as text editors. Most editors such as vim and nano assume this escape code is not supported and will not try to print the escape sequence if TERM=linux. To try out this patch, run `TERM=xterm-256color vim` inside a VT. Signed-off-by: Calixte Pernot Link: https://lore.kernel.org/r/20250825125607.2478-3-calixte.pernot@grenoble-inp.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 59b4fec5f254..13b35637bd5a 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -159,6 +159,9 @@ struct vc_data { struct uni_pagedict *uni_pagedict; struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ u32 **vc_uni_lines; /* unicode screen content */ + u16 *vc_saved_screen; + unsigned int vc_saved_cols; + unsigned int vc_saved_rows; /* additional information is in vt_kern.h */ }; -- cgit v1.2.3 From 0bcd01f757bc06471c82a137eafee281ef1b6e38 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 28 Aug 2024 21:56:57 -0700 Subject: hwmon: Introduce 64-bit energy attribute support Many chips require 64-bit variables to display the accumulated energy, even more so since the energy units are micro-Joule. Add new sensor type "energy64" to support reporting the chip energy as 64-bit values. Changing the entire hardware monitoring API is not feasible, and it is only really necessary to support reading 64-bit values for the "energyX_input" attribute. For this reason, keep the API as-is and use type casts on both ends to pass 64-bit pointers when reading the accumulated energy. On the write side (which is only useful for the energyX_enable attribute), keep passing the written value as long.
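For illustration (no driver is converted in this patch; the register accessor below is hypothetical, and it is assumed that an energy64 channel reuses the hwmon_energy_input attribute), a chip driver would apply the described cast in its read callback like this:

  static int example_read(struct device *dev, enum hwmon_sensor_types type,
                          u32 attr, int channel, long *val)
  {
          if (type == hwmon_energy64 && attr == hwmon_energy_input) {
                  /* Pass the 64-bit value through the existing long * API. */
                  *(u64 *)val = example_chip_read_energy_uj(dev); /* hypothetical */
                  return 0;
          }

          return -EOPNOTSUPP;
  }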
Reviewed-by: Chris Packham Tested-by: Chris Packham # INA780 Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 3a63dff62d03..886fc90b2d25 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -24,6 +24,7 @@ enum hwmon_sensor_types { hwmon_curr, hwmon_power, hwmon_energy, + hwmon_energy64, hwmon_humidity, hwmon_fan, hwmon_pwm, -- cgit v1.2.3 From d364d2ad07873dc4991b2a631a8536597272418b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 2 Sep 2025 13:59:11 +0200 Subject: devres: provide devm_kmemdup_const() Provide a function similar to devm_strdup_const() but for copying blocks of memory that are likely to be placed in .rodata. Reviewed-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Tested-by: Neil Armstrong Signed-off-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- include/linux/device/devres.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h index ae696d10faff..8c5f57e0d613 100644 --- a/include/linux/device/devres.h +++ b/include/linux/device/devres.h @@ -80,6 +80,8 @@ void devm_kfree(struct device *dev, const void *p); void * __realloc_size(3) devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); +const void * +devm_kmemdup_const(struct device *dev, const void *src, size_t len, gfp_t gfp); static inline void *devm_kmemdup_array(struct device *dev, const void *src, size_t n, size_t size, gfp_t flags) { -- cgit v1.2.3 From 11aa02d6a9c222260490f952d041dec6d7f16a92 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 2 Sep 2025 13:59:22 +0200 Subject: pinctrl: allow to mark pin functions as requestable GPIOs The name of the pin function has no real meaning to pinctrl core and is there only for human readability of device properties. Some pins are muxed as GPIOs but for "strict" pinmuxers it's impossible to request them as GPIOs if they're bound to a device - even if their function name explicitly says "gpio". Add a new field to struct pinfunction that allows passing additional flags to pinctrl core. While we could go with a boolean "is_gpio" field, a flags field is more future-proof. If the PINFUNCTION_FLAG_GPIO is set for a given function, the pin muxed to it can be requested as GPIO even on strict pin controllers. Add a new callback to struct pinmux_ops - function_is_gpio() - that allows pinmux core to inspect a function and see if it's a GPIO one. Provide a generic implementation of this callback.
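A minimal sketch of driver-side usage, with made-up names (a driver could also hook up the generic callback implementation added here instead of the open-coded one below):

  static const char * const example_gpio_groups[] = { "gpio0_grp" };

  static const struct pinfunction example_functions[] = {
          PINCTRL_GPIO_PINFUNCTION("gpio", example_gpio_groups,
                                   ARRAY_SIZE(example_gpio_groups)),
  };

  static bool example_function_is_gpio(struct pinctrl_dev *pctldev,
                                       unsigned int selector)
  {
          /* Assumes the driver indexes its functions by selector. */
          return example_functions[selector].flags & PINFUNCTION_FLAG_GPIO;
  }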
Tested-by: Neil Armstrong Signed-off-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 14 ++++++++++++++ include/linux/pinctrl/pinmux.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index d138e1815645..1a8084e29405 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -11,6 +11,7 @@ #ifndef __LINUX_PINCTRL_PINCTRL_H #define __LINUX_PINCTRL_PINCTRL_H +#include #include struct device; @@ -206,16 +207,20 @@ extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, const char *pin_group, const unsigned int **pins, unsigned int *num_pins); +#define PINFUNCTION_FLAG_GPIO BIT(0) + /** * struct pinfunction - Description about a function * @name: Name of the function * @groups: An array of groups for this function * @ngroups: Number of groups in @groups + * @flags: Additional pin function flags */ struct pinfunction { const char *name; const char * const *groups; size_t ngroups; + unsigned long flags; }; /* Convenience macro to define a single named pinfunction */ @@ -226,6 +231,15 @@ struct pinfunction { .ngroups = (_ngroups), \ } +/* Same as PINCTRL_PINFUNCTION() but for the GPIO category of functions */ +#define PINCTRL_GPIO_PINFUNCTION(_name, _groups, _ngroups) \ +(struct pinfunction) { \ + .name = (_name), \ + .groups = (_groups), \ + .ngroups = (_ngroups), \ + .flags = PINFUNCTION_FLAG_GPIO, \ + } + #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_PINCTRL) extern struct pinctrl_dev *of_pinctrl_get(struct device_node *np); #else diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index d6f7b58d6ad0..6db6c3e1ccc2 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -66,6 +66,8 @@ struct pinmux_ops { unsigned int selector, const char * const **groups, unsigned int *num_groups); + bool (*function_is_gpio) (struct pinctrl_dev *pctldev, + unsigned int selector); int (*set_mux) (struct pinctrl_dev *pctldev, unsigned int func_selector, unsigned int group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, -- cgit v1.2.3 From 203a83112e097a501fbe12722b6342787497efe0 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 5 Sep 2025 11:21:50 +0200 Subject: pinctrl: generic: rename PIN_CONFIG_OUTPUT to LEVEL This generic pin config property is confusingly named so let's rename it to make things clearer. There are already drivers in the tree that use PIN_CONFIG_OUTPUT to *read* the value of an output driven pin, which is a big semantic confusion for the head: are we then reading the setting of the output or the actual value/level that is put out on the pin? We already have PIN_CONFIG_OUTPUT_ENABLE that turns on driver buffers for output, so this can by logical conclusion only drive the voltage level if it should be any different. But if we read the pin, are we then reading the *setting* of the output value or the *actual* value we can see on the line? If the pin has not first been set into output mode with PIN_CONFIG_OUTPUT_ENABLE, but is instead in some input mode or tristate, what will reading this property actually return? Reading the current users of this property, it is clear that what we read is the logical level of the pin as 0 or 1 depending on whether it is low or high. Rename it to PIN_CONFIG_LEVEL so it is crystal clear that we set or read the voltage level of the pin and nothing else.
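To make the intended semantics concrete, a sketch using the generic pinconf packing helpers (how @config reaches the driver is left out):

  /* Configure the pin as output and drive it high: */
  unsigned long config = pinconf_to_config_packed(PIN_CONFIG_LEVEL, 1);

  /* ...hand @config to the driver's pin config set path... */

  /* When reading back, the argument is the observed level, 0 or 1: */
  u32 level = pinconf_to_config_argument(config);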
Acked-by: Sudeep Holla Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 1bcf071b860e..d9245ecec71d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -88,9 +88,13 @@ struct pinctrl_map; * passed in the argument on a custom form, else just use argument 1 * to indicate low power mode, argument 0 turns low power mode off. * @PIN_CONFIG_MODE_PWM: this will configure the pin for PWM - * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a - * value on the line. Use argument 1 to indicate high level, argument 0 to - * indicate low level. (Please see Documentation/driver-api/pin-control.rst, + * @PIN_CONFIG_LEVEL: setting this will configure the pin as an output and + * drive a value on the line. Use argument 1 to indicate high level, + * argument 0 to indicate low level. Conversely the value of the line + * can be read using this parameter, if and only if that value can be + * represented as a binary 0 or 1 where 0 indicate a low voltage level + * and 1 indicate a high voltage level. + * (Please see Documentation/driver-api/pin-control.rst, * section "GPIO mode pitfalls" for a discussion around this parameter.) * @PIN_CONFIG_OUTPUT_ENABLE: this will enable the pin's output mode * without driving a value there. For most platforms this reduces to @@ -137,7 +141,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_SCHMITT_UV, PIN_CONFIG_MODE_LOW_POWER, PIN_CONFIG_MODE_PWM, - PIN_CONFIG_OUTPUT, + PIN_CONFIG_LEVEL, PIN_CONFIG_OUTPUT_ENABLE, PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS, PIN_CONFIG_PERSIST_STATE, -- cgit v1.2.3 From 5f3cec21f6d57336a2cbfa9ee428ac66c77a7211 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 29 Aug 2025 20:46:01 +0300 Subject: overflow: add range_overflows() and range_end_overflows() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the range_overflows() and range_end_overflows() along with the _t variants over from drm/i915 and drm/buddy to overflow.h. Cc: Kees Cook Cc: "Gustavo A. R. Silva" Cc: linux-hardening@vger.kernel.org Reviewed-by: Kees Cook Reviewed-by: Jouni Högander Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250829174601.2163064-3-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/linux/overflow.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include/linux') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 154ed0dbb43f..725f95f7e416 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -238,6 +238,76 @@ static inline bool __must_check __must_check_overflow(bool overflow) __overflows_type_constexpr(n, T), \ __overflows_type(n, T)) +/** + * range_overflows() - Check if a range is out of bounds + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * A strict check to determine if the range [@start, @start + @size) is + * invalid with respect to the allowable range [0, @max). Any range + * starting at or beyond @max is considered an overflow, even if @size is 0. + * + * Returns: true if the range is out of bounds. 
+ */ +#define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ typeof(size) size__ = (size); \ typeof(max) max__ = (max); \ (void)(&start__ == &size__); \ (void)(&start__ == &max__); \ start__ >= max__ || size__ > max__ - start__; \ +}) + +/** + * range_overflows_t() - Check if a range is out of bounds + * @type: Data type to use. + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Same as range_overflows() but forcing the parameters to @type. + * + * Returns: true if the range is out of bounds. + */ +#define range_overflows_t(type, start, size, max) \ + range_overflows((type)(start), (type)(size), (type)(max)) + +/** + * range_end_overflows() - Check if a range's endpoint is out of bounds + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Checks only if the endpoint of a range (@start + @size) exceeds @max. + * Unlike range_overflows(), a zero-sized range at the boundary (@start == @max) + * is not considered an overflow. Useful for iterator-style checks. + * + * Returns: true if the endpoint exceeds the boundary. + */ +#define range_end_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ typeof(size) size__ = (size); \ typeof(max) max__ = (max); \ (void)(&start__ == &size__); \ (void)(&start__ == &max__); \ start__ > max__ || size__ > max__ - start__; \ +}) + +/** + * range_end_overflows_t() - Check if a range's endpoint is out of bounds + * @type: Data type to use. + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Same as range_end_overflows() but forcing the parameters to @type. + * + * Returns: true if the endpoint exceeds the boundary. + */ +#define range_end_overflows_t(type, start, size, max) \ + range_end_overflows((type)(start), (type)(size), (type)(max)) + /** * castable_to_type - like __same_type(), but also allows for casted literals * -- cgit v1.2.3 From 3ad2a7b9b15d5072139a20be84adb36776eb6c9b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Jun 2025 16:23:57 -0700 Subject: hwmon: Serialize accesses in hwmon core Implement locking in the hardware monitoring core for drivers using the _with_info() API functions. Most hardware monitoring drivers need to support locking to protect against parallel accesses from userspace. With older API functions, such locking had to be implemented in the driver code since sysfs attributes were created by the driver. However, the _with_info() API creates sysfs attributes in the hardware monitoring core. This makes it easy to move the locking primitives into that code. This has the benefit of simplifying driver code while at the same time reducing the risk of incomplete or bad locking implementations in hardware monitoring drivers. While this means that all accesses are forced to be synchronized, this has little if any practical impact since accesses are expected to be low frequency and are typically synchronized from userspace anyway since only a single process is accessing the data. On top of that, many drivers use regmap, which also has its own locking scheme and already serializes accesses.
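A hedged sketch of how a caller could take the same lock around a multi-register read (the chip accessors and @data are hypothetical; @dev must be the hwmon class device):

  long power, energy;
  int ret;

  hwmon_lock(dev);
  ret = example_chip_read_power(data, &power);    /* hypothetical accessor */
  if (!ret)
          ret = example_chip_read_energy(data, &energy);
  hwmon_unlock(dev);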
Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 886fc90b2d25..301a83afbd66 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -492,6 +492,9 @@ int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, char *hwmon_sanitize_name(const char *name); char *devm_hwmon_sanitize_name(struct device *dev, const char *name); +void hwmon_lock(struct device *dev); +void hwmon_unlock(struct device *dev); + /** * hwmon_is_bad_char - Is the char invalid in a hwmon name * @ch: the char to be considered -- cgit v1.2.3 From ca734f54b346db170be20ff3382bc3da8eb65904 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 17 Aug 2025 14:53:15 -0700 Subject: Input: pxa27x-keypad - drop support for platform data There are no users of pxa27x_keypad_platform_data in the kernel, and the driver supports configuration via device tree, so drop support for static platform data and move properties parsing from OF-specific methods to generic ones. Link: https://lore.kernel.org/r/20250817215316.1872689-3-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/keypad-pxa27x.h | 73 ----------------- 1 file changed, 73 deletions(-) delete mode 100644 include/linux/platform_data/keypad-pxa27x.h (limited to 'include/linux') diff --git a/include/linux/platform_data/keypad-pxa27x.h b/include/linux/platform_data/keypad-pxa27x.h deleted file mode 100644 index a376442b9935..000000000000 --- a/include/linux/platform_data/keypad-pxa27x.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_PXA27x_KEYPAD_H -#define __ASM_ARCH_PXA27x_KEYPAD_H - -#include -#include - -#define MAX_MATRIX_KEY_ROWS (8) -#define MAX_MATRIX_KEY_COLS (8) -#define MATRIX_ROW_SHIFT (3) -#define MAX_DIRECT_KEY_NUM (8) - -/* pxa3xx keypad platform specific parameters - * - * NOTE: - * 1. direct_key_num indicates the number of keys in the direct keypad - * _plus_ the number of rotary-encoder sensor inputs, this can be - * left as 0 if only rotary encoders are enabled, the driver will - * automatically calculate this - * - * 2. direct_key_map is the key code map for the direct keys, if rotary - * encoder(s) are enabled, direct key 0/1(2/3) will be ignored - * - * 3. rotary can be either interpreted as a relative input event (e.g. - * REL_WHEEL/REL_HWHEEL) or specific keys (e.g. UP/DOWN/LEFT/RIGHT) - * - * 4. matrix key and direct key will use the same debounce_interval by - * default, which should be sufficient in most cases - * - * pxa168 keypad platform specific parameter - * - * NOTE: - * clear_wakeup_event callback is a workaround required to clear the - * keypad interrupt. The keypad wake must be cleared in addition to - * reading the MI/DI bits in the KPC register.
- */ -struct pxa27x_keypad_platform_data { - - /* code map for the matrix keys */ - const struct matrix_keymap_data *matrix_keymap_data; - unsigned int matrix_key_rows; - unsigned int matrix_key_cols; - - /* direct keys */ - int direct_key_num; - unsigned int direct_key_map[MAX_DIRECT_KEY_NUM]; - /* the key output may be low active */ - int direct_key_low_active; - /* give board a chance to choose the start direct key */ - unsigned int direct_key_mask; - - /* rotary encoders 0 */ - int enable_rotary0; - int rotary0_rel_code; - int rotary0_up_key; - int rotary0_down_key; - - /* rotary encoders 1 */ - int enable_rotary1; - int rotary1_rel_code; - int rotary1_up_key; - int rotary1_down_key; - - /* key debounce interval */ - unsigned int debounce_interval; - - /* clear wakeup event requirement for pxa168 */ - void (*clear_wakeup_event)(void); -}; - -extern void pxa_set_keypad_info(struct pxa27x_keypad_platform_data *info); - -#endif /* __ASM_ARCH_PXA27x_KEYPAD_H */ -- cgit v1.2.3 From 890ba82a60cb7a6584968a4ac15546f434afa40b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 17 Aug 2025 14:39:06 -0700 Subject: Input: spear-keyboard - drop support for platform data There are no users of spear kbd_platform_data in the kernel, and the driver supports configuration via device tree, so drop support for static platform data and move properties parsing from OF-specific methods to generic ones. Link: https://lore.kernel.org/r/vppjxui76im26uamznx7evm5lmbe3d6v3oxsa7mqyytykh4zm6@nhlf33v3hp6g Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/keyboard-spear.h | 164 --------------------- 1 file changed, 164 deletions(-) delete mode 100644 include/linux/platform_data/keyboard-spear.h (limited to 'include/linux') diff --git a/include/linux/platform_data/keyboard-spear.h b/include/linux/platform_data/keyboard-spear.h deleted file mode 100644 index 5e3ff653900c..000000000000 --- a/include/linux/platform_data/keyboard-spear.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (C) 2010 ST Microelectronics - * Rajeev Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied.
- */ - -#ifndef __PLAT_KEYBOARD_H -#define __PLAT_KEYBOARD_H - -#include -#include -#include -#include - -#define DECLARE_9x9_KEYMAP(_name) \ -int _name[] = { \ - KEY(0, 0, KEY_ESC), \ - KEY(0, 1, KEY_1), \ - KEY(0, 2, KEY_2), \ - KEY(0, 3, KEY_3), \ - KEY(0, 4, KEY_4), \ - KEY(0, 5, KEY_5), \ - KEY(0, 6, KEY_6), \ - KEY(0, 7, KEY_7), \ - KEY(0, 8, KEY_8), \ - KEY(1, 0, KEY_9), \ - KEY(1, 1, KEY_MINUS), \ - KEY(1, 2, KEY_EQUAL), \ - KEY(1, 3, KEY_BACKSPACE), \ - KEY(1, 4, KEY_TAB), \ - KEY(1, 5, KEY_Q), \ - KEY(1, 6, KEY_W), \ - KEY(1, 7, KEY_E), \ - KEY(1, 8, KEY_R), \ - KEY(2, 0, KEY_T), \ - KEY(2, 1, KEY_Y), \ - KEY(2, 2, KEY_U), \ - KEY(2, 3, KEY_I), \ - KEY(2, 4, KEY_O), \ - KEY(2, 5, KEY_P), \ - KEY(2, 6, KEY_LEFTBRACE), \ - KEY(2, 7, KEY_RIGHTBRACE), \ - KEY(2, 8, KEY_ENTER), \ - KEY(3, 0, KEY_LEFTCTRL), \ - KEY(3, 1, KEY_A), \ - KEY(3, 2, KEY_S), \ - KEY(3, 3, KEY_D), \ - KEY(3, 4, KEY_F), \ - KEY(3, 5, KEY_G), \ - KEY(3, 6, KEY_H), \ - KEY(3, 7, KEY_J), \ - KEY(3, 8, KEY_K), \ - KEY(4, 0, KEY_L), \ - KEY(4, 1, KEY_SEMICOLON), \ - KEY(4, 2, KEY_APOSTROPHE), \ - KEY(4, 3, KEY_GRAVE), \ - KEY(4, 4, KEY_LEFTSHIFT), \ - KEY(4, 5, KEY_BACKSLASH), \ - KEY(4, 6, KEY_Z), \ - KEY(4, 7, KEY_X), \ - KEY(4, 8, KEY_C), \ - KEY(5, 0, KEY_V), \ - KEY(5, 1, KEY_B), \ - KEY(5, 2, KEY_N), \ - KEY(5, 3, KEY_M), \ - KEY(5, 4, KEY_COMMA), \ - KEY(5, 5, KEY_DOT), \ - KEY(5, 6, KEY_SLASH), \ - KEY(5, 7, KEY_RIGHTSHIFT), \ - KEY(5, 8, KEY_KPASTERISK), \ - KEY(6, 0, KEY_LEFTALT), \ - KEY(6, 1, KEY_SPACE), \ - KEY(6, 2, KEY_CAPSLOCK), \ - KEY(6, 3, KEY_F1), \ - KEY(6, 4, KEY_F2), \ - KEY(6, 5, KEY_F3), \ - KEY(6, 6, KEY_F4), \ - KEY(6, 7, KEY_F5), \ - KEY(6, 8, KEY_F6), \ - KEY(7, 0, KEY_F7), \ - KEY(7, 1, KEY_F8), \ - KEY(7, 2, KEY_F9), \ - KEY(7, 3, KEY_F10), \ - KEY(7, 4, KEY_NUMLOCK), \ - KEY(7, 5, KEY_SCROLLLOCK), \ - KEY(7, 6, KEY_KP7), \ - KEY(7, 7, KEY_KP8), \ - KEY(7, 8, KEY_KP9), \ - KEY(8, 0, KEY_KPMINUS), \ - KEY(8, 1, KEY_KP4), \ - KEY(8, 2, KEY_KP5), \ - KEY(8, 3, KEY_KP6), \ - KEY(8, 4, KEY_KPPLUS), \ - KEY(8, 5, KEY_KP1), \ - KEY(8, 6, KEY_KP2), \ - KEY(8, 7, KEY_KP3), \ - KEY(8, 8, KEY_KP0), \ -} - -#define DECLARE_6x6_KEYMAP(_name) \ -int _name[] = { \ - KEY(0, 0, KEY_RESERVED), \ - KEY(0, 1, KEY_1), \ - KEY(0, 2, KEY_2), \ - KEY(0, 3, KEY_3), \ - KEY(0, 4, KEY_4), \ - KEY(0, 5, KEY_5), \ - KEY(1, 0, KEY_Q), \ - KEY(1, 1, KEY_W), \ - KEY(1, 2, KEY_E), \ - KEY(1, 3, KEY_R), \ - KEY(1, 4, KEY_T), \ - KEY(1, 5, KEY_Y), \ - KEY(2, 0, KEY_D), \ - KEY(2, 1, KEY_F), \ - KEY(2, 2, KEY_G), \ - KEY(2, 3, KEY_H), \ - KEY(2, 4, KEY_J), \ - KEY(2, 5, KEY_K), \ - KEY(3, 0, KEY_B), \ - KEY(3, 1, KEY_N), \ - KEY(3, 2, KEY_M), \ - KEY(3, 3, KEY_COMMA), \ - KEY(3, 4, KEY_DOT), \ - KEY(3, 5, KEY_SLASH), \ - KEY(4, 0, KEY_F6), \ - KEY(4, 1, KEY_F7), \ - KEY(4, 2, KEY_F8), \ - KEY(4, 3, KEY_F9), \ - KEY(4, 4, KEY_F10), \ - KEY(4, 5, KEY_NUMLOCK), \ - KEY(5, 0, KEY_KP2), \ - KEY(5, 1, KEY_KP3), \ - KEY(5, 2, KEY_KP0), \ - KEY(5, 3, KEY_KPDOT), \ - KEY(5, 4, KEY_RO), \ - KEY(5, 5, KEY_ZENKAKUHANKAKU), \ -} - -#define KEYPAD_9x9 0 -#define KEYPAD_6x6 1 -#define KEYPAD_2x2 2 - -/** - * struct kbd_platform_data - spear keyboard platform data - * keymap: pointer to keymap data (table and size) - * rep: enables key autorepeat - * mode: choose keyboard support(9x9, 6x6, 2x2) - * suspended_rate: rate at which keyboard would operate in suspended mode - * - * This structure is supposed to be used by platform code to supply - * keymaps to drivers that implement keyboards. 
- */ -struct kbd_platform_data { - const struct matrix_keymap_data *keymap; - bool rep; - unsigned int mode; - unsigned int suspended_rate; -}; - -#endif /* __PLAT_KEYBOARD_H */ -- cgit v1.2.3 From ad0d05dbddc1bf86e92220fea873176de6b12f78 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 30 Aug 2025 10:18:21 +0800 Subject: blk-mq: Defer freeing of tags page_list to SRCU callback Tag iterators can race with the freeing of the request pages (tags->page_list), potentially leading to use-after-free issues. Defer the freeing of the page list and the tags structure itself until after an SRCU grace period has passed. This ensures that any concurrent tag iterators have completed before the memory is released. This way, we can replace the big tags->lock in the tags iterator code path with SRCU to solve the issue. This is achieved by: - Adding a new `srcu_struct tags_srcu` to `blk_mq_tag_set` to protect tag map iteration. - Adding an `rcu_head` to `struct blk_mq_tags` to be used with `call_srcu`. - Moving the page list freeing logic and the `kfree(tags)` call into a new callback function, `blk_mq_free_tags_callback`. - In `blk_mq_free_tags`, invoking `call_srcu` to schedule the new callback for deferred execution. The read-side protection for the tag iterators will be added in a subsequent patch. Reviewed-by: Hannes Reinecke Reviewed-by: Yu Kuai Signed-off-by: Ming Lei Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2a5a828f19a0..1325ceeb743a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -531,6 +531,7 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + struct srcu_struct tags_srcu; struct rw_semaphore update_nr_hwq_lock; }; @@ -767,6 +768,7 @@ struct blk_mq_tags { * request pool */ spinlock_t lock; + struct rcu_head rcu_head; }; static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, -- cgit v1.2.3 From 473efbc3ca29f725abfb7b323798df596ef41f3e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Sep 2025 08:18:15 -0600 Subject: io_uring/uring_cmd: fix __io_uring_cmd_do_in_task !CONFIG_IO_URING typo A manual application of this patch resulted in a typo for the stub function __io_uring_cmd_do_in_task(), for the case where CONFIG_IO_URING isn't true. Fix that up. Reported-by: Klara Modin Fixes: df3a7762ee24 ("io_uring/uring_cmd: add io_uring_cmd_tw_t type alias") Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 50dd6a53cb5e..1350af846ddd 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -109,8 +109,7 @@ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - io_uring_tw_t task_work_cb, - unsigned flags) + io_uring_cmd_tw_t task_work_cb, unsigned flags) { } static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, -- cgit v1.2.3 From 9cf93a8fa9513c6d3cc65bdd50e05c1355cef322 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 20 Aug 2025 14:04:24 -0500 Subject: ipmi: Allow an SMI sender to return an error Getting ready for handling when a BMC is non-responsive or broken, allow the sender operation to fail in an SMI.
If it was a user-generated message, it will return the error. The powernv code was already doing this internally, but the way it was written could result in deep stack descent if there were a lot of messages queued. Have its send return an error in this case. Signed-off-by: Corey Minyard --- include/linux/ipmi_smi.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 5d69820d8b02..c2d975bbff60 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -109,8 +109,8 @@ struct ipmi_smi_msg { enum ipmi_smi_msg_type type; - long msgid; - void *user_data; + long msgid; + void *user_data; int data_size; unsigned char data[IPMI_MAX_MSG_LENGTH]; @@ -168,9 +168,11 @@ struct ipmi_smi_handlers { * are held when this is run. Message are delivered one at * a time by the message handler, a new message will not be * delivered until the previous message is returned. + * + * This can return an error if the SMI is not in a state where it + * can send a message. */ - void (*sender)(void *send_info, - struct ipmi_smi_msg *msg); + int (*sender)(void *send_info, struct ipmi_smi_msg *msg); /* * Called by the upper layer to request that we try to get -- cgit v1.2.3 From 3bc54ab3b9790ca92f197e9822e486665daa321c Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 20 Aug 2025 14:09:11 -0500 Subject: ipmi: Rename "user_data" to "recv_msg" in an SMI message It's only used to hold the corresponding receive message, so fix the name to make that clear and the type so nothing else can be accidentally assigned to it. Signed-off-by: Corey Minyard --- include/linux/ipmi_smi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index c2d975bbff60..892e2d656e1e 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -110,7 +110,8 @@ struct ipmi_smi_msg { enum ipmi_smi_msg_type type; long msgid; - void *user_data; + /* Response to this message, will be NULL if not from a user request. */ + struct ipmi_recv_msg *recv_msg; int data_size; unsigned char data[IPMI_MAX_MSG_LENGTH]; -- cgit v1.2.3 From e465ad7ef57aa1ec4122fd5b34c182d59629cb91 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Mon, 11 Aug 2025 08:48:08 -0400 Subject: clk: ti: dpll: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate(). Part of these changes was done using the Coccinelle semantic patch from the cover letter of this series, and the rest of the changes were manually done. omap4_dpll_regm4xen_round_rate() is now only called by omap4_dpll_regm4xen_determine_rate(), so let's merge that functionality into one function. This is needed for another cleanup to completely remove the round_rate() clk ops from the clk core.
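The general shape of such a conversion, as a sketch (the DPLL rounding helper name is invented; the clk_rate_request fields are from the common clk framework):

  static int example_dpll_determine_rate(struct clk_hw *hw,
                                         struct clk_rate_request *req)
  {
          /* Hypothetical helper standing in for the driver's rounding logic. */
          long rate = example_dpll_round_rate(hw, req->rate,
                                              &req->best_parent_rate);

          if (rate < 0)
                  return rate;

          req->rate = rate;
          return 0;
  }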
Tested-by: Andreas Kemnade # OMAP3 GTA04, OMAP4 Panda Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Brian Masney --- include/linux/clk/ti.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index e656f63efdce..54a3fa370004 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -34,14 +34,14 @@ struct clk_omap_reg { * @clk_ref: struct clk_hw pointer to the clock's reference clock input * @control_reg: register containing the DPLL mode bitfield * @enable_mask: mask of the DPLL mode bitfield in @control_reg - * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate() - * @last_rounded_m: cache of the last M result of omap2_dpll_round_rate() + * @last_rounded_rate: cache of the last rate result of omap2_dpll_determine_rate() + * @last_rounded_m: cache of the last M result of omap2_dpll_determine_rate() * @last_rounded_m4xen: cache of the last M4X result of - * omap4_dpll_regm4xen_round_rate() + * omap4_dpll_regm4xen_determine_rate() * @last_rounded_lpmode: cache of the last lpmode result of * omap4_dpll_lpmode_recalc() * @max_multiplier: maximum valid non-bypass multiplier value (actual) - * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate() + * @last_rounded_n: cache of the last N result of omap2_dpll_determine_rate() * @min_divider: minimum valid non-bypass divider value (actual) * @max_divider: maximum valid non-bypass divider value (actual) * @max_rate: maximum clock rate for the DPLL -- cgit v1.2.3 From 3f5952917498e7bb9d227812d4349668f62c413b Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:09 +0000 Subject: KVM: arm64: Mask response to FFA_FEATURE call The minimum size and alignment boundary for FFA_RXTX_MAP is returned in bit[1:0]. Mask off any other bits in w2 when reading the minimum buffer size in hyp_ffa_post_init. Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- include/linux/arm_ffa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index e1634897e159..cd7ee4df9045 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -128,6 +128,7 @@ #define FFA_FEAT_RXTX_MIN_SZ_4K 0 #define FFA_FEAT_RXTX_MIN_SZ_64K 1 #define FFA_FEAT_RXTX_MIN_SZ_16K 2 +#define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0) /* FFA Bus/Device/Driver related */ struct ffa_device { -- cgit v1.2.3 From 6606c8c7e81886565f5cbdb0c0ce82e280c2b229 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Aug 2025 09:43:58 -0700 Subject: bitops: Add __attribute_const__ to generic ffs()-family implementations While tracking down a problem where constant expressions used by BUILD_BUG_ON() suddenly stopped working[1], we found that an added static initializer was convincing the compiler that it couldn't track the state of the prior statically initialized value. Tracing this down found that ffs() was used in the initializer macro, but since it wasn't marked with __attribute_const__, the compiler had to assume the function might change variable states as a side-effect (which is not true for ffs(), which provides deterministic math results). Add missing __attribute_const__ annotations to generic implementations of ffs(), __ffs(), fls(), and __fls() functions. These are pure mathematical functions that always return the same result for the same input with no side effects, making them eligible for compiler optimization.
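As a hedged illustration (the mask and expected result below are invented, not taken from the original report), the annotation lets a constant-foldable call feed a build-time assertion:

  #define EXAMPLE_MASK 0x00f0

  static void example(void)
  {
          /* Pure math: the call folds to the constant 5 at compile time. */
          BUILD_BUG_ON(ffs(EXAMPLE_MASK) != 5);
  }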
Build tested with x86_64 defconfig using GCC 14.2.0, which should validate the implementations when used by ARM, ARM64, LoongArch, Microblaze, NIOS2, and SPARC32 architectures. Link: https://github.com/KSPP/linux/issues/364 [1] Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250804164417.1612371-2-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 9be2d50da09a..ea7898cc5903 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -267,7 +267,7 @@ static inline int parity8(u8 val) * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. */ -static inline unsigned int __ffs64(u64 word) +static inline __attribute_const__ unsigned int __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) -- cgit v1.2.3 From 2d0ddbb65cef99aab241378b0f4ff2d6ea8c3a5a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 31 Aug 2025 09:04:06 -0700 Subject: Drivers: hv: Simplify data structures for VMBus channel close message struct vmbus_close_msg is used for sending the VMBus channel close message. It contains a struct vmbus_channel_msginfo, which has a flex array member at the end. The latter's presence in the middle of struct vmbus_close_msg causes warnings when built with -Wflex-array-member-not-at-end. But the struct vmbus_channel_msginfo is unused because the Hyper-V host does not send a response to the channel close message. So remove the struct vmbus_channel_msginfo. Then, since the only remaining field is struct vmbus_channel_close_channel, also remove the containing struct vmbus_close_msg and directly use struct vmbus_channel_close_channel. Besides eliminating unnecessary complexity, these changes resolve the -Wflex-array-member-not-at-end warnings. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Wei Liu --- include/linux/hyperv.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a59c5c3e95fb..59826c89171c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -707,11 +707,6 @@ struct vmbus_channel_msginfo { unsigned char msg[]; }; -struct vmbus_close_msg { - struct vmbus_channel_msginfo info; - struct vmbus_channel_close_channel msg; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -800,7 +795,7 @@ struct vmbus_channel { struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ - struct vmbus_close_msg close_msg; + struct vmbus_channel_close_channel close_msg; /* Statistics */ u64 interrupts; /* Host to Guest interrupts */ -- cgit v1.2.3 From 79357cd06d41d0f5a11b17d7c86176e395d10ef2 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Sun, 31 Aug 2025 14:10:58 +0200 Subject: mm/vmalloc, mm/kasan: respect gfp mask in kasan_populate_vmalloc() kasan_populate_vmalloc() and its helpers ignore the caller's gfp_mask and always allocate memory using the hardcoded GFP_KERNEL flag. This makes them inconsistent with vmalloc(), which was recently extended to support GFP_NOFS and GFP_NOIO allocations. Page table allocations performed during shadow population also ignore the external gfp_mask. To preserve the intended semantics of GFP_NOFS and GFP_NOIO, wrap the apply_to_page_range() calls into the appropriate memalloc scope. 
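The enforcement pattern is roughly the following sketch (only the GFP_NOFS leg is shown; the pte callback and context argument are simplified from the real code):

  if (!(gfp_mask & __GFP_FS)) {
          unsigned int flags = memalloc_nofs_save();

          /* Page table allocations below inherit the NOFS scope. */
          ret = apply_to_page_range(&init_mm, shadow_start, shadow_size,
                                    kasan_populate_vmalloc_pte, &data);
          memalloc_nofs_restore(flags);
  }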
xfs calls vmalloc with GFP_NOFS, so this bug could lead to deadlock. There was a report here https://lkml.kernel.org/r/686ea951.050a0220.385921.0016.GAE@google.com This patch: - Extends kasan_populate_vmalloc() and helpers to take gfp_mask; - Passes gfp_mask down to alloc_pages_bulk() and __get_free_page(); - Enforces GFP_NOFS/NOIO semantics with memalloc_*_save()/restore() around apply_to_page_range(); - Updates vmalloc.c and percpu allocator call sites accordingly. Link: https://lkml.kernel.org/r/20250831121058.92971-1-urezki@gmail.com Fixes: 451769ebb7e7 ("mm/vmalloc: alloc GFP_NO{FS,IO} for vmalloc") Signed-off-by: Uladzislau Rezki (Sony) Reported-by: syzbot+3470c9ffee63e4abafeb@syzkaller.appspotmail.com Reviewed-by: Andrey Ryabinin Cc: Baoquan He Cc: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- include/linux/kasan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 890011071f2b..fe5ce9215821 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -562,7 +562,7 @@ static inline void kasan_init_hw_tags(void) { } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); -int kasan_populate_vmalloc(unsigned long addr, unsigned long size); +int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end, @@ -574,7 +574,7 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } @@ -610,7 +610,7 @@ static __always_inline void kasan_poison_vmalloc(const void *start, static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } -- cgit v1.2.3 From 3fac212fe489aa0dbe8d80a42a7809840ca7b0f9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Sep 2025 15:49:26 -0700 Subject: compiler-clang.h: define __SANITIZE_*__ macros only when undefined Clang 22 recently added support for defining __SANITIZE__ macros similar to GCC [1], which causes warnings (or errors with CONFIG_WERROR=y or W=e) with the existing defines that the kernel creates to emulate this behavior with existing clang versions. In file included from :3: In file included from include/linux/compiler_types.h:171: include/linux/compiler-clang.h:37:9: error: '__SANITIZE_THREAD__' macro redefined [-Werror,-Wmacro-redefined] 37 | #define __SANITIZE_THREAD__ | ^ :352:9: note: previous definition is here 352 | #define __SANITIZE_THREAD__ 1 | ^ Refactor compiler-clang.h to only define the sanitizer macros when they are undefined and adjust the rest of the code to use these macros for checking if the sanitizers are enabled, clearing up the warnings and allowing the kernel to easily drop these defines when the minimum supported version of LLVM for building the kernel becomes 22.0.0 or newer. 
Link: https://lkml.kernel.org/r/20250902-clang-update-sanitize-defines-v1-1-cf3702ca3d92@kernel.org Link: https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c [1] Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Bill Wendling Cc: Dmitriy Vyukov Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- include/linux/compiler-clang.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa4ffe037bc7..8720a0705900 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -18,23 +18,42 @@ #define KASAN_ABI_VERSION 5 /* + * Clang 22 added preprocessor macros to match GCC, in hopes of eventually + * dropping __has_feature support for sanitizers: + * https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c + * Create these macros for older versions of clang so that it is easy to clean + * up once the minimum supported version of LLVM for building the kernel always + * creates these macros. + * * Note: Checking __has_feature(*_sanitizer) is only true if the feature is * enabled. Therefore it is not required to additionally check defined(CONFIG_*) * to avoid adding redundant attributes in other configurations. */ +#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__) +#define __SANITIZE_ADDRESS__ +#endif +#if __has_feature(hwaddress_sanitizer) && !defined(__SANITIZE_HWADDRESS__) +#define __SANITIZE_HWADDRESS__ +#endif +#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__) +#define __SANITIZE_THREAD__ +#endif -#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) -/* Emulate GCC's __SANITIZE_ADDRESS__ flag */ +/* + * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel. + */ +#ifdef __SANITIZE_HWADDRESS__ #define __SANITIZE_ADDRESS__ +#endif + +#ifdef __SANITIZE_ADDRESS__ #define __no_sanitize_address \ __attribute__((no_sanitize("address", "hwaddress"))) #else #define __no_sanitize_address #endif -#if __has_feature(thread_sanitizer) -/* emulate gcc's __SANITIZE_THREAD__ flag */ -#define __SANITIZE_THREAD__ +#ifdef __SANITIZE_THREAD__ #define __no_sanitize_thread \ __attribute__((no_sanitize("thread"))) #else -- cgit v1.2.3 From ff97bc38be343e4530e2f140b40cbdce2e09152f Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 3 Sep 2025 10:30:00 +0300 Subject: net/mlx5: Add RS FEC histogram infrastructure Define the Ports Phy Histogram Configuration Register (PPHCR) to expose RS-FEC histogram bin ranges, and expose a new counter group in the Ports Performance Counters Register (PPCNT) to report the corresponding histogram values. 
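For illustration, querying the new register could look roughly like this (a sketch assuming the usual mlx5 access-register helper; error handling trimmed):

  u32 in[MLX5_ST_SZ_DW(pphcr_reg)] = {};
  u32 out[MLX5_ST_SZ_DW(pphcr_reg)] = {};
  u32 num_of_bins;
  int err;

  MLX5_SET(pphcr_reg, in, local_port, 1);
  err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
                             MLX5_REG_PPHCR, 0, 0);
  if (!err)
          num_of_bins = MLX5_GET(pphcr_reg, out, num_of_bins);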
Co-developed-by: Yael Chemla Signed-off-by: Yael Chemla Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1756884600-520195-1-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 72a83666e67f..d7f46a8fbfa1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1525,6 +1525,7 @@ enum { MLX5_PHYSICAL_LAYER_RECOVERY_GROUP = 0x1a, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP = 0x21, + MLX5_RS_FEC_HISTOGRAM_GROUP = 0x23, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8c5fbfb85749..c0858af0e854 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -130,6 +130,7 @@ enum { MLX5_REG_PDDR = 0x5031, MLX5_REG_PMLP = 0x5002, MLX5_REG_PPLM = 0x5023, + MLX5_REG_PPHCR = 0x503E, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e9f14a0c7f4f..097b1b7ada63 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4901,6 +4901,11 @@ union mlx5_ifc_field_select_802_1_r_roce_auto_bits { u8 reserved_at_0[0x20]; }; +struct mlx5_ifc_rs_histogram_cntrs_bits { + u8 hist[16][0x40]; + u8 reserved_at_400[0x2c0]; +}; + union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; @@ -4915,6 +4920,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; struct mlx5_ifc_phys_layer_recovery_cntrs_bits phys_layer_recovery_cntrs; + struct mlx5_ifc_rs_histogram_cntrs_bits rs_histogram_cntrs; u8 reserved_at_0[0x7c0]; }; @@ -11738,6 +11744,28 @@ struct mlx5_ifc_mtctr_reg_bits { u8 second_clock_timestamp[0x40]; }; +struct mlx5_ifc_bin_range_layout_bits { + u8 reserved_at_0[0xa]; + u8 high_val[0x6]; + u8 reserved_at_10[0xa]; + u8 low_val[0x6]; +}; + +struct mlx5_ifc_pphcr_reg_bits { + u8 active_hist_type[0x4]; + u8 reserved_at_4[0x4]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x8]; + u8 num_of_bins[0x8]; + u8 reserved_at_30[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_bin_range_layout_bits bin_range[16]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -11804,6 +11832,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mtmp_reg_bits mtmp_reg; struct mlx5_ifc_mtptm_reg_bits mtptm_reg; struct mlx5_ifc_mtctr_reg_bits mtctr_reg; + struct mlx5_ifc_pphcr_reg_bits pphcr_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From cdea7cdae26995992a766443e1ea862923f2443d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:14 +0200 Subject: hrtimer: Use hrtimer_cb_get_time() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various other 
helpers contain open-coded implementations of hrtimer_cb_get_time(). This prevents refactoring the implementation. Reuse the existing helper. For this to work, move hrtimer_cb_get_time() a bit up in the file and also make its argument 'const'. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-7-3ae822e5bfbd@linutronix.de --- include/linux/hrtimer.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1ef867bb8c44..e655502b14e6 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -154,14 +154,14 @@ static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) return ktime_to_ns(timer->node.expires); } -static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) +static inline ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) { - return ktime_sub(timer->node.expires, timer->base->get_time()); + return timer->base->get_time(); } -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) +static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { - return timer->base->get_time(); + return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer)); } static inline int hrtimer_is_hres_active(struct hrtimer *timer) @@ -200,8 +200,7 @@ __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) static inline ktime_t hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) { - return __hrtimer_expires_remaining_adjusted(timer, - timer->base->get_time()); + return __hrtimer_expires_remaining_adjusted(timer, hrtimer_cb_get_time(timer)); } #ifdef CONFIG_TIMERFD @@ -363,7 +362,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { - return hrtimer_forward(timer, timer->base->get_time(), interval); + return hrtimer_forward(timer, hrtimer_cb_get_time(timer), interval); } /* Precise sleep: */ -- cgit v1.2.3 From 009eb5da29a91016e3ebb988e6401e79411be7a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:15 +0200 Subject: hrtimer: Remove hrtimer_clock_base::get_time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get_time() callbacks always need to match the base's clockid. Instead of maintaining that association twice in hrtimer_bases, use a helper.
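For reference, a typical user reaches the helper indirectly, e.g. a self-rearming callback (sketch only; the interval is arbitrary):

  static enum hrtimer_restart example_timer_fn(struct hrtimer *t)
  {
          /* hrtimer_forward_now() reads the clock via hrtimer_cb_get_time(). */
          hrtimer_forward_now(t, ms_to_ktime(100));
          return HRTIMER_RESTART;
  }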
Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-8-3ae822e5bfbd@linutronix.de --- include/linux/hrtimer.h | 5 +---- include/linux/hrtimer_defs.h | 2 -- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e655502b14e6..2cf1bf65b225 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -154,10 +154,7 @@ static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) return ktime_to_ns(timer->node.expires); } -static inline ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) -{ - return timer->base->get_time(); -} +ktime_t hrtimer_cb_get_time(const struct hrtimer *timer); static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 84a5045f80f3..aa49ffa130e5 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -41,7 +41,6 @@ * @seq: seqcount around __run_hrtimer * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers - * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { @@ -51,7 +50,6 @@ struct hrtimer_clock_base { seqcount_raw_spinlock_t seq; struct hrtimer *running; struct timerqueue_head active; - ktime_t (*get_time)(void); ktime_t offset; } __hrtimer_clock_base_align; -- cgit v1.2.3 From 70a6f71b1a77decfc5b1db426ccbe914b58adb38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Sep 2025 12:56:38 +0200 Subject: block: add a bio_init_inline helper Just a simpler wrapper around bio_init for callers that want to initialize a bio with inline bvecs. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Yu Kuai Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 46ffac5caab7..eb7f4fbd8aa9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,6 +405,11 @@ struct request_queue; void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); +static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, + unsigned short max_vecs, blk_opf_t opf) +{ + bio_init(bio, bdev, bio->bi_inline_vecs, max_vecs, opf); +} extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); -- cgit v1.2.3 From d86eaa0f3c56da286853b698b45c8ce404291082 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Sep 2025 12:56:39 +0200 Subject: block: remove the bi_inline_vecs variable sized array from struct bio Bios are embedded into other structures, and at least sparse is unhappy about embedding structures with variable sized arrays. There's no real need for the array anyway, we can replace it with a helper pointing to the memory just behind the bio, and with the previous cleanups there are very few sites doing anything special with it.
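The replacement pattern for such users, sketched under the assumption that the caller allocates room for the vectors directly behind the bio:

  struct bio *bio;

  bio = kmalloc(sizeof(*bio) + nr_vecs * sizeof(struct bio_vec), GFP_KERNEL);
  if (!bio)
          return -ENOMEM;

  /* bio_inline_vecs(bio) points at the memory just behind the bio. */
  bio_init_inline(bio, bdev, nr_vecs, REQ_OP_READ);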
Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blk_types.h | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index eb7f4fbd8aa9..27cbff5b0356 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,7 +408,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, unsigned short max_vecs, blk_opf_t opf) { - bio_init(bio, bdev, bio->bi_inline_vecs, max_vecs, opf); + bio_init(bio, bdev, bio_inline_vecs(bio), max_vecs, opf); } extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 930daff207df..bbb7893e0542 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -269,18 +269,16 @@ struct bio { struct bio_vec *bi_io_vec; /* the actual vec list */ struct bio_set *bi_pool; - - /* - * We can inline a number of vecs at the end of the bio, to avoid - * double allocations for a small number of bio_vecs. This member - * MUST obviously be kept at the very end of the bio. - */ - struct bio_vec bi_inline_vecs[]; }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) #define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT) +static inline struct bio_vec *bio_inline_vecs(struct bio *bio) +{ + return (struct bio_vec *)(bio + 1); +} + /* * bio flags */ -- cgit v1.2.3 From 199c9a8d26638845f509b76e3c176c27e7baafd7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Sep 2025 20:33:10 +0800 Subject: blk-mq: Document tags_srcu member in blk_mq_tag_set structure Add missing documentation for the tags_srcu member that was introduced to defer freeing of tags page_list to prevent use-after-free when iterating tags. Fixes htmldocs warning: WARNING: include/linux/blk-mq.h:536 struct member 'tags_srcu' not described in 'blk_mq_tag_set' Reported-by: Stephen Rothwell Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1325ceeb743a..b25d12545f46 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -507,6 +507,8 @@ enum hctx_type { * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). + * @tags_srcu: SRCU used to defer freeing of tags page_list to prevent + * use-after-free when iterating tags. * @update_nr_hwq_lock: * Synchronize updating nr_hw_queues with add/del disk & * switching elevator. -- cgit v1.2.3 From e8c8d961d8ad53d8d104d7474c08d8e4626c7767 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 24 Jun 2025 08:41:15 +0200 Subject: media: include: update Hans Verkuil's email address Replace hverkuil@xs4all.nl by hverkuil@kernel.org. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 219037f4c08d..9609cf365e8e 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -50,7 +50,7 @@ * * Author: Bill Dirks * Justin Schoeman - * Hans Verkuil + * Hans Verkuil * et al. 
*/ #ifndef __LINUX_VIDEODEV2_H -- cgit v1.2.3 From fec2e705729dc93de5399d8b139e4746805c3d81 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:51 -0700 Subject: block: check for valid bio while splitting We're already iterating every segment, so check these for valid IO lengths at the same time. Individual segment lengths will not be checked on passthrough commands. The read/write command segments must be sized to the dma alignment. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- include/linux/blkdev.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 27cbff5b0356..13d1df02656a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -322,8 +322,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); -int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, - unsigned *segs, unsigned max_bytes); +int bio_split_io_at(struct bio *bio, const struct queue_limits *lim, + unsigned *segs, unsigned max_bytes, unsigned len_align); /** * bio_next_split - get next @sectors from a bio, splitting if necessary diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7709d55adc23..9efacabaa2f7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1870,6 +1870,13 @@ bdev_atomic_write_unit_max_bytes(struct block_device *bdev) return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev)); } +static inline int bio_split_rw_at(struct bio *bio, + const struct queue_limits *lim, + unsigned *segs, unsigned max_bytes) +{ + return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 743bf2e0c49c835cb7c4e4ac7d5a2610587047be Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:52 -0700 Subject: block: add size alignment to bio_iov_iter_get_pages The block layer tries to align bio vectors to the block device's logical block size. Some cases don't have a block device, or we may need to align to something larger, which we can't derive from the queue limits. Have the caller specify what they want, or allow any length alignment if nothing was specified. Since the most common use case relies on the block device's limits, a helper function is provided. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 ++++++++- include/linux/blkdev.h | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 13d1df02656a..a64a30131031 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -446,7 +446,14 @@ int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); -int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); +int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, + unsigned len_align_mask); + +static inline int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) +{ + return bio_iov_iter_get_pages_aligned(bio, iter, 0); +} + void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9efacabaa2f7..44e1066f7446 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1877,6 +1877,13 @@ static inline int bio_split_rw_at(struct bio *bio, return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); } +static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, + struct iov_iter *iter, struct block_device *bdev) +{ + return bio_iov_iter_get_pages_aligned(bio, iter, + bdev_logical_block_size(bdev) - 1); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 9eab1d4e0d15b633adc170c458c51e8be3b1c553 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:56 -0700 Subject: block: remove bdev_iter_is_aligned No more callers. Signed-off-by: Keith Busch Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 44e1066f7446..c8cb08b2ed29 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1590,13 +1590,6 @@ static inline unsigned int bdev_dma_alignment(struct block_device *bdev) return queue_dma_alignment(bdev_get_queue(bdev)); } -static inline bool bdev_iter_is_aligned(struct block_device *bdev, - struct iov_iter *iter) -{ - return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev), - bdev_logical_block_size(bdev) - 1); -} - static inline unsigned int blk_lim_dma_alignment_and_pad(struct queue_limits *lim) { -- cgit v1.2.3 From b475272f03ca5d0c437c8f899ff229b21010ec83 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:58 -0700 Subject: iov_iter: remove iov_iter_is_aligned No more callers. Signed-off-by: Keith Busch Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Mike Snitzer Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/uio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 2e86c653186c..5b127043a151 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -286,8 +286,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); -bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, - unsigned len_mask); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, -- cgit v1.2.3 From d57447ffb5fadffdba920f2fb933296fb6c5ff57 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 3 Sep 2025 12:33:17 -0700 Subject: blk-mq-dma: bring back p2p request flags We only need to consider data and metadata dma mapping types separately. The request and bio integrity payload have enough flag bits to internally track the mapping type for each. Use these so the caller doesn't need to track them, and provide separate request and integrity helpers to the common code. This will make it easier to scale new mappings, like the proposed MMIO attribute, without burdening the caller with tracking such things. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Leon Romanovsky Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/bio-integrity.h | 1 + include/linux/blk-integrity.h | 15 +++++++++++++++ include/linux/blk-mq-dma.h | 11 +++++++++-- include/linux/blk_types.h | 2 ++ 4 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index 0a25716820fe..851254f36eb3 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -13,6 +13,7 @@ enum bip_flags { BIP_CHECK_GUARD = 1 << 5, /* guard check */ BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ + BIP_P2P_DMA = 1 << 8, /* using P2P address */ }; struct bio_integrity_payload { diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 78fe2459e661..b659373788f6 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -27,6 +27,15 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, #ifdef CONFIG_BLK_DEV_INTEGRITY int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); + +static inline bool blk_rq_integrity_dma_unmap(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + size_t mapped_len) +{ + return blk_dma_unmap(req, dma_dev, state, mapped_len, + bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA); +} + int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes); @@ -115,6 +124,12 @@ static inline int blk_rq_map_integrity_sg(struct request *q, { return 0; } +static inline bool blk_rq_integrity_dma_unmap(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + size_t mapped_len) +{ + return false; +} static inline int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes) diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index 0f45ea110ca1..51829958d872
100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -43,7 +43,7 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) } /** - * blk_rq_dma_unmap - try to DMA unmap a request + * blk_dma_unmap - try to DMA unmap a request * @req: request to unmap * @dma_dev: device to unmap from * @state: DMA IOVA state @@ -53,7 +53,7 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) * Returns %false if the callers need to manually unmap every DMA segment * mapped using @iter or %true if no work is left to be done. */ -static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, +static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev, struct dma_iova_state *state, size_t mapped_len, bool is_p2p) { if (is_p2p) @@ -68,4 +68,11 @@ static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, return !dma_need_unmap(dma_dev); } +static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, size_t mapped_len) +{ + return blk_dma_unmap(req, dma_dev, state, mapped_len, + req->cmd_flags & REQ_P2PDMA); +} + #endif /* BLK_MQ_DMA_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bbb7893e0542..4bd098fd61cb 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -384,6 +384,7 @@ enum req_flag_bits { __REQ_DRV, /* for driver use */ __REQ_FS_PRIVATE, /* for file system (submitter) use */ __REQ_ATOMIC, /* for atomic write operations */ + __REQ_P2PDMA, /* contains P2P DMA pages */ /* * Command specific flags, keep last: */ @@ -416,6 +417,7 @@ enum req_flag_bits { #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) #define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC) +#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA) #define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) -- cgit v1.2.3 From bf2da4799fdb6eb58d9c9541b7dc1096c260499d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 6 Sep 2025 18:29:44 -0700 Subject: net/mlx5: Implement cqe_compress_type via devlink params Select which algorithm should be used by the NIC to decide the rate of CQE compression depending on PCIe bus conditions. Supported values: 1) balanced: merges fewer CQEs, resulting in a moderate compression ratio but maintaining a balance between bandwidth savings and performance 2) aggressive: merges more CQEs into a single entry, achieving a higher compression rate and maximizing performance, particularly under high traffic loads.
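As a usage illustration, selecting the algorithm would follow the standard devlink parameter syntax (the PCI device address is a placeholder, and the runtime cmode is an assumption for the example):

 $ devlink dev param set pci/0000:03:00.0 name cqe_compress_type value aggressive cmode runtime
 $ devlink dev param show pci/0000:03:00.0 name cqe_compress_type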
Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250907012953.301746-3-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c0858af0e854..fcfc18bfeba9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -137,6 +137,7 @@ enum { MLX5_REG_MTCAP = 0x9009, MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, + MLX5_REG_MNVDA = 0x9024, MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, MLX5_REG_MRTC = 0x902d, -- cgit v1.2.3 From e096a7cc0be126d9376e549a10d71cf16b1a1c1c Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 5 Sep 2025 11:07:09 +0800 Subject: ptp: add debugfs interfaces to loop back the periodic output signal Some PTP devices, such as the ptp_qoriq device, have the capability to loop back the periodic output signal for debugging. So add generic interfaces to set the periodic output signal loopback, rather than each vendor having a different implementation. Show how many channels support the periodic output signal loopback: $ cat /sys/kernel/debug/ptp/n_perout_loopback Enable the loopback of the periodic output signal of channel X: $ echo 1 > /sys/kernel/debug/ptp/perout_loopback Disable the loopback of the periodic output signal of channel X: $ echo 0 > /sys/kernel/debug/ptp/perout_loopback Suggested-by: Andrew Lunn Signed-off-by: Wei Fang Link: https://patch.msgid.link/20250905030711.1509648-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/ptp_clock_kernel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 7dd7951b23d5..884364596dd3 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -67,6 +67,8 @@ struct ptp_system_timestamp { * @n_ext_ts: The number of external time stamp channels. * @n_per_out: The number of programmable periodic signals. * @n_pins: The number of programmable pins. + * @n_per_lp: The number of channels that support looping back the periodic + * output signal. * @pps: Indicates whether the clock supports a PPS callback. * * @supported_perout_flags: The set of flags the driver supports for the * @@ -175,6 +177,11 @@ struct ptp_system_timestamp { * scheduling time (>=0) or negative value in case further * scheduling is not required. * + * @perout_loopback: Request driver to enable or disable the periodic output + * signal loopback. + * parameter index: index of the periodic output signal channel. + * parameter on: caller passes one to enable or zero to disable. + * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of().
* @@ -189,6 +196,7 @@ struct ptp_clock_info { int n_ext_ts; int n_per_out; int n_pins; + int n_per_lp; int pps; unsigned int supported_perout_flags; unsigned int supported_extts_flags; @@ -213,6 +221,8 @@ struct ptp_clock_info { int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); long (*do_aux_work)(struct ptp_clock_info *ptp); + int (*perout_loopback)(struct ptp_clock_info *ptp, unsigned int index, + int on); }; struct ptp_clock; -- cgit v1.2.3 From f3164840a136b123c8348ca5af4d83d99aa86eb7 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 5 Sep 2025 11:07:11 +0800 Subject: ptp: qoriq: convert to use generic interfaces to set loopback mode Since the generic debugfs interfaces for setting the periodic pulse signal loopback have been added to the ptp_clock driver, convert the vendor-defined debugfs interfaces to the generic ones. Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20250905030711.1509648-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/fsl/ptp_qoriq.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index b301bf7199d3..3601e25779ba 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -145,7 +145,6 @@ struct ptp_qoriq { struct ptp_clock *clock; struct ptp_clock_info caps; struct resource *rsrc; - struct dentry *debugfs_root; struct device *dev; bool extts_fifo_support; bool fiper3_support; @@ -195,14 +194,5 @@ int ptp_qoriq_settime(struct ptp_clock_info *ptp, int ptp_qoriq_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on); int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event); -#ifdef CONFIG_DEBUG_FS -void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); -void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); -#else -static inline void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) -{ } -static inline void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) -{ } -#endif #endif -- cgit v1.2.3 From e0423541477dfb684fbc6e6b5386054bc650f264 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Sep 2025 15:44:45 +0200 Subject: PM: EM: Add function for registering a PD without capacity update The intel_pstate driver manages CPU capacity changes itself and it does not need an update of the capacity of all CPUs in the system to be carried out after registering a PD. Moreover, in some configurations (for instance, an SMT-capable hybrid x86 system booted with nosmt in the kernel command line) the em_check_capacity_update() call at the end of em_dev_register_perf_domain() always fails and reschedules itself to run once again in 1 s, so effectively it runs in vain every 1 s forever. To address this, introduce a new variant of em_dev_register_perf_domain(), called em_dev_register_pd_no_update(), that does not invoke em_check_capacity_update(), and make intel_pstate use it instead of the original. Fixes: 7b010f9b9061 ("cpufreq: intel_pstate: EAS support for hybrid platforms") Closes: https://lore.kernel.org/linux-pm/40212796-734c-4140-8a85-854f72b8144d@panix.com/ Reported-by: Kenneth R. Crudup Tested-by: Kenneth R. Crudup Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J.
Wysocki --- include/linux/energy_model.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 7fa1eb3cc823..61d50571ad88 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -171,6 +171,9 @@ int em_dev_update_perf_domain(struct device *dev, int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, const struct em_data_callback *cb, const cpumask_t *cpus, bool microwatts); +int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states, + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); struct em_perf_table *em_table_alloc(struct em_perf_domain *pd); void em_table_free(struct em_perf_table *table); @@ -350,6 +353,13 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, { return -EINVAL; } +static inline +int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states, + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) +{ + return -EINVAL; +} static inline void em_dev_unregister_perf_domain(struct device *dev) { } -- cgit v1.2.3 From 1733e88874838ddebf7774440c285700865e6b08 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 10 Sep 2025 14:30:41 +0800 Subject: block: cleanup bio_issue Now that bio->bi_issue is only used by blk-iolatency to get the bio issue time, replace bio_issue with a u64 time directly and remove bio_issue to make the code cleaner. Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4bd098fd61cb..8e8d1cc8b06c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -198,10 +198,6 @@ static inline bool blk_path_error(blk_status_t error) return true; } -struct bio_issue { - u64 value; -}; - typedef __u32 __bitwise blk_opf_t; typedef unsigned int blk_qc_t; @@ -242,7 +238,8 @@ struct bio { * on release of the bio. */ struct blkcg_gq *bi_blkg; - struct bio_issue bi_issue; + /* Time that this bio was issued. */ + u64 issue_time_ns; #ifdef CONFIG_BLK_CGROUP_IOCOST u64 bi_iocost_cost; #endif -- cgit v1.2.3 From ea3d1f104db60f9d5074b33819ccea3c216e0bee Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 10 Sep 2025 14:30:43 +0800 Subject: blk-mq: add QUEUE_FLAG_BIO_ISSUE_TIME bio->issue_time_ns is initialized for every bio; however, it is only used by blk-iolatency. Add a new queue_flag and only set this flag when blk-iolatency is enabled, so that the extra blk_time_get_ns() call can be avoided for disks where blk-iolatency is not enabled.
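Conceptually, the change boils down to the following guard in the submission path (a sketch of the idea, not the literal kernel hunk):

	/* only pay for a timestamp when blk-iolatency asked for it */
	if (test_bit(QUEUE_FLAG_BIO_ISSUE_TIME, &q->queue_flags))
		bio->issue_time_ns = blk_time_get_ns();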
Signed-off-by: Yu Kuai Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8cb08b2ed29..7c542b1851fa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -657,6 +657,7 @@ enum { QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ + QUEUE_FLAG_BIO_ISSUE_TIME, /* record bio->issue_time_ns */ QUEUE_FLAG_MAX }; -- cgit v1.2.3 From e37b5596a19be9a150cb194ec32e78f295a3574b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 10 Sep 2025 14:30:46 +0800 Subject: block: factor out a helper bio_submit_split_bioset() No functional changes are intended. Some drivers, such as mdraid, split bios internally; this prepares for unifying the bio split code. Signed-off-by: Yu Kuai Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7c542b1851fa..066e5309bd45 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1000,6 +1000,8 @@ extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); void submit_bio_noacct(struct bio *bio); struct bio *bio_split_to_limits(struct bio *bio); +struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors, + struct bio_set *bs); extern int blk_lld_busy(struct request_queue *q); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); -- cgit v1.2.3 From 448097bbd3836d2ee46fa6eabd18661e9a3c8be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Fran=C3=A7ois=20Lessard?= Date: Tue, 2 Sep 2025 15:04:39 -0400 Subject: device property: Add scoped fwnode child node iterators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add scoped versions of fwnode child node iterators that automatically handle reference counting cleanup using the __free() attribute: - fwnode_for_each_child_node_scoped() - fwnode_for_each_available_child_node_scoped() These macros follow the same pattern as existing scoped iterators in the kernel, ensuring fwnode references are automatically released when the iterator variable goes out of scope. This prevents resource leaks and eliminates the need for manual cleanup in error paths. The implementation mirrors the non-scoped variants but uses __free(fwnode_handle) for automatic resource management, providing a safer and more convenient interface for drivers iterating over firmware node children.
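A usage sketch of the new macros (count_children() is a made-up consumer for illustration):

	static unsigned int count_children(struct fwnode_handle *fwnode)
	{
		unsigned int n = 0;

		/* child is put automatically, including on early return */
		fwnode_for_each_available_child_node_scoped(fwnode, child)
			n++;

		return n;
	}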
Signed-off-by: Jean-François Lessard Acked-by: Danilo Krummrich Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Signed-off-by: Wolfram Sang --- include/linux/property.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 82f0cb3abd1e..862e208133f3 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -176,6 +176,16 @@ struct fwnode_handle *fwnode_get_next_available_child_node( for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\ child = fwnode_get_next_available_child_node(fwnode, child)) +#define fwnode_for_each_child_node_scoped(fwnode, child) \ + for (struct fwnode_handle *child __free(fwnode_handle) = \ + fwnode_get_next_child_node(fwnode, NULL); \ + child; child = fwnode_get_next_child_node(fwnode, child)) + +#define fwnode_for_each_available_child_node_scoped(fwnode, child) \ + for (struct fwnode_handle *child __free(fwnode_handle) = \ + fwnode_get_next_available_child_node(fwnode, NULL); \ + child; child = fwnode_get_next_available_child_node(fwnode, child)) + struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child); -- cgit v1.2.3 From a1ffc8ad3165fa1cf6a60c6a4b4e00dfd6603cf2 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Wed, 10 Sep 2025 14:59:33 +0800 Subject: cgroup: refactor the cgroup_attach_lock code to make it clearer Dynamic cgroup migration involving threadgroup locks can be in one of two states: no lock held, or holding the global lock. Explicitly declare the different lock modes to make the code easier to understand and to facilitate future extensions of the lock modes. Signed-off-by: Yi Tao Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 92ed6d18266d..ff3c7d0e3e01 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -140,6 +140,14 @@ enum { __CFTYPE_ADDED = (1 << 18), }; +enum cgroup_attach_lock_mode { + /* Default */ + CGRP_ATTACH_LOCK_GLOBAL, + + /* When pid=0 && threadgroup=false, see comments in cgroup_procs_write_start */ + CGRP_ATTACH_LOCK_NONE, +}; + /* * cgroup_file is the handle for a file instance created in a cgroup which * is used, for example, to generate file changed notifications. This can -- cgit v1.2.3 From 0568f89d4fb82d98001baeb870e92f43cd1f7317 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Wed, 10 Sep 2025 14:59:35 +0800 Subject: cgroup: replace global percpu_rwsem with per threadgroup rwsem when writing to cgroup.procs The static usage pattern of creating a cgroup, enabling controllers, and then seeding it with CLONE_INTO_CGROUP doesn't require write locking cgroup_threadgroup_rwsem and thus doesn't benefit from this patch. To avoid affecting other users, the per threadgroup rwsem is only used when favordynmods is enabled. As computer hardware advances, modern systems are typically equipped with many CPU cores and large amounts of memory, enabling the deployment of numerous applications. On such systems, container creation and deletion become frequent operations, making cgroup process migration no longer a cold path. This leads to noticeable contention with common process operations such as fork, exec, and exit.
To alleviate the contention between cgroup process migration and operations like process fork, this patch modifies the locking to take the write lock on signal_struct->group_rwsem when writing a pid to cgroup.procs/threads instead of holding the global write lock. Cgroup process migration has historically relied on signal_struct->group_rwsem to protect thread group integrity. In commit <1ed1328792ff> ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem"), this was changed to a global cgroup_threadgroup_rwsem. The advantage of using a global lock was simplified handling of process group migrations. This patch retains the use of the global lock for protecting process group migration, while reducing contention by using a per thread group lock during cgroup.procs/threads writes. The locking behavior is as follows:

write cgroup.procs/threads | process fork,exec,exit | process group migration
------------------------------------------------------------------------------
cgroup_lock()              | down_read(&g_rwsem)    | cgroup_lock()
down_write(&p_rwsem)       | down_read(&p_rwsem)    | down_write(&g_rwsem)
critical section           | critical section       | critical section
up_write(&p_rwsem)         | up_read(&p_rwsem)      | up_write(&g_rwsem)
cgroup_unlock()            | up_read(&g_rwsem)      | cgroup_unlock()

g_rwsem denotes cgroup_threadgroup_rwsem, p_rwsem denotes signal_struct->group_rwsem. This patch eliminates contention between cgroup migration and fork operations for threads that belong to different thread groups, thereby reducing the long-tail latency of cgroup migrations and lowering system load. With this patch, under heavy fork and exec interference, the long-tail latency of cgroup migration has been reduced from milliseconds to microseconds. Under heavy cgroup migration interference, the multi-CPU score of the spawn test case in UnixBench increased by 9%. tj: Update comment in cgroup_favor_dynmods() and switch WARN_ONCE() to pr_warn_once(). Signed-off-by: Yi Tao Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 17 ++++++++++++++++- include/linux/sched/signal.h | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ff3c7d0e3e01..93318fce31f3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -91,6 +91,12 @@ enum { * cgroup_threadgroup_rwsem. This makes hot path operations such as * forks and exits into the slow path and more expensive. * + * Alleviate the contention between fork, exec, exit operations and + * writing to cgroup.procs by taking a per threadgroup rwsem instead of + * the global cgroup_threadgroup_rwsem. Fork and other operations + * from threads in different thread groups no longer contend with + * writing to cgroup.procs.
+ * * The static usage pattern of creating a cgroup, enabling controllers, * and then seeding it with CLONE_INTO_CGROUP doesn't require write * locking cgroup_threadgroup_rwsem and thus doesn't benefit from @@ -146,6 +152,9 @@ enum cgroup_attach_lock_mode { /* When pid=0 && threadgroup=false, see comments in cgroup_procs_write_start */ CGRP_ATTACH_LOCK_NONE, + + /* When favordynmods is on, see comments above CGRP_ROOT_FAVOR_DYNMODS */ + CGRP_ATTACH_LOCK_PER_THREADGROUP, }; /* @@ -846,6 +855,7 @@ struct cgroup_subsys { }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +extern bool cgroup_enable_per_threadgroup_rwsem; struct cgroup_of_peak { unsigned long value; @@ -857,11 +867,14 @@ struct cgroup_of_peak { * @tsk: target task * * Allows cgroup operations to synchronize against threadgroup changes - * using a percpu_rw_semaphore. + * using a global percpu_rw_semaphore and a per threadgroup rw_semaphore when + * favordynmods is on. See the comment above CGRP_ROOT_FAVOR_DYNMODS definition. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { percpu_down_read(&cgroup_threadgroup_rwsem); + if (cgroup_enable_per_threadgroup_rwsem) + down_read(&tsk->signal->cgroup_threadgroup_rwsem); } /** @@ -872,6 +885,8 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) */ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { + if (cgroup_enable_per_threadgroup_rwsem) + up_read(&tsk->signal->cgroup_threadgroup_rwsem); percpu_up_read(&cgroup_threadgroup_rwsem); } diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1ef1edbaaf79..7d6449982822 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -226,6 +226,10 @@ struct signal_struct { struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + struct rw_semaphore cgroup_threadgroup_rwsem; +#endif + /* * Thread is the potential origin of an oom condition; kill first on * oom -- cgit v1.2.3 From 1dfdf4527fd391f653c53b634af9122613c58904 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 30 Aug 2025 18:48:32 +0200 Subject: iio: adc: exynos_adc: Drop platform data support There are no Samsung Exynos SoC ADC driver users which bind via platform ID, thus platform data is never set and can be dropped. 
Reviewed-by: Andy Shevchenko Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250830-s3c-cleanup-adc-v2-3-4f8299343d32@linaro.org Signed-off-by: Jonathan Cameron --- include/linux/platform_data/touchscreen-s3c2410.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/platform_data/touchscreen-s3c2410.h (limited to 'include/linux') diff --git a/include/linux/platform_data/touchscreen-s3c2410.h b/include/linux/platform_data/touchscreen-s3c2410.h deleted file mode 100644 index bf8d3b9d7c6a..000000000000 --- a/include/linux/platform_data/touchscreen-s3c2410.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2005 Arnaud Patard -*/ - -#ifndef __TOUCHSCREEN_S3C2410_H -#define __TOUCHSCREEN_S3C2410_H - -struct s3c2410_ts_mach_info { - int delay; - int presc; - int oversampling_shift; - void (*cfg_gpio)(struct platform_device *dev); -}; - -extern void s3c24xx_ts_set_platdata(struct s3c2410_ts_mach_info *); -extern void s3c64xx_ts_set_platdata(struct s3c2410_ts_mach_info *); - -/* defined by architecture to configure gpio */ -extern void s3c24xx_ts_cfg_gpio(struct platform_device *dev); - -#endif /*__TOUCHSCREEN_S3C2410_H */ -- cgit v1.2.3 From cec1aec9c46305743a2d4a1bfb06f6b0374d5ed0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 31 Aug 2025 12:48:22 +0200 Subject: iio: consumers: Add an iio_multiply_value() helper function The channel-scale handling in iio_convert_raw_to_processed() in essence does the following:

  processed = raw * caller-provided-scale * channel-scale

which can also be written as:

  multiplier = raw * caller-provided-scale
  iio-value  = channel-scale
  processed  = multiplier * iio-value

Where iio-value is a set of IIO_VAL_* type + val + val2 integers, being able to handle multiplication of iio-values like this is something which is useful to have in general and, as previous bugfixes to iio_convert_raw_to_processed() have shown, also tricky to implement. Split the iio-value multiplication code from iio_convert_raw_to_processed() out into a new iio_multiply_value() helper. This serves multiple purposes: 1. Having this split out allows writing a KUnit test for this. 2. Having this split out allows re-use to get better precision when scaling values in iio_read_channel_processed_scale(). Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20250831104825.15097-4-hansg@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 6a4479616479..a38b277c2c02 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -381,6 +381,24 @@ int iio_read_channel_offset(struct iio_channel *chan, int *val, int iio_read_channel_scale(struct iio_channel *chan, int *val, int *val2); +/** + * iio_multiply_value() - Multiply an IIO value + * @result: Destination pointer for the multiplication result + * @multiplier: Multiplier. + * @type: One of the IIO_VAL_* constants. This decides how the @val and + * @val2 parameters are interpreted. + * @val: Value being multiplied. + * @val2: Value being multiplied. @val2 use depends on type. + * + * Multiply an IIO value with a s64 multiplier storing the result as + * IIO_VAL_INT. This is typically used for scaling. + * + * Returns: + * IIO_VAL_INT on success or a negative error-number on failure.
+ */ +int iio_multiply_value(int *result, s64 multiplier, + unsigned int type, int val, int val2); + /** * iio_convert_raw_to_processed() - Converts a raw value to a processed value * @chan: The channel being queried -- cgit v1.2.3 From 6fef6ae764be8a77f61ad3b6937ba82fe8358045 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 7 Sep 2025 21:43:20 +0100 Subject: net: ethtool: fix wrong type used in struct kernel_ethtool_ts_info In C, enumerated types do not have a defined size, apart from being compatible with one of the standard types. This allows an ABI / compiler to choose the type of an enum depending on the values it needs to store, and storing larger values in it can lead to undefined behaviour. The tx_types and rx_filters members of struct kernel_ethtool_ts_info are defined as enumerated types, but are bit arrays, where each bit is defined by the enumerated type. This means they typically store values in excess of the maximum value of the enumerated type, in fact (1 << max_value), and thus must not be declared using the enumerated type. Fix both of these to use u32, as per the corresponding __u32 UAPI type. Fixes: 2111375b85ad ("net: Add struct kernel_ethtool_ts_info") Signed-off-by: Russell King (Oracle) Reviewed-by: Kory Maincent Link: https://patch.msgid.link/E1uvMEK-00000003Amd-2pWR@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de5bd76a400c..d7d757e72554 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -856,8 +856,8 @@ struct kernel_ethtool_ts_info { enum hwtstamp_provider_qualifier phc_qualifier; enum hwtstamp_source phc_source; int phc_phyindex; - enum hwtstamp_tx_types tx_types; - enum hwtstamp_rx_filters rx_filters; + u32 tx_types; + u32 rx_filters; }; /** -- cgit v1.2.3 From b2461e20fa9ac18b1305bba5bc7e22ebf644ea01 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 25 Aug 2025 15:00:30 +0800 Subject: firmware: imx: Add stub functions for SCMI MISC API To ensure successful builds when CONFIG_IMX_SCMI_MISC_DRV is not enabled, this patch adds static inline stub implementations for the following functions: - scmi_imx_misc_ctrl_get() - scmi_imx_misc_ctrl_set() These stubs return -EOPNOTSUPP to indicate that the functionality is not supported in the current configuration. This avoids potential build or link errors in code that conditionally calls these functions based on feature availability. This patch also drops the changes in commit 540c830212ed ("firmware: imx: remove duplicate scmi_imx_misc_ctrl_get()"). The original change aimed to simplify the handling of optional features by removing conditional stubs. However, the use of conditional stubs is necessary when CONFIG_IMX_SCMI_MISC_DRV is n, while a consumer driver is set to y. This is not a matter of preserving legacy patterns, but rather of ensuring that there is no link error whether the consumer is built as a module or built-in.
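The underlying pattern, reduced to a generic sketch (CONFIG_FOO and foo_ctrl_get() are placeholders, not names from the patch):

	#if IS_ENABLED(CONFIG_FOO)
	int foo_ctrl_get(u32 id, u32 *val);
	#else
	static inline int foo_ctrl_get(u32 id, u32 *val)
	{
		/* keeps CONFIG_FOO=n with a built-in consumer linking */
		return -EOPNOTSUPP;
	}
	#endif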
Fixes: 0b4f8a68b292 ("firmware: imx: Add i.MX95 MISC driver") Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/sm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index d4212bc42b2c..99c15bbb46aa 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -26,8 +26,20 @@ #define SCMI_IMX94_CTRL_SAI3_MCLK 5U /*!< WAKE SAI3 MCLK */ #define SCMI_IMX94_CTRL_SAI4_MCLK 6U /*!< WAKE SAI4 MCLK */ +#if IS_ENABLED(CONFIG_IMX_SCMI_MISC_DRV) int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); +#else +static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val) +{ + return -EOPNOTSUPP; +} +#endif int scmi_imx_cpu_start(u32 cpuid, bool start); int scmi_imx_cpu_started(u32 cpuid, bool *started); -- cgit v1.2.3 From 3fb91b5c86d0fb5ff6f65c30a4f20193166e22fe Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 25 Aug 2025 15:00:31 +0800 Subject: firmware: imx: Add stub functions for SCMI LMM API To ensure successful builds when CONFIG_IMX_SCMI_LMM_DRV is not enabled, this patch adds static inline stub implementations for the following functions: - scmi_imx_lmm_operation() - scmi_imx_lmm_info() - scmi_imx_lmm_reset_vector_set() These stubs return -EOPNOTSUPP to indicate that the functionality is not supported in the current configuration. This avoids potential build or link errors in code that conditionally calls these functions based on feature availability. Fixes: 7242bbf418f0 ("firmware: imx: Add i.MX95 SCMI LMM driver") Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/sm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index 99c15bbb46aa..f2a72177bb37 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -56,7 +56,24 @@ enum scmi_imx_lmm_op { #define SCMI_IMX_LMM_OP_FORCEFUL 0 #define SCMI_IMX_LMM_OP_GRACEFUL BIT(0) +#if IS_ENABLED(CONFIG_IMX_SCMI_LMM_DRV) int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags); int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info); int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector); +#else +static inline int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector) +{ + return -EOPNOTSUPP; +} +#endif #endif -- cgit v1.2.3 From 222accf05fc42f68ae02065d9c1542c20315118b Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 25 Aug 2025 15:00:32 +0800 Subject: firmware: imx: Add stub functions for SCMI CPU API To ensure successful builds when CONFIG_IMX_SCMI_CPU_DRV is not enabled, this patch adds static inline stub implementations for the following functions: - scmi_imx_cpu_start() - scmi_imx_cpu_started() - scmi_imx_cpu_reset_vector_set() These stubs return -EOPNOTSUPP to indicate that the functionality is not supported in the current configuration. 
This avoids potential build or link errors in code that conditionally calls these functions based on feature availability. Fixes: 1055faa5d660 ("firmware: imx: Add i.MX95 SCMI CPU driver") Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/sm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index f2a72177bb37..a33b45027356 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -41,10 +41,28 @@ static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val) } #endif +#if IS_ENABLED(CONFIG_IMX_SCMI_CPU_DRV) int scmi_imx_cpu_start(u32 cpuid, bool start); int scmi_imx_cpu_started(u32 cpuid, bool *started); int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, bool boot, bool resume); +#else +static inline int scmi_imx_cpu_start(u32 cpuid, bool start) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_cpu_started(u32 cpuid, bool *started) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, + bool boot, bool resume) +{ + return -EOPNOTSUPP; +} +#endif enum scmi_imx_lmm_op { SCMI_IMX_LMM_BOOT, -- cgit v1.2.3 From c924c65f52c300ba36373e140a43a8e723c3abdd Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 13 Aug 2025 08:02:52 +0200 Subject: tee: implement protected DMA-heap Implement a DMA heap for protected DMA-buf allocation in the TEE subsystem. Protected memory refers to memory buffers behind a hardware enforced firewall. It is not accessible to the kernel under normal circumstances but rather only accessible to certain hardware IPs or CPUs executing in higher or differently privileged mode than the kernel itself. This interface allows allocating and managing such protected memory buffers via interaction with a TEE implementation. The protected memory is allocated for a specific use-case, like Secure Video Playback, Trusted UI, or Secure Video Recording, where certain hardware devices can access the memory. The DMA-heaps are enabled explicitly by the TEE backend driver. The TEE backend driver needs to implement a protected memory pool to manage the protected memory. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index a38494d6b5f4..28b65010b9ed 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -8,9 +8,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -30,6 +32,12 @@ #define TEE_DEVICE_FLAG_REGISTERED 0x1 #define TEE_MAX_DEV_NAME_LEN 32 +enum tee_dma_heap_id { + TEE_DMA_HEAP_SECURE_VIDEO_PLAY = 1, + TEE_DMA_HEAP_TRUSTED_UI, + TEE_DMA_HEAP_SECURE_VIDEO_RECORD, +}; + /** * struct tee_device - TEE Device representation * @name: name of device @@ -116,6 +124,36 @@ struct tee_desc { u32 flags; }; +/** + * struct tee_protmem_pool - protected memory pool + * @ops: operations + * + * This is an abstract interface where this struct is expected to be
+ */ +struct tee_protmem_pool { + const struct tee_protmem_pool_ops *ops; +}; + +/** + * struct tee_protmem_pool_ops - protected memory pool operations + * @alloc: called when allocating protected memory + * @free: called when freeing protected memory + * @update_shm: called when registering a dma-buf to update the @shm + * with physical address of the buffer or to return the + * @parent_shm of the memory pool + * @destroy_pool: called when destroying the pool + */ +struct tee_protmem_pool_ops { + int (*alloc)(struct tee_protmem_pool *pool, struct sg_table *sgt, + size_t size, size_t *offs); + void (*free)(struct tee_protmem_pool *pool, struct sg_table *sgt); + int (*update_shm)(struct tee_protmem_pool *pool, struct sg_table *sgt, + size_t offs, struct tee_shm *shm, + struct tee_shm **parent_shm); + void (*destroy_pool)(struct tee_protmem_pool *pool); +}; + /** * tee_device_alloc() - Allocate a new struct tee_device instance * @teedesc: Descriptor for this driver @@ -154,6 +192,11 @@ int tee_device_register(struct tee_device *teedev); */ void tee_device_unregister(struct tee_device *teedev); +int tee_device_register_dma_heap(struct tee_device *teedev, + enum tee_dma_heap_id id, + struct tee_protmem_pool *pool); +void tee_device_put_all_dma_heaps(struct tee_device *teedev); + /** * tee_device_set_dev_groups() - Set device attribute groups * @teedev: Device to register @@ -229,6 +272,16 @@ static inline void tee_shm_pool_free(struct tee_shm_pool *pool) pool->ops->destroy_pool(pool); } +/** + * tee_protmem_static_pool_alloc() - Create a protected memory manager + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + * + * @returns pointer to a 'struct tee_protmem_pool' or an ERR_PTR on failure. + */ +struct tee_protmem_pool *tee_protmem_static_pool_alloc(phys_addr_t paddr, + size_t size); + /** * tee_get_drvdata() - Return driver_data pointer * @returns the driver_data pointer supplied to tee_register(). -- cgit v1.2.3 From 146bf4e75ecab9759ed78c9d167e860042d627fb Mon Sep 17 00:00:00 2001 From: Etienne Carriere Date: Wed, 13 Aug 2025 08:02:54 +0200 Subject: tee: new ioctl to register a tee_shm from a dmabuf file descriptor Add a userspace API to create a tee_shm object that refers to a dmabuf reference. Userspace registers the dmabuf file descriptor in a tee_shm object. The registration is completed with a returned tee_shm file descriptor. Userspace is free to close the dmabuf file descriptor after it has been registered since all the resources are now held via the new tee_shm object. Closing the tee_shm file descriptor will eventually release all resources used by the tee_shm object when all references are released. The new IOCTL, TEE_IOC_SHM_REGISTER_FD, supports dmabuf references to physically contiguous memory buffers. Dmabuf references acquired from the TEE DMA-heap can be used as protected memory for Secure Video Path and such use cases. It depends on the TEE and the TEE driver if dmabuf references acquired by other means can be used. A new tee_shm flag is added to identify tee_shm objects built from a registered dmabuf, TEE_SHM_DMA_BUF.
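From the userspace side the flow would look roughly like this (the argument struct name and layout are assumptions for the sketch; only the ioctl name comes from the patch):

	/* hypothetical sketch of registering a dmabuf fd with the TEE */
	struct tee_ioctl_shm_register_fd_data data = {
		.fd = dmabuf_fd,
	};
	int shm_fd = ioctl(tee_fd, TEE_IOC_SHM_REGISTER_FD, &data);

	close(dmabuf_fd);	/* safe: the tee_shm now holds the resources */
	/* ... use shm_fd ... */
	close(shm_fd);		/* releases the tee_shm when the last ref drops */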
Signed-off-by: Etienne Carriere Signed-off-by: Olivier Masse Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 1 + include/linux/tee_drv.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index 28b65010b9ed..b6c54b34a8b5 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -28,6 +28,7 @@ #define TEE_SHM_USER_MAPPED BIT(1) /* Memory mapped in user space */ #define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ #define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ +#define TEE_SHM_DMA_BUF BIT(4) /* Memory with dma-buf handle */ #define TEE_DEVICE_FLAG_REGISTERED 0x1 #define TEE_MAX_DEV_NAME_LEN 32 diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a54c203000ed..824f1251de60 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -116,6 +116,16 @@ struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length); +/** + * tee_shm_register_fd() - Register shared memory from file descriptor + * + * @ctx: Context that allocates the shared memory + * @fd: Shared memory file descriptor reference + * + * @returns a pointer to 'struct tee_shm' on success, and ERR_PTR on failure + */ +struct tee_shm *tee_shm_register_fd(struct tee_context *ctx, int fd); + /** * tee_shm_free() - Free shared memory * @shm: Handle to shared memory to free -- cgit v1.2.3 From ab09dd6d9201af9930efd5a5a0cb56a0fea6a169 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 13 Aug 2025 08:02:55 +0200 Subject: tee: add tee_shm_alloc_dma_mem() Add tee_shm_alloc_dma_mem() to allocate DMA memory. The memory is represented by a tee_shm object using the new flag TEE_SHM_DMA_MEM to identify it as DMA memory. The allocated memory will later be lent to the TEE to be used as protected memory. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index b6c54b34a8b5..7b0c1da2ca6c 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -29,6 +29,8 @@ #define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ #define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ #define TEE_SHM_DMA_BUF BIT(4) /* Memory with dma-buf handle */ +#define TEE_SHM_DMA_MEM BIT(5) /* Memory allocated with */ + /* dma_alloc_pages() */ #define TEE_DEVICE_FLAG_REGISTERED 0x1 #define TEE_MAX_DEV_NAME_LEN 32 @@ -298,6 +300,9 @@ void *tee_get_drvdata(struct tee_device *teedev); */ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); +struct tee_shm *tee_shm_alloc_dma_mem(struct tee_context *ctx, + size_t page_count); + int tee_dyn_shm_alloc_helper(struct tee_shm *shm, size_t size, size_t align, int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm, -- cgit v1.2.3 From 002ebddd695a53999550e241b71950f1aa0e1ac4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 9 Sep 2025 13:11:20 +0200 Subject: pmdomain: core: Restore behaviour for disabling unused PM domains Recent changes to genpd prevent PM domains that were powered on during initialization from being powered off during the boot sequence.
Based upon whether CONFIG_PM_GENERIC_DOMAINS_OF is set or not, genpd relies on the sync_state mechanism or genpd_power_off_unused() (which is a late_initcall_sync) to understand when it's okay to allow these PM domains to be powered off. This new behaviour in genpd has led to problems on different platforms. Let's therefore restore the behaviour of genpd_power_off_unused(). Moreover, let's introduce GENPD_FLAG_NO_STAY_ON, to allow genpd OF providers to opt out of the new behaviour. Link: https://lore.kernel.org/all/20250701114733.636510-1-ulf.hansson@linaro.org/ Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/20250902-rk3576-lockup-regression-v1-1-c4a0c9daeb00@collabora.com/ Reported-by: Nicolas Frattaroli Fixes: 0e789b491ba0 ("pmdomain: core: Leave powered-on genpds on until sync_state") Fixes: 13a4b7fb6260 ("pmdomain: core: Leave powered-on genpds on until late_initcall_sync") Tested-by: Heiko Stuebner Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- include/linux/pm_domain.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index c84edf217819..f67a2cb7d781 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -115,6 +115,12 @@ struct dev_pm_domain_list { * genpd provider specific way, likely through a * parent device node. This flag makes genpd to * skip its internal support for this. + * + * GENPD_FLAG_NO_STAY_ON: For genpd OF providers a powered-on PM domain at + * initialization is prevented from being + * powered-off until the ->sync_state() callback is + * invoked. This flag informs genpd to allow a + * power-off without waiting for ->sync_state(). */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -126,6 +132,7 @@ struct dev_pm_domain_list { #define GENPD_FLAG_OPP_TABLE_FW (1U << 7) #define GENPD_FLAG_DEV_NAME_FW (1U << 8) #define GENPD_FLAG_NO_SYNC_STATE (1U << 9) +#define GENPD_FLAG_NO_STAY_ON (1U << 10) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ -- cgit v1.2.3 From f0addd325ef692c92c522a2ba4d9db13fc90e664 Mon Sep 17 00:00:00 2001 From: Alexander Kurz Date: Mon, 11 Aug 2025 06:43:58 +0000 Subject: mfd: input: rtc: mc13783: Remove deprecated mc13xxx_irq_ack() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mc13xxx_irq_ack() got deprecated and became dead code with commit 10f9edaeaa30 ("mfd: mc13xxx: Use regmap irq framework for interrupts"). It should be safe to remove it now.
Signed-off-by: Alexander Kurz Acked-by: Alexandre Belloni Acked-by: Uwe Kleine-König Acked-by: Dmitry Torokhov # for input Link: https://lore.kernel.org/r/20250811064358.1659-1-akurz@blala.de Signed-off-by: Lee Jones --- include/linux/mfd/mc13xxx.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index f372926d5894..dd46fe424a80 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -31,12 +31,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, unsigned int channel, u8 ato, bool atox, unsigned int *sample); -/* Deprecated calls */ -static inline int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) -{ - return 0; -} - static inline int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, irq_handler_t handler, const char *name, void *dev) -- cgit v1.2.3 From b8cf8fda522d5a37f8948ad8a19a1113cc38710f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 9 Sep 2025 16:30:47 +0200 Subject: fanotify: add watchdog for permission events This is to make it easier to debug issues with AV software, which time and again deadlocks with no indication of where the issue comes from, with the kernel being blamed for the deadlock. Then we need to analyze dumps to prove that the kernel is not in fact at fault. The deadlock comes from recursion: handling the event triggers another permission event, in some roundabout way, obviously, otherwise it would have been found in testing. With this patch a warning is printed when a permission event is received by userspace but not answered for more than the timeout specified in /proc/sys/fs/fanotify/watchdog_timeout. The watchdog can be turned off by setting the timeout to zero (which is the default). The timeout is very coarse (T <= t < 2T) but I guess it's good enough for the purpose. Overhead should be minimal. Signed-off-by: Miklos Szeredi Reviewed-by: Amir Goldstein Link: https://patch.msgid.link/20250909143053.112171-1-mszeredi@redhat.com Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d4034ddaf392..0d954ea7b179 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -273,6 +273,8 @@ struct fsnotify_group { int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; mempool_t error_events_pool; + /* chained on perm_group_list */ + struct list_head perm_grp_list; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; -- cgit v1.2.3 From 3b1bbfb5fce3ca9fffc92ac1b053b0cfbb1f322b Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Thu, 4 Sep 2025 11:05:27 -0500 Subject: mfd: bq257xx: Add support for BQ25703A core driver The Texas Instruments BQ25703A is an integrated charger manager and boost converter. The MFD driver initializes the device for the regulator driver and power supply driver.
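For reference, exercising the fanotify watchdog is just a sysctl write; the value 10 and its unit are an assumption for the example, while the path and the zero-disables default come from the text above:

 $ echo 10 > /proc/sys/fs/fanotify/watchdog_timeout
 $ echo 0 > /proc/sys/fs/fanotify/watchdog_timeout   # default, watchdog off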
Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20250904160530.66178-3-macroalpha82@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/bq257xx.h | 104 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 include/linux/mfd/bq257xx.h (limited to 'include/linux') diff --git a/include/linux/mfd/bq257xx.h b/include/linux/mfd/bq257xx.h new file mode 100644 index 000000000000..1d6ddc7fb09f --- /dev/null +++ b/include/linux/mfd/bq257xx.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Register definitions for TI BQ257XX + * Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com/ + */ + +#define BQ25703_CHARGE_OPTION_0 0x00 +#define BQ25703_CHARGE_CURRENT 0x02 +#define BQ25703_MAX_CHARGE_VOLT 0x04 +#define BQ25703_OTG_VOLT 0x06 +#define BQ25703_OTG_CURRENT 0x08 +#define BQ25703_INPUT_VOLTAGE 0x0a +#define BQ25703_MIN_VSYS 0x0c +#define BQ25703_IIN_HOST 0x0e +#define BQ25703_CHARGER_STATUS 0x20 +#define BQ25703_PROCHOT_STATUS 0x22 +#define BQ25703_IIN_DPM 0x24 +#define BQ25703_ADCIBAT_CHG 0x28 +#define BQ25703_ADCIINCMPIN 0x2a +#define BQ25703_ADCVSYSVBAT 0x2c +#define BQ25703_MANUFACT_DEV_ID 0x2e +#define BQ25703_CHARGE_OPTION_1 0x30 +#define BQ25703_CHARGE_OPTION_2 0x32 +#define BQ25703_CHARGE_OPTION_3 0x34 +#define BQ25703_ADC_OPTION 0x3a + +#define BQ25703_EN_LWPWR BIT(15) +#define BQ25703_WDTMR_ADJ_MASK GENMASK(14, 13) +#define BQ25703_WDTMR_DISABLE 0 +#define BQ25703_WDTMR_5_SEC 1 +#define BQ25703_WDTMR_88_SEC 2 +#define BQ25703_WDTMR_175_SEC 3 + +#define BQ25703_ICHG_MASK GENMASK(12, 6) +#define BQ25703_ICHG_STEP_UA 64000 +#define BQ25703_ICHG_MIN_UA 64000 +#define BQ25703_ICHG_MAX_UA 8128000 + +#define BQ25703_MAX_CHARGE_VOLT_MASK GENMASK(15, 4) +#define BQ25703_VBATREG_STEP_UV 16000 +#define BQ25703_VBATREG_MIN_UV 1024000 +#define BQ25703_VBATREG_MAX_UV 19200000 + +#define BQ25703_OTG_VOLT_MASK GENMASK(13, 6) +#define BQ25703_OTG_VOLT_STEP_UV 64000 +#define BQ25703_OTG_VOLT_MIN_UV 4480000 +#define BQ25703_OTG_VOLT_MAX_UV 20800000 +#define BQ25703_OTG_VOLT_NUM_VOLT 256 + +#define BQ25703_OTG_CUR_MASK GENMASK(14, 8) +#define BQ25703_OTG_CUR_STEP_UA 50000 +#define BQ25703_OTG_CUR_MAX_UA 6350000 + +#define BQ25703_MINVSYS_MASK GENMASK(13, 8) +#define BQ25703_MINVSYS_STEP_UV 256000 +#define BQ25703_MINVSYS_MIN_UV 1024000 +#define BQ25703_MINVSYS_MAX_UV 16128000 + +#define BQ25703_STS_AC_STAT BIT(15) +#define BQ25703_STS_IN_FCHRG BIT(10) +#define BQ25703_STS_IN_PCHRG BIT(9) +#define BQ25703_STS_FAULT_ACOV BIT(7) +#define BQ25703_STS_FAULT_BATOC BIT(6) +#define BQ25703_STS_FAULT_ACOC BIT(5) + +#define BQ25703_IINDPM_MASK GENMASK(14, 8) +#define BQ25703_IINDPM_STEP_UA 50000 +#define BQ25703_IINDPM_MIN_UA 50000 +#define BQ25703_IINDPM_MAX_UA 6400000 +#define BQ25703_IINDPM_DEFAULT_UA 3300000 +#define BQ25703_IINDPM_OFFSET_UA 50000 + +#define BQ25703_ADCIBAT_DISCHG_MASK GENMASK(6, 0) +#define BQ25703_ADCIBAT_CHG_MASK GENMASK(14, 8) +#define BQ25703_ADCIBAT_CHG_STEP_UA 64000 +#define BQ25703_ADCIBAT_DIS_STEP_UA 256000 + +#define BQ25703_ADCIIN GENMASK(15, 8) +#define BQ25703_ADCIINCMPIN_STEP 50000 + +#define BQ25703_ADCVSYS_MASK GENMASK(15, 8) +#define BQ25703_ADCVBAT_MASK GENMASK(7, 0) +#define BQ25703_ADCVSYSVBAT_OFFSET_UV 2880000 +#define BQ25703_ADCVSYSVBAT_STEP 64000 + +#define BQ25703_ADC_CH_MASK GENMASK(7, 0) +#define BQ25703_ADC_CONV_EN BIT(15) +#define BQ25703_ADC_START BIT(14) +#define BQ25703_ADC_FULL_SCALE BIT(13) +#define BQ25703_ADC_CMPIN_EN BIT(7) +#define BQ25703_ADC_VBUS_EN BIT(6) +#define 
BQ25703_ADC_PSYS_EN BIT(5) +#define BQ25703_ADC_IIN_EN BIT(4) +#define BQ25703_ADC_IDCHG_EN BIT(3) +#define BQ25703_ADC_ICHG_EN BIT(2) +#define BQ25703_ADC_VSYS_EN BIT(1) +#define BQ25703_ADC_VBAT_EN BIT(0) + +#define BQ25703_EN_OTG_MASK BIT(12) + +struct bq257xx_device { + struct i2c_client *client; + struct regmap *regmap; +}; -- cgit v1.2.3 From 948cb194bcb4c01fb4cd029936f0c02b10780394 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Jul 2025 15:59:24 +0200 Subject: mtd: map: add back asm/barrier.h inclusion The mb() macro is used in this header: In file included from include/linux/mtd/qinfo.h:5, from include/linux/mtd/pfow.h:8, from drivers/mtd/lpddr/lpddr_cmds.c:14: include/linux/mtd/map.h: In function 'inline_map_write': include/linux/mtd/map.h:428:9: error: implicit declaration of function 'mb' [-Wimplicit-function-declaration] Fixes: 56eb7c13b97c ("mtd: map: Don't use "proxy" headers") Signed-off-by: Arnd Bergmann Acked-by: Andy Shevchenko Signed-off-by: Miquel Raynal --- include/linux/mtd/map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 288ef765a44e..75b0b2abc880 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -14,6 +14,7 @@ #include #include #include +#include <asm/barrier.h> struct device_node; struct module; -- cgit v1.2.3 From 67a529b7d3c50a56c162476509361f4fe11350dd Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 15 Aug 2025 12:40:02 -0500 Subject: include: adi-axi-common: add version check function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a version check function for checking ADI AXI IP core versions. These cores use a semantic versioning scheme, so it is useful to have a version check function that can check the minor version to enable features in drivers while maintaining backward compatibility. Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250815-spi-axi-spi-enigne-improve-version-checks-v1-1-13bde357d5b6@baylibre.com Signed-off-by: Mark Brown --- include/linux/adi-axi-common.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/adi-axi-common.h b/include/linux/adi-axi-common.h index f64f4ad4beda..37962ba530df 100644 --- a/include/linux/adi-axi-common.h +++ b/include/linux/adi-axi-common.h @@ -8,6 +8,8 @@ * https://wiki.analog.com/resources/fpga/docs/hdl/regmap */ +#include <linux/types.h> + #ifndef ADI_AXI_COMMON_H_ #define ADI_AXI_COMMON_H_ @@ -21,6 +23,25 @@ #define ADI_AXI_PCORE_VER_MINOR(version) (((version) >> 8) & 0xff) #define ADI_AXI_PCORE_VER_PATCH(version) ((version) & 0xff) +/** + * adi_axi_pcore_ver_gteq() - check if a version is satisfied + * @version: the full version read from the hardware + * @major: the major version to compare against + * @minor: the minor version to compare against + * + * ADI AXI IP Cores use semantic versioning, so this can be used to check for + * feature availability. + * + * Return: true if the version is greater than or equal to the specified + * major and minor version, false otherwise.
+ */ +static inline bool adi_axi_pcore_ver_gteq(u32 version, u32 major, u32 minor) +{ + return ADI_AXI_PCORE_VER_MAJOR(version) > (major) || + (ADI_AXI_PCORE_VER_MAJOR(version) == (major) && + ADI_AXI_PCORE_VER_MINOR(version) >= (minor)); +} + #define ADI_AXI_INFO_FPGA_TECH(info) (((info) >> 24) & 0xff) #define ADI_AXI_INFO_FPGA_FAMILY(info) (((info) >> 16) & 0xff) #define ADI_AXI_INFO_FPGA_SPEED_GRADE(info) (((info) >> 8) & 0xff) -- cgit v1.2.3 From 413cf5db2fee00fdd69bc62debdbf655f97f4c08 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Aug 2025 06:23:31 +0200 Subject: libie, ice: move fwlog admin queue to libie Copy the code and: - change ICE_AQC to LIBIE_AQC - change ice_aqc to libie_aqc - move definitions outside the structures Reviewed-by: Przemek Kitszel Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 89 ++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index ba62f703df43..ca2ac88b5709 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -222,6 +222,94 @@ struct libie_aqc_list_caps_elem { }; LIBIE_CHECK_STRUCT_LEN(32, libie_aqc_list_caps_elem); +/* Admin Queue command opcodes */ +enum libie_adminq_opc { + /* FW Logging Commands */ + libie_aqc_opc_fw_logs_config = 0xFF30, + libie_aqc_opc_fw_logs_register = 0xFF31, + libie_aqc_opc_fw_logs_query = 0xFF32, + libie_aqc_opc_fw_logs_event = 0xFF33, +}; + +enum libie_aqc_fw_logging_mod { + LIBIE_AQC_FW_LOG_ID_GENERAL = 0, + LIBIE_AQC_FW_LOG_ID_CTRL, + LIBIE_AQC_FW_LOG_ID_LINK, + LIBIE_AQC_FW_LOG_ID_LINK_TOPO, + LIBIE_AQC_FW_LOG_ID_DNL, + LIBIE_AQC_FW_LOG_ID_I2C, + LIBIE_AQC_FW_LOG_ID_SDP, + LIBIE_AQC_FW_LOG_ID_MDIO, + LIBIE_AQC_FW_LOG_ID_ADMINQ, + LIBIE_AQC_FW_LOG_ID_HDMA, + LIBIE_AQC_FW_LOG_ID_LLDP, + LIBIE_AQC_FW_LOG_ID_DCBX, + LIBIE_AQC_FW_LOG_ID_DCB, + LIBIE_AQC_FW_LOG_ID_XLR, + LIBIE_AQC_FW_LOG_ID_NVM, + LIBIE_AQC_FW_LOG_ID_AUTH, + LIBIE_AQC_FW_LOG_ID_VPD, + LIBIE_AQC_FW_LOG_ID_IOSF, + LIBIE_AQC_FW_LOG_ID_PARSER, + LIBIE_AQC_FW_LOG_ID_SW, + LIBIE_AQC_FW_LOG_ID_SCHEDULER, + LIBIE_AQC_FW_LOG_ID_TXQ, + LIBIE_AQC_FW_LOG_ID_RSVD, + LIBIE_AQC_FW_LOG_ID_POST, + LIBIE_AQC_FW_LOG_ID_WATCHDOG, + LIBIE_AQC_FW_LOG_ID_TASK_DISPATCH, + LIBIE_AQC_FW_LOG_ID_MNG, + LIBIE_AQC_FW_LOG_ID_SYNCE, + LIBIE_AQC_FW_LOG_ID_HEALTH, + LIBIE_AQC_FW_LOG_ID_TSDRV, + LIBIE_AQC_FW_LOG_ID_PFREG, + LIBIE_AQC_FW_LOG_ID_MDLVER, + LIBIE_AQC_FW_LOG_ID_MAX, +}; + +/* Set FW Logging configuration (indirect 0xFF30) + * Register for FW Logging (indirect 0xFF31) + * Query FW Logging (indirect 0xFF32) + * FW Log Event (indirect 0xFF33) + */ +#define LIBIE_AQC_FW_LOG_CONF_UART_EN BIT(0) +#define LIBIE_AQC_FW_LOG_CONF_AQ_EN BIT(1) +#define LIBIE_AQC_FW_LOG_QUERY_REGISTERED BIT(2) +#define LIBIE_AQC_FW_LOG_CONF_SET_VALID BIT(3) +#define LIBIE_AQC_FW_LOG_AQ_REGISTER BIT(0) +#define LIBIE_AQC_FW_LOG_AQ_QUERY BIT(2) + +#define LIBIE_AQC_FW_LOG_MIN_RESOLUTION (1) +#define LIBIE_AQC_FW_LOG_MAX_RESOLUTION (128) + +struct libie_aqc_fw_log { + u8 cmd_flags; + + u8 rsp_flag; + __le16 fw_rt_msb; + union { + struct { + __le32 fw_rt_lsb; + } sync; + struct { + __le16 log_resolution; + __le16 mdl_cnt; + } cfg; + } ops; + __le32 addr_high; + __le32 addr_low; +}; + +/* Response Buffer for: + * Set Firmware Logging Configuration (0xFF30) + * Query FW Logging (0xFF32) 
+ */ +struct libie_aqc_fw_log_cfg_resp { + __le16 module_identifier; + u8 log_level; + u8 rsvd0; +}; + /** * struct libie_aq_desc - Admin Queue (AQ) descriptor * @flags: LIBIE_AQ_FLAG_* flags @@ -253,6 +341,7 @@ struct libie_aq_desc { struct libie_aqc_driver_ver driver_ver; struct libie_aqc_req_res res_owner; struct libie_aqc_list_caps get_cap; + struct libie_aqc_fw_log fw_log; } params; }; LIBIE_CHECK_STRUCT_LEN(32, libie_aq_desc); -- cgit v1.2.3 From 02f44dac8930dc7cc43aa3eba872ce35382f6332 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Aug 2025 06:23:33 +0200 Subject: ice: prepare for moving file to libie s/ice/libie There is no function for filling a default descriptor in libie. Zero the descriptor structure and set the opcode without calling the function. Make functions that are called only in ice_fwlog.c static. Reviewed-by: Przemek Kitszel Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index ca2ac88b5709..29420193889a 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -9,6 +9,7 @@ #define LIBIE_CHECK_STRUCT_LEN(n, X) \ static_assert((n) == sizeof(struct X)) +#define LIBIE_AQ_MAX_BUF_LEN 4096 /** * struct libie_aqc_generic - Generic structure used in adminq communication -- cgit v1.2.3 From f3b3fc1ff0823b73f6f66b6340e6ebc4b00d2ed3 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Aug 2025 06:23:35 +0200 Subject: ice, libie: move fwlog code to libie Move the whole code from ice_fwlog.c/h to libie/fwlog.c/h. Reviewed-by: Przemek Kitszel Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 6 +-- include/linux/net/intel/libie/fwlog.h | 85 ++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 include/linux/net/intel/libie/fwlog.h (limited to 'include/linux') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index 29420193889a..ab13bd777a28 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -265,7 +265,7 @@ enum libie_aqc_fw_logging_mod { LIBIE_AQC_FW_LOG_ID_TSDRV, LIBIE_AQC_FW_LOG_ID_PFREG, LIBIE_AQC_FW_LOG_ID_MDLVER, - LIBIE_AQC_FW_LOG_ID_MAX, + LIBIE_AQC_FW_LOG_ID_MAX }; /* Set FW Logging configuration (indirect 0xFF30) @@ -280,8 +280,8 @@ enum libie_aqc_fw_logging_mod { #define LIBIE_AQC_FW_LOG_AQ_REGISTER BIT(0) #define LIBIE_AQC_FW_LOG_AQ_QUERY BIT(2) -#define LIBIE_AQC_FW_LOG_MIN_RESOLUTION (1) -#define LIBIE_AQC_FW_LOG_MAX_RESOLUTION (128) +#define LIBIE_AQC_FW_LOG_MIN_RESOLUTION 1 +#define LIBIE_AQC_FW_LOG_MAX_RESOLUTION 128 struct libie_aqc_fw_log { u8 cmd_flags; diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h new file mode 100644 index 000000000000..36b13fabca9e --- /dev/null +++ b/include/linux/net/intel/libie/fwlog.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation. */ + +#ifndef _LIBIE_FWLOG_H_ +#define _LIBIE_FWLOG_H_ + +#include + +/* Only a single log level should be set and all log levels under the set value * are enabled, e.g.
if log level is set to LIBIE_FW_LOG_LEVEL_VERBOSE, then all + * other log levels are included (except LIBIE_FW_LOG_LEVEL_NONE) + */ +enum libie_fwlog_level { + LIBIE_FWLOG_LEVEL_NONE = 0, + LIBIE_FWLOG_LEVEL_ERROR = 1, + LIBIE_FWLOG_LEVEL_WARNING = 2, + LIBIE_FWLOG_LEVEL_NORMAL = 3, + LIBIE_FWLOG_LEVEL_VERBOSE = 4, + LIBIE_FWLOG_LEVEL_INVALID, /* all values >= this entry are invalid */ +}; + +struct libie_fwlog_module_entry { + /* module ID for the corresponding firmware logging event */ + u16 module_id; + /* verbosity level for the module_id */ + u8 log_level; +}; + +struct libie_fwlog_cfg { + /* list of modules for configuring log level */ + struct libie_fwlog_module_entry module_entries[LIBIE_AQC_FW_LOG_ID_MAX]; + /* options used to configure firmware logging */ + u16 options; +#define LIBIE_FWLOG_OPTION_ARQ_ENA BIT(0) +#define LIBIE_FWLOG_OPTION_UART_ENA BIT(1) + /* set before calling libie_fwlog_init() so the PF registers for + * firmware logging on initialization + */ +#define LIBIE_FWLOG_OPTION_REGISTER_ON_INIT BIT(2) + /* set in the libie_aq_fwlog_get() response if the PF is registered for + * FW logging events over ARQ + */ +#define LIBIE_FWLOG_OPTION_IS_REGISTERED BIT(3) + + /* minimum number of log events sent per Admin Receive Queue event */ + u16 log_resolution; +}; + +struct libie_fwlog_data { + u16 data_size; + u8 *data; +}; + +struct libie_fwlog_ring { + struct libie_fwlog_data *rings; + u16 index; + u16 size; + u16 head; + u16 tail; +}; + +#define LIBIE_FWLOG_RING_SIZE_INDEX_DFLT 3 +#define LIBIE_FWLOG_RING_SIZE_DFLT 256 +#define LIBIE_FWLOG_RING_SIZE_MAX 512 + +struct libie_fwlog { + struct libie_fwlog_cfg cfg; + bool supported; /* does hardware support FW logging? */ + struct libie_fwlog_ring ring; + struct dentry *debugfs; + /* keep track of all the dentrys for FW log modules */ + struct dentry **debugfs_modules; + struct_group_tagged(libie_fwlog_api, api, + struct pci_dev *pdev; + int (*send_cmd)(void *, struct libie_aq_desc *, void *, u16); + void *priv; + struct dentry *debugfs_root; + ); +}; + +int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api); +void libie_fwlog_deinit(struct libie_fwlog *fwlog); +void libie_fwlog_reregister(struct libie_fwlog *fwlog); +void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len); +#endif /* _LIBIE_FWLOG_H_ */ -- cgit v1.2.3 From 70f23546d246563da648baedbb0432ba1d6bb357 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 11 Sep 2025 14:58:01 +0000 Subject: bpf: core: introduce main_prog_aux for stream access BPF streams are only valid for the main programs. To make it easier to access streams from subprogs, introduce main_prog_aux in struct bpf_prog_aux. prog->aux->main_prog_aux = prog->aux, for main programs and prog->aux->main_prog_aux = main_prog->aux, for subprograms. Make bpf_prog_find_from_stack() use the added main_prog_aux to return the main program when a subprog is found on the stack.
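For illustration, a minimal sketch of the invariant this commit establishes (the helper name is hypothetical, not part of the patch):

	/* Resolve the main program's aux from any prog, main or subprog.
	 * No branching is needed because main programs carry a
	 * self-pointer: prog->aux->main_prog_aux == prog->aux.
	 */
	static struct bpf_prog_aux *example_main_aux(struct bpf_prog *prog)
	{
		return prog->aux->main_prog_aux;
	}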
Signed-off-by: Puranjay Mohan Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250911145808.58042-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8f6e87f0f3a8..d133171c4d2a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1633,6 +1633,7 @@ struct bpf_prog_aux { /* function name for valid attach_btf_id */ const char *attach_func_name; struct bpf_prog **func; + struct bpf_prog_aux *main_prog_aux; void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; struct bpf_kfunc_desc_tab *kfunc_tab; -- cgit v1.2.3 From 5c5240d020615f13331f4e2c559186125eddc7d3 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 11 Sep 2025 14:58:02 +0000 Subject: bpf: Report arena faults to BPF stderr Begin reporting arena page faults and the faulting address to the BPF program's stderr. This patch adds support in the arm64 and x86-64 JITs; support for other archs can be added later. The fault handlers receive the 32 bit address in the arena region so the upper 32 bits of user_vm_start are added to it before printing the address. This is what the user would expect to see as this is what is printed by bpf_printk() if you pass it an address returned by bpf_arena_alloc_pages(). Signed-off-by: Puranjay Mohan Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250911145808.58042-4-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d133171c4d2a..41f776071ff5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2881,6 +2881,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); +void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) @@ -3168,6 +3169,11 @@ static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) { } static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) { } + +static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr, + unsigned long fault_ip) +{ +} #endif /* CONFIG_BPF_SYSCALL */ static __always_inline int -- cgit v1.2.3 From eadaa8b255f36ee39ca97d0815c25eeeb1f5d674 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:29 +0300 Subject: dma-mapping: introduce new DMA attribute to indicate MMIO memory This patch introduces the DMA_ATTR_MMIO attribute to mark DMA buffers that reside in memory-mapped I/O (MMIO) regions, such as device BARs exposed through the host bridge, which are accessible for peer-to-peer (P2P) DMA. This attribute is especially useful for exporting device memory to other devices for DMA without CPU involvement, and avoids unnecessary or potentially detrimental CPU cache maintenance calls. DMA_ATTR_MMIO is supposed to provide dma_map_resource() functionality without the need to call a special function and perform branching when processing generic containers like bio_vec by the callers.
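A hedged usage sketch (assumed names: bar_phys and len describe a peer device BAR validated via the p2pdma APIs; dma_map_phys()/dma_unmap_phys() are the phys-based helpers exported later in this series):

	dma_addr_t addr;

	addr = dma_map_phys(dev, bar_phys, len, DMA_TO_DEVICE, DMA_ATTR_MMIO);
	if (dma_mapping_error(dev, addr))
		return -EIO;
	/* ... perform the peer-to-peer transfer ... */
	dma_unmap_phys(dev, addr, len, DMA_TO_DEVICE, DMA_ATTR_MMIO);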
Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/6f058ec395c5348014860dbc2eed348c17975843.1757423202.git.leonro@nvidia.com --- include/linux/dma-mapping.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 55c03e5fe8cb..4254fd9bdf5d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -58,6 +58,26 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) +/* + * DMA_ATTR_MMIO - Indicates memory-mapped I/O (MMIO) region for DMA mapping + * + * This attribute indicates the physical address is not normal system + * memory. It may not be used with kmap*()/phys_to_virt()/phys_to_page() + * functions, it may not be cacheable, and access using CPU load/store + * instructions may not be allowed. + * + * Usually this will be used to describe MMIO addresses, or other non-cacheable + * register addresses. When DMA mapping this sort of address we call + * the operation Peer to Peer as a one device is DMA'ing to another device. + * For PCI devices the p2pdma APIs must be used to determine if DMA_ATTR_MMIO + * is appropriate. + * + * For architectures that require cache flushing for DMA coherence + * DMA_ATTR_MMIO will not perform any cache flushing. The address + * provided must never be mapped cacheable into the CPU. + */ +#define DMA_ATTR_MMIO (1UL << 10) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a -- cgit v1.2.3 From e9e81d86fee63c6d5757841ab557019ddf73786f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:31 +0300 Subject: dma-debug: refactor to use physical addresses for page mapping Convert the DMA debug infrastructure from page-based to physical address-based mapping as a preparation to rely on physical address for DMA mapping routines. The refactoring renames debug_dma_map_page() to debug_dma_map_phys() and changes its signature to accept a phys_addr_t parameter instead of struct page and offset. Similarly, debug_dma_unmap_page() becomes debug_dma_unmap_phys(). A new dma_debug_phy type is introduced to distinguish physical address mappings from other debug entry types. All callers throughout the codebase are updated to pass physical addresses directly, eliminating the need for page-to-physical conversion in the debug layer. This refactoring eliminates the need to convert between page pointers and physical addresses in the debug layer, making the code more efficient and consistent with the DMA mapping API's physical address focus. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky [mszyprow: added a fixup] Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/56d1a6769b68dfcbf8b26a75a7329aeb8e3c3b6a.1757423202.git.leonro@nvidia.com Link: https://lore.kernel.org/all/20250910052618.GH341237@unreal/ --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8d3fa3a91ce4..2a1f34617802 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -618,6 +618,7 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) #else PAGEFLAG_FALSE(HighMem, highmem) #endif +#define PhysHighMem(__p) (PageHighMem(phys_to_page(__p))) /* Does kmap_local_folio() only allow access to one page of the folio? 
*/ #ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP -- cgit v1.2.3 From 513559f73700966ded094b090c3ecc6dff877ef9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:33 +0300 Subject: iommu/dma: rename iommu_dma_*map_page to iommu_dma_*map_phys Rename the IOMMU DMA mapping functions to better reflect their actual calling convention. The functions iommu_dma_map_page() and iommu_dma_unmap_page() are renamed to iommu_dma_map_phys() and iommu_dma_unmap_phys() respectively, as they already operate on physical addresses rather than page structures. The calling convention changes from accepting (struct page *page, unsigned long offset) to (phys_addr_t phys), which eliminates the need for page-to-physical address conversion within the functions. This renaming prepares for the broader DMA API conversion from page-based to physical address-based mapping throughout the kernel. All callers are updated to pass physical addresses directly, including dma_map_page_attrs(), scatterlist mapping functions, and DMA page allocation helpers. The change simplifies the code by removing the page_to_phys() + offset calculation that was previously done inside the IOMMU functions. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/ed172f95f8f57782beae04f782813366894e98df.1757423202.git.leonro@nvidia.com --- include/linux/iommu-dma.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 508beaa44c39..485bdffed988 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -21,10 +21,9 @@ static inline bool use_dma_iommu(struct device *dev) } #endif /* CONFIG_IOMMU_DMA */ -dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, +dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, unsigned long attrs); int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); -- cgit v1.2.3 From e53d29f957b36ba1666331956c6ccb047bb157d2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:35 +0300 Subject: dma-mapping: convert dma_direct_*map_page to be phys_addr_t based Convert the DMA direct mapping functions to accept physical addresses directly instead of page+offset parameters. The functions were already operating on physical addresses internally, so this change eliminates the redundant page-to-physical conversion at the API boundary. The functions dma_direct_map_page() and dma_direct_unmap_page() are renamed to dma_direct_map_phys() and dma_direct_unmap_phys() respectively, with their calling convention changed from (struct page *page, unsigned long offset) to (phys_addr_t phys). Architecture-specific functions arch_dma_map_page_direct() and arch_dma_unmap_page_direct() are similarly renamed to arch_dma_map_phys_direct() and arch_dma_unmap_phys_direct(). The is_pci_p2pdma_page() checks are replaced with DMA_ATTR_MMIO checks to allow integration with dma_direct_map_resource(), and dma_direct_map_phys() is extended to support the MMIO path as well.
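A simplified sketch of the resulting direct-mapping logic (illustrative only; swiotlb bouncing and error handling are omitted, and this is not the literal upstream implementation):

	static dma_addr_t sketch_direct_map_phys(struct device *dev,
			phys_addr_t phys, size_t size,
			enum dma_data_direction dir, unsigned long attrs)
	{
		dma_addr_t dma_addr = phys_to_dma(dev, phys);

		/* DMA_ATTR_MMIO takes over from the old is_pci_p2pdma_page()
		 * check: MMIO targets get no CPU cache maintenance.
		 */
		if (!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)) &&
		    !dev_is_dma_coherent(dev))
			arch_sync_dma_for_device(phys, size, dir);

		return dma_addr;
	}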
Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/bb15a22f76dc2e26683333ff54e789606cfbfcf0.1757423202.git.leonro@nvidia.com --- include/linux/dma-map-ops.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 332b80c42b6f..10882d00cb17 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -395,15 +395,15 @@ void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); #ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT -bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr); -bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle); +bool arch_dma_map_phys_direct(struct device *dev, phys_addr_t addr); +bool arch_dma_unmap_phys_direct(struct device *dev, dma_addr_t dma_handle); bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, int nents); bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, int nents); #else -#define arch_dma_map_page_direct(d, a) (false) -#define arch_dma_unmap_page_direct(d, a) (false) +#define arch_dma_map_phys_direct(d, a) (false) +#define arch_dma_unmap_phys_direct(d, a) (false) #define arch_dma_map_sg_direct(d, s, n) (false) #define arch_dma_unmap_sg_direct(d, s, n) (false) #endif -- cgit v1.2.3 From 6eb1e769b2c13a33cb2ca694454a7561d3d72c0a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:36 +0300 Subject: kmsan: convert kmsan_handle_dma to use physical addresses Convert the KMSAN DMA handling function from a page-based to a physical address-based interface. The refactoring changes the kmsan_handle_dma() parameters from (struct page *page, size_t offset, size_t size) to (phys_addr_t phys, size_t size). The existing semantics, where callers are expected to provide only kmap memory, are preserved here. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/3557cbaf66e935bc794f37d2b891ef75cbf2c80c.1757423202.git.leonro@nvidia.com --- include/linux/kmsan.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index 2b1432cc16d5..f2fd221107bb 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -182,8 +182,7 @@ void kmsan_iounmap_page_range(unsigned long start, unsigned long end); /** * kmsan_handle_dma() - Handle a DMA data transfer. - * @page: first page of the buffer. - * @offset: offset of the buffer within the first page. + * @phys: physical address of the buffer. * @size: buffer size. * @dir: one of possible dma_data_direction values. * * Depending on @direction, KMSAN: * * checks the buffer, if it is copied to device; * * initializes the buffer, if it is copied from device; * * does both, if this is a DMA_BIDIRECTIONAL transfer.
*/ -void kmsan_handle_dma(struct page *page, size_t offset, size_t size, +void kmsan_handle_dma(phys_addr_t phys, size_t size, enum dma_data_direction dir); /** @@ -372,8 +371,8 @@ static inline void kmsan_iounmap_page_range(unsigned long start, { } -static inline void kmsan_handle_dma(struct page *page, size_t offset, - size_t size, enum dma_data_direction dir) +static inline void kmsan_handle_dma(phys_addr_t phys, size_t size, + enum dma_data_direction dir) { } -- cgit v1.2.3 From f7326196a781622b33bfbdabb00f5e72b5fb5679 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:39 +0300 Subject: dma-mapping: export new dma_*map_phys() interface Introduce new DMA mapping functions dma_map_phys() and dma_unmap_phys() that operate directly on physical addresses instead of page+offset parameters. This provides a more efficient interface for drivers that already have physical addresses available. The new functions are implemented as the primary mapping layer, with the existing dma_map_page_attrs()/dma_map_resource() and dma_unmap_page_attrs()/dma_unmap_resource() functions converted to simple wrappers around the phys-based implementations. In the case of dma_map_page_attrs(), the struct page is converted to a physical address with the help of the page_to_phys() function, and dma_map_resource() provides the physical address as-is, together with the addition of the DMA_ATTR_MMIO attribute. The old page-based API is preserved in mapping.c to ensure that existing code won't be affected by changing EXPORT_SYMBOL to the EXPORT_SYMBOL_GPL variant for dma_*map_phys(). Reviewed-by: Jason Gunthorpe Reviewed-by: Keith Busch Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/54cc52af91777906bbe4a386113437ba0bcfba9c.1757423202.git.leonro@nvidia.com --- include/linux/dma-direct.h | 2 -- include/linux/dma-mapping.h | 13 +++++++++++++ include/linux/iommu-dma.h | 4 ---- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index f3bc0bcd7098..c249912456f9 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -149,7 +149,5 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir); int dma_direct_supported(struct device *dev, u64 mask); -dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir, unsigned long attrs); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4254fd9bdf5d..8248ff9363ee 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -138,6 +138,10 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, unsigned long attrs); void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); +dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs); unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, @@ -192,6 +196,15 @@ static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction
dir, unsigned long attrs) { } +static inline dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void dma_unmap_phys(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ +} static inline unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 485bdffed988..a92b3ff9b934 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -42,10 +42,6 @@ size_t iommu_dma_opt_mapping_size(void); size_t iommu_dma_max_mapping_size(struct device *dev); void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs); -dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); -void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs); struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); void iommu_dma_free_noncontiguous(struct device *dev, size_t size, -- cgit v1.2.3 From 5f790208d68fe1526c751dc2af366c7b552b8631 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 10 Sep 2025 22:42:47 +0200 Subject: net: phy: fixed_phy: remove two function stubs Remove stubs for fixed_phy_set_link_update() and fixed_phy_change_carrier() because all callers (actually just one per function) select config symbol FIXED_PHY. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/8729170d-cf39-48d9-aabc-c9aa4acda070@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 6227a1bdefec..d17ff750c708 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -37,16 +37,6 @@ fixed_phy_register(const struct fixed_phy_status *status, static inline void fixed_phy_unregister(struct phy_device *phydev) { } -static inline int fixed_phy_set_link_update(struct phy_device *phydev, - int (*link_update)(struct net_device *, - struct fixed_phy_status *)) -{ - return -ENODEV; -} -static inline int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier) -{ - return -EINVAL; -} #endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.2.3 From 9b90afa6d613b66ec4e74ae75f9bfa5baf386ecd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 10 Sep 2025 09:12:51 +0200 Subject: gpio: move gpio-mmio-specific fields out of struct gpio_chip With all users of bgpio_init() converted to using the modernized generic GPIO chip API, we can now move the gpio-mmio-specific fields out of struct gpio_chip and into the dedicated struct gpio_generic_chip. To that end: adjust the gpio-mmio driver to the new layout, update the docs, etc. The changes in gpio-mlxbf2.c and gpio-mpc8xxx.c are here and not in their respective conversion commits because the former passes the address of the generic chip's lock to the __releases() annotation and we cannot really hide it, while gpio-mpc8xxx.c accesses the shadow registers in a driver-specific workaround and there's no reason to make them available in a public API.
Also: drop the relevant task from TODO as it's now done. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250910-gpio-mmio-gpio-conv-part4-v2-15-f3d1a4c57124@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 44 ----------------------- include/linux/gpio/generic.h | 67 +++++++++++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9fcd4a988081..9b14fd20f13e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -388,28 +388,6 @@ struct gpio_irq_chip { * implies that if the chip supports IRQs, these IRQs need to be threaded * as the chip access may sleep when e.g. reading out the IRQ status * registers. - * @read_reg: reader function for generic GPIO - * @write_reg: writer function for generic GPIO - * @be_bits: if the generic GPIO has big endian bit order (bit 31 is representing - * line 0, bit 30 is line 1 ... bit 0 is line 31) this is set to true by the - * generic GPIO core. It is for internal housekeeping only. - * @reg_dat: data (in) register for generic GPIO - * @reg_set: output set register (out=high) for generic GPIO - * @reg_clr: output clear register (out=low) for generic GPIO - * @reg_dir_out: direction out setting register for generic GPIO - * @reg_dir_in: direction in setting register for generic GPIO - * @bgpio_dir_unreadable: indicates that the direction register(s) cannot - * be read and we need to rely on out internal state tracking. - * @bgpio_pinctrl: the generic GPIO uses a pin control backend. - * @bgpio_bits: number of register bits used for a generic GPIO i.e. - * <register width> * 8 - * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep - * shadowed and real data registers writes together. - * @bgpio_data: shadowed data register for generic GPIO to clear/set bits - * safely. - * @bgpio_dir: shadowed direction register for generic GPIO to clear/set - * direction safely. A "1" in this word means the line is set as - * output. * * A gpio_chip can help platforms abstract various sources of GPIOs so * they can all be accessed through a common programming interface.
@@ -475,23 +453,6 @@ struct gpio_chip { const char *const *names; bool can_sleep; -#if IS_ENABLED(CONFIG_GPIO_GENERIC) - unsigned long (*read_reg)(void __iomem *reg); - void (*write_reg)(void __iomem *reg, unsigned long data); - bool be_bits; - void __iomem *reg_dat; - void __iomem *reg_set; - void __iomem *reg_clr; - void __iomem *reg_dir_out; - void __iomem *reg_dir_in; - bool bgpio_dir_unreadable; - bool bgpio_pinctrl; - int bgpio_bits; - raw_spinlock_t bgpio_lock; - unsigned long bgpio_data; - unsigned long bgpio_dir; -#endif /* CONFIG_GPIO_GENERIC */ - #ifdef CONFIG_GPIOLIB_IRQCHIP /* * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib @@ -723,11 +684,6 @@ int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -int bgpio_init(struct gpio_chip *gc, struct device *dev, - unsigned long sz, void __iomem *dat, void __iomem *set, - void __iomem *clr, void __iomem *dirout, void __iomem *dirin, - unsigned long flags); - #define BGPIOF_BIG_ENDIAN BIT(0) #define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ #define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index 4c0626b53ec9..162430d96660 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -50,9 +50,44 @@ struct gpio_generic_chip_config { * struct gpio_generic_chip - Generic GPIO chip implementation. * @gc: The underlying struct gpio_chip object, implementing low-level GPIO * chip routines. + * @read_reg: reader function for generic GPIO + * @write_reg: writer function for generic GPIO + * @be_bits: if the generic GPIO has big endian bit order (bit 31 is + * representing line 0, bit 30 is line 1 ... bit 0 is line 31) this + * is set to true by the generic GPIO core. It is for internal + * housekeeping only. + * @reg_dat: data (in) register for generic GPIO + * @reg_set: output set register (out=high) for generic GPIO + * @reg_clr: output clear register (out=low) for generic GPIO + * @reg_dir_out: direction out setting register for generic GPIO + * @reg_dir_in: direction in setting register for generic GPIO + * @dir_unreadable: indicates that the direction register(s) cannot be read and + * we need to rely on out internal state tracking. + * @pinctrl: the generic GPIO uses a pin control backend. + * @bits: number of register bits used for a generic GPIO + * i.e. <register width> * 8 + * @lock: used to lock chip->sdata. Also, this is needed to keep + * shadowed and real data registers writes together. + * @sdata: shadowed data register for generic GPIO to clear/set bits safely. + * @sdir: shadowed direction register for generic GPIO to clear/set direction + * safely. A "1" in this word means the line is set as output. */ struct gpio_generic_chip { struct gpio_chip gc; + unsigned long (*read_reg)(void __iomem *reg); + void (*write_reg)(void __iomem *reg, unsigned long data); + bool be_bits; + void __iomem *reg_dat; + void __iomem *reg_set; + void __iomem *reg_clr; + void __iomem *reg_dir_out; + void __iomem *reg_dir_in; + bool dir_unreadable; + bool pinctrl; + int bits; + raw_spinlock_t lock; + unsigned long sdata; + unsigned long sdir; }; static inline struct gpio_generic_chip * @@ -61,20 +96,8 @@ to_gpio_generic_chip(struct gpio_chip *gc) return container_of(gc, struct gpio_generic_chip, gc); } -/** - * gpio_generic_chip_init() - Initialize a generic GPIO chip. - * @chip: Generic GPIO chip to set up. - * @cfg: Generic GPIO chip configuration.
- * - * Returns 0 on success, negative error number on failure. - */ -static inline int -gpio_generic_chip_init(struct gpio_generic_chip *chip, - const struct gpio_generic_chip_config *cfg) -{ - return bgpio_init(&chip->gc, cfg->dev, cfg->sz, cfg->dat, cfg->set, - cfg->clr, cfg->dirout, cfg->dirin, cfg->flags); -} +int gpio_generic_chip_init(struct gpio_generic_chip *chip, + const struct gpio_generic_chip_config *cfg); /** * gpio_generic_chip_set() - Set the GPIO line value of the generic GPIO chip. @@ -110,10 +133,10 @@ gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, static inline unsigned long gpio_generic_read_reg(struct gpio_generic_chip *chip, void __iomem *reg) { - if (WARN_ON(!chip->gc.read_reg)) + if (WARN_ON(!chip->read_reg)) return 0; - return chip->gc.read_reg(reg); + return chip->read_reg(reg); } /** @@ -125,23 +148,23 @@ gpio_generic_read_reg(struct gpio_generic_chip *chip, void __iomem *reg) static inline void gpio_generic_write_reg(struct gpio_generic_chip *chip, void __iomem *reg, unsigned long val) { - if (WARN_ON(!chip->gc.write_reg)) + if (WARN_ON(!chip->write_reg)) return; - chip->gc.write_reg(reg, val); + chip->write_reg(reg, val); } #define gpio_generic_chip_lock(gen_gc) \ - raw_spin_lock(&(gen_gc)->gc.bgpio_lock) + raw_spin_lock(&(gen_gc)->lock) #define gpio_generic_chip_unlock(gen_gc) \ - raw_spin_unlock(&(gen_gc)->gc.bgpio_lock) + raw_spin_unlock(&(gen_gc)->lock) #define gpio_generic_chip_lock_irqsave(gen_gc, flags) \ - raw_spin_lock_irqsave(&(gen_gc)->gc.bgpio_lock, flags) + raw_spin_lock_irqsave(&(gen_gc)->lock, flags) #define gpio_generic_chip_unlock_irqrestore(gen_gc, flags) \ - raw_spin_unlock_irqrestore(&(gen_gc)->gc.bgpio_lock, flags) + raw_spin_unlock_irqrestore(&(gen_gc)->lock, flags) DEFINE_LOCK_GUARD_1(gpio_generic_lock, struct gpio_generic_chip, -- cgit v1.2.3 From 121a0f839dbb397af5fabb701cea3e9983223e50 Mon Sep 17 00:00:00 2001 From: Israel Cepeda Date: Thu, 11 Sep 2025 20:13:41 +0200 Subject: usb: misc: Add Intel USBIO bridge driver Add a driver for the Intel USBIO USB IO-expander used by the MIPI cameras on various new (Meteor Lake and later) Intel laptops. This is a USB bridge driver which adds auxbus child devices for the GPIO, I2C and SPI functions of the USBIO chip and which exports IO-functions for the drivers for the auxbus child devices to communicate with the USBIO device's firmware. Co-developed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Israel Cepeda Link: https://lore.kernel.org/r/20250911181343.77398-2-hansg@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/usbio.h | 177 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 include/linux/usb/usbio.h (limited to 'include/linux') diff --git a/include/linux/usb/usbio.h b/include/linux/usb/usbio.h new file mode 100644 index 000000000000..6c4e7c246d58 --- /dev/null +++ b/include/linux/usb/usbio.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Intel Corporation.
+ * + */ + +#ifndef _LINUX_USBIO_H_ +#define _LINUX_USBIO_H_ + +#include +#include +#include +#include + +/*********************** + * USBIO Clients Names * + ***********************/ +#define USBIO_GPIO_CLIENT "usbio-gpio" +#define USBIO_I2C_CLIENT "usbio-i2c" + +/**************** + * USBIO quirks * + ****************/ +#define USBIO_QUIRK_BULK_MAXP_63 BIT(0) /* Force bulk endpoint maxp to 63 */ +#define USBIO_QUIRK_I2C_NO_INIT_ACK BIT(8) /* Do not ask for ack on I2C init */ +#define USBIO_QUIRK_I2C_MAX_RW_LEN_52 BIT(9) /* Set i2c-adapter max r/w len to 52 */ +#define USBIO_QUIRK_I2C_USE_CHUNK_LEN BIT(10) /* Send chunk-len for split xfers */ +#define USBIO_QUIRK_I2C_ALLOW_400KHZ BIT(11) /* Override desc, allowing 400 KHz */ + +/************************** + * USBIO Type Definitions * + **************************/ + +/* USBIO Packet Type */ +#define USBIO_PKTTYPE_CTRL 1 +#define USBIO_PKTTYPE_DBG 2 +#define USBIO_PKTTYPE_GPIO 3 +#define USBIO_PKTTYPE_I2C 4 + +/* USBIO Packet Header */ +struct usbio_packet_header { + u8 type; + u8 cmd; + u8 flags; +} __packed; + +/* USBIO Control Transfer Packet */ +struct usbio_ctrl_packet { + struct usbio_packet_header header; + u8 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO Bulk Transfer Packet */ +struct usbio_bulk_packet { + struct usbio_packet_header header; + __le16 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO GPIO commands */ +enum usbio_gpio_cmd { + USBIO_GPIOCMD_DEINIT, + USBIO_GPIOCMD_INIT, + USBIO_GPIOCMD_READ, + USBIO_GPIOCMD_WRITE, + USBIO_GPIOCMD_END +}; + +/* USBIO GPIO config */ +enum usbio_gpio_pincfg { + USBIO_GPIO_PINCFG_DEFAULT, + USBIO_GPIO_PINCFG_PULLUP, + USBIO_GPIO_PINCFG_PULLDOWN, + USBIO_GPIO_PINCFG_PUSHPULL +}; + +#define USBIO_GPIO_PINCFG_SHIFT 2 +#define USBIO_GPIO_PINCFG_MASK (0x3 << USBIO_GPIO_PINCFG_SHIFT) +#define USBIO_GPIO_SET_PINCFG(pincfg) \ + (((pincfg) << USBIO_GPIO_PINCFG_SHIFT) & USBIO_GPIO_PINCFG_MASK) + +enum usbio_gpio_pinmode { + USBIO_GPIO_PINMOD_INVAL, + USBIO_GPIO_PINMOD_INPUT, + USBIO_GPIO_PINMOD_OUTPUT, + USBIO_GPIO_PINMOD_MAXVAL +}; + +#define USBIO_GPIO_PINMOD_MASK 0x3 +#define USBIO_GPIO_SET_PINMOD(pin) (pin & USBIO_GPIO_PINMOD_MASK) + +/************************* + * USBIO GPIO Controller * + *************************/ + +#define USBIO_MAX_GPIOBANKS 5 +#define USBIO_GPIOSPERBANK 32 + +struct usbio_gpio_bank_desc { + u8 id; + u8 pins; + __le32 bmap; +} __packed; + +struct usbio_gpio_init { + u8 bankid; + u8 config; + u8 pincount; + u8 pin; +} __packed; + +struct usbio_gpio_rw { + u8 bankid; + u8 pincount; + u8 pin; + __le32 value; +} __packed; + +/* USBIO I2C commands */ +enum usbio_i2c_cmd { + USBIO_I2CCMD_UNINIT, + USBIO_I2CCMD_INIT, + USBIO_I2CCMD_READ, + USBIO_I2CCMD_WRITE, + USBIO_I2CCMD_END +}; + +/************************ + * USBIO I2C Controller * + ************************/ + +#define USBIO_MAX_I2CBUSES 5 + +#define USBIO_I2C_BUS_ADDR_CAP_10B BIT(3) /* 10bit address support */ +#define USBIO_I2C_BUS_MODE_CAP_MASK 0x3 +#define USBIO_I2C_BUS_MODE_CAP_SM 0 /* Standard Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FM 1 /* Fast Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FMP 2 /* Fast Mode+ */ +#define USBIO_I2C_BUS_MODE_CAP_HSM 3 /* High-Speed Mode */ + +struct usbio_i2c_bus_desc { + u8 id; + u8 caps; +} __packed; + +struct usbio_i2c_uninit { + u8 busid; + __le16 config; +} __packed; + +struct usbio_i2c_init { + u8 busid; + __le16 config; + __le32 speed; +} __packed; + +struct usbio_i2c_rw { + u8 busid; + __le16 config; + __le16 size; + u8 data[] __counted_by(size); +} __packed; 
+ +int usbio_control_msg(struct auxiliary_device *adev, u8 type, u8 cmd, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_bulk_msg(struct auxiliary_device *adev, u8 type, u8 cmd, bool last, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_acquire(struct auxiliary_device *adev); +void usbio_release(struct auxiliary_device *adev); +void usbio_get_txrxbuf_len(struct auxiliary_device *adev, u16 *txbuf_len, u16 *rxbuf_len); +unsigned long usbio_get_quirks(struct auxiliary_device *adev); +void usbio_acpi_bind(struct auxiliary_device *adev, const struct acpi_device_id *hids); + +#endif -- cgit v1.2.3 From 7f70b89b2be66c03ddc76d3ad8aebeeec4a9c505 Mon Sep 17 00:00:00 2001 From: Guan-Yu Lin Date: Thu, 11 Sep 2025 14:20:14 +0000 Subject: usb: offload: add apis for offload usage tracking Introduce offload_usage and corresponding APIs to track offload usage on each USB device. Offload denotes that there is another co-processor accessing the USB device via the same USB host controller. To optimize power usage, it's essential to monitor whether the USB device is actively used by another co-processor. This information is vital when determining if a USB device can be safely suspended during system power state transitions. Signed-off-by: Guan-Yu Lin Link: https://lore.kernel.org/r/20250911142051.90822-3-guanyulin@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250911142051.90822-3-guanyulin@google.com --- include/linux/usb.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 70ef00c42d22..e85105939af8 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -636,6 +636,8 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) + * @offload_at_suspend: offload activities during suspend is enabled. + * @offload_usage: number of offload activities happening on this usb device. * @slot_id: Slot ID assigned by xHCI * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. @@ -724,6 +726,8 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; + unsigned offload_at_suspend:1; + int offload_usage; enum usb_link_tunnel_mode tunnel_mode; struct device_link *usb4_link; @@ -841,6 +845,20 @@ static inline void usb_mark_last_busy(struct usb_device *udev) { } #endif +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +int usb_offload_get(struct usb_device *udev); +int usb_offload_put(struct usb_device *udev); +bool usb_offload_check(struct usb_device *udev); +#else + +static inline int usb_offload_get(struct usb_device *udev) +{ return 0; } +static inline int usb_offload_put(struct usb_device *udev) +{ return 0; } +static inline bool usb_offload_check(struct usb_device *udev) +{ return false; } +#endif + extern int usb_disable_lpm(struct usb_device *udev); extern void usb_enable_lpm(struct usb_device *udev); /* Same as above, but these functions lock/unlock the bandwidth_mutex. */ -- cgit v1.2.3 From ef82a4803aabaf623bfcae07981406f1386eabf9 Mon Sep 17 00:00:00 2001 From: Guan-Yu Lin Date: Thu, 11 Sep 2025 14:20:15 +0000 Subject: xhci: sideband: add api to trace sideband usage The existing sideband driver only registers sidebands without tracking their active usage.
To address this, sideband will now record its active usage when it creates/removes interrupters. In addition, a new API is introduced to provide a means for other drivers to fetch sideband activity information on a USB host controller. Signed-off-by: Guan-Yu Lin Link: https://lore.kernel.org/r/20250911142051.90822-4-guanyulin@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250911142051.90822-4-guanyulin@google.com --- include/linux/usb/xhci-sideband.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h index 45288c392f6e..005257085dcb 100644 --- a/include/linux/usb/xhci-sideband.h +++ b/include/linux/usb/xhci-sideband.h @@ -11,6 +11,7 @@ #include #include +#include #define EP_CTX_PER_DEV 31 /* FIXME defined twice, from xhci.h */ @@ -83,6 +84,14 @@ xhci_sideband_get_endpoint_buffer(struct xhci_sideband *sb, struct usb_host_endpoint *host_ep); struct sg_table * xhci_sideband_get_event_buffer(struct xhci_sideband *sb); + +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +bool xhci_sideband_check(struct usb_hcd *hcd); +#else +static inline bool xhci_sideband_check(struct usb_hcd *hcd) +{ return false; } +#endif /* IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) */ + int xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, bool ip_autoclear, u32 imod_interval, int intr_num); -- cgit v1.2.3 From f338529ca9279e3bea392cb53cec8bd292909cb1 Mon Sep 17 00:00:00 2001 From: Sarthak Garg Date: Mon, 8 Sep 2025 16:11:21 +0530 Subject: mmc: core: Parse and use the new max-sd-hs-hz DT property Introduce a new device tree flag to cap the maximum High-Speed (HS) mode frequency for SD cards, accommodating board-specific electrical limitations which cannot support the default 50 MHz HS frequency and other rates. Signed-off-by: Sarthak Garg Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e0d935a4ac1d..e0e2c265e5d1 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -576,6 +576,7 @@ struct mmc_host { int hsq_depth; u32 err_stats[MMC_ERR_MAX]; + u32 max_sd_hs_hz; unsigned long private[] ____cacheline_aligned; }; -- cgit v1.2.3 From 70da02061499ca89ab92f7c4310f815d5fe674ec Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 8 Sep 2025 07:35:22 +0000 Subject: iio: add power and energy measurement modifiers Add new IIO modifiers to support power and energy measurement devices: Power modifiers: - IIO_MOD_ACTIVE: Real power consumed by the load - IIO_MOD_REACTIVE: Power that oscillates between source and load - IIO_MOD_APPARENT: Magnitude of complex power Signal quality modifiers: - IIO_MOD_RMS: Root Mean Square value Additionally adds: - IIO_CHAN_INFO_POWERFACTOR: Power factor channel info type for representing the ratio of active power to apparent power These modifiers enable proper representation of power measurement devices like energy meters and power analyzers.
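For illustration, a hypothetical meter driver could describe an active-power channel with the new modifier and info element like this (sketch only, not from the patch):

	static const struct iio_chan_spec energy_meter_channels[] = {
		{
			.type = IIO_POWER,
			.modified = 1,
			.channel2 = IIO_MOD_ACTIVE,
			.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
					      BIT(IIO_CHAN_INFO_POWERFACTOR),
		},
	};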
Signed-off-by: Antoniu Miclaus Signed-off-by: Jonathan Cameron --- include/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index ad2761efcc83..34eebad12d2c 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -70,6 +70,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_ZEROPOINT, IIO_CHAN_INFO_TROUGH, IIO_CHAN_INFO_CONVDELAY, + IIO_CHAN_INFO_POWERFACTOR, }; #endif /* _IIO_TYPES_H_ */ -- cgit v1.2.3 From 3422b4bc606eee2ba7758ea9347c83332eeec3e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Thu, 11 Sep 2025 14:43:45 +0200 Subject: iio: adc: Add driver for Marvell 88PM886 PMIC ADC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marvell's 88PM886 PMIC has a so-called General Purpose ADC used for monitoring various system voltages and temperatures. Add the relevant register definitions to the MFD header and a driver for the ADC. Acked-by: Karel Balej # for the PMIC Signed-off-by: Duje Mihanović Signed-off-by: Jonathan Cameron --- include/linux/mfd/88pm886.h | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h index 85eca44f39ab..38892ba7b8a4 100644 --- a/include/linux/mfd/88pm886.h +++ b/include/linux/mfd/88pm886.h @@ -10,6 +10,7 @@ #define PM886_IRQ_ONKEY 0 #define PM886_PAGE_OFFSET_REGULATORS 1 +#define PM886_PAGE_OFFSET_GPADC 2 #define PM886_REG_ID 0x00 @@ -70,6 +71,63 @@ #define PM886_LDO_VSEL_MASK 0x0f #define PM886_BUCK_VSEL_MASK 0x7f +/* GPADC enable/disable registers */ +#define PM886_REG_GPADC_CONFIG(n) (n) + +#define PM886_GPADC_VSC_EN BIT(0) +#define PM886_GPADC_VBAT_EN BIT(1) +#define PM886_GPADC_GNDDET1_EN BIT(3) +#define PM886_GPADC_VBUS_EN BIT(4) +#define PM886_GPADC_VCHG_PWR_EN BIT(5) +#define PM886_GPADC_VCF_OUT_EN BIT(6) +#define PM886_GPADC_CONFIG1_EN_ALL \ + (PM886_GPADC_VSC_EN | \ + PM886_GPADC_VBAT_EN | \ + PM886_GPADC_GNDDET1_EN | \ + PM886_GPADC_VBUS_EN | \ + PM886_GPADC_VCHG_PWR_EN | \ + PM886_GPADC_VCF_OUT_EN) + +#define PM886_GPADC_TINT_EN BIT(0) +#define PM886_GPADC_PMODE_EN BIT(1) +#define PM886_GPADC_GPADC0_EN BIT(2) +#define PM886_GPADC_GPADC1_EN BIT(3) +#define PM886_GPADC_GPADC2_EN BIT(4) +#define PM886_GPADC_GPADC3_EN BIT(5) +#define PM886_GPADC_MIC_DET_EN BIT(6) +#define PM886_GPADC_CONFIG2_EN_ALL \ + (PM886_GPADC_TINT_EN | \ + PM886_GPADC_GPADC0_EN | \ + PM886_GPADC_GPADC1_EN | \ + PM886_GPADC_GPADC2_EN | \ + PM886_GPADC_GPADC3_EN | \ + PM886_GPADC_MIC_DET_EN) + +/* No CONFIG3_EN_ALL because this is the only bit there. */ +#define PM886_GPADC_GND_DET2_EN BIT(0) + +/* GPADC channel registers */ +#define PM886_REG_GPADC_VSC 0x40 +#define PM886_REG_GPADC_VCHG_PWR 0x4c +#define PM886_REG_GPADC_VCF_OUT 0x4e +#define PM886_REG_GPADC_TINT 0x50 +#define PM886_REG_GPADC_GPADC0 0x54 +#define PM886_REG_GPADC_GPADC1 0x56 +#define PM886_REG_GPADC_GPADC2 0x58 +#define PM886_REG_GPADC_VBAT 0xa0 +#define PM886_REG_GPADC_GND_DET1 0xa4 +#define PM886_REG_GPADC_GND_DET2 0xa6 +#define PM886_REG_GPADC_VBUS 0xa8 +#define PM886_REG_GPADC_GPADC3 0xaa +#define PM886_REG_GPADC_MIC_DET 0xac +#define PM886_REG_GPADC_VBAT_SLP 0xb0 + +/* VBAT_SLP is the last register and is 2 bytes wide like other channels. 
*/ +#define PM886_GPADC_MAX_REGISTER (PM886_REG_GPADC_VBAT_SLP + 1) + +#define PM886_GPADC_BIAS_LEVELS 16 +#define PM886_GPADC_INDEX_TO_BIAS_uA(i) (1 + (i) * 5) + struct pm886_chip { struct i2c_client *client; unsigned int chip_id; -- cgit v1.2.3 From 1ab40529ad52f339975886a6a9e815dfdcb8d011 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 15 Jul 2025 21:52:54 +0300 Subject: media: uvcvideo: Move MSXU_CONTROL_METADATA definition to header Move the MSXU_CONTROL_METADATA control definition to the include/linux/usb/uvc.h header, alongside the corresponding XU GUID. Add a UVC_ prefix to avoid namespace clashes. While at it, add the definition for the other controls for that extension unit, as defined in https://learn.microsoft.com/en-us/windows-hardware/drivers/stream/uvc-extensions-1-5#222-extension-unit-controls. Signed-off-by: Laurent Pinchart Reviewed-by: Ricardo Ribalda Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Hans Verkuil --- include/linux/usb/uvc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index ee19e9f915b8..12a57e1d3467 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -33,6 +33,23 @@ {0xdc, 0x95, 0x3f, 0x0f, 0x32, 0x26, 0x4e, 0x4c, \ 0x92, 0xc9, 0xa0, 0x47, 0x82, 0xf4, 0x3b, 0xc8} +/* https://learn.microsoft.com/en-us/windows-hardware/drivers/stream/uvc-extensions-1-5#222-extension-unit-controls */ +#define UVC_MSXU_CONTROL_FOCUS 0x01 +#define UVC_MSXU_CONTROL_EXPOSURE 0x02 +#define UVC_MSXU_CONTROL_EVCOMPENSATION 0x03 +#define UVC_MSXU_CONTROL_WHITEBALANCE 0x04 +#define UVC_MSXU_CONTROL_FACE_AUTHENTICATION 0x06 +#define UVC_MSXU_CONTROL_CAMERA_EXTRINSICS 0x07 +#define UVC_MSXU_CONTROL_CAMERA_INTRINSICS 0x08 +#define UVC_MSXU_CONTROL_METADATA 0x09 +#define UVC_MSXU_CONTROL_IR_TORCH 0x0a +#define UVC_MSXU_CONTROL_DIGITALWINDOW 0x0b +#define UVC_MSXU_CONTROL_DIGITALWINDOW_CONFIG 0x0c +#define UVC_MSXU_CONTROL_VIDEO_HDR 0x0d +#define UVC_MSXU_CONTROL_FRAMERATE_THROTTLE 0x0e +#define UVC_MSXU_CONTROL_FIELDOFVIEW2_CONFIG 0x0f +#define UVC_MSXU_CONTROL_FIELDOFVIEW2 0x10 + #define UVC_GUID_FORMAT_MJPEG \ { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From 0f99b8bed426b8f5434b10c3f6f6b92d7ce3c467 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 18 Aug 2025 20:15:39 +0000 Subject: media: uvcvideo: Support UVC_CROSXU_CONTROL_IQ_PROFILE The ChromeOS XU provides a control to change the IQ profile for a camera. It can be switched from VIVID (a.k.a. standard) to NONE (a.k.a. natural). Wire it up to the standard v4l2 control.
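For illustration, an XU control is addressed by the GUID of its extension unit plus a selector within that unit; the pairing below is a hypothetical sketch, not a structure from the driver:

#include <linux/types.h>
#include <linux/usb/uvc.h>

/* Illustrative only: pair a unit GUID with a control selector. */
struct xu_ctrl_id {
	u8 guid[16];
	u8 selector;
};

static const struct xu_ctrl_id iq_profile_ctrl = {
	.guid = UVC_GUID_CHROMEOS_XU,
	.selector = UVC_CROSXU_CONTROL_IQ_PROFILE,
};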
Signed-off-by: Ricardo Ribalda Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/linux/usb/uvc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 12a57e1d3467..22e0dab0809e 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -29,6 +29,9 @@ #define UVC_GUID_EXT_GPIO_CONTROLLER \ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03} +#define UVC_GUID_CHROMEOS_XU \ + {0x24, 0xe9, 0xd7, 0x74, 0xc9, 0x49, 0x45, 0x4a, \ + 0x98, 0xa3, 0xc8, 0x07, 0x7e, 0x05, 0x1c, 0xa3} #define UVC_GUID_MSXU_1_5 \ {0xdc, 0x95, 0x3f, 0x0f, 0x32, 0x26, 0x4e, 0x4c, \ 0x92, 0xc9, 0xa0, 0x47, 0x82, 0xf4, 0x3b, 0xc8} @@ -50,6 +53,8 @@ #define UVC_MSXU_CONTROL_FIELDOFVIEW2_CONFIG 0x0f #define UVC_MSXU_CONTROL_FIELDOFVIEW2 0x10 +#define UVC_CROSXU_CONTROL_IQ_PROFILE 0x04 + #define UVC_GUID_FORMAT_MJPEG \ { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From f1880f9cc1476171bec3dae8fd56fff5665e22a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Grosjean?= Date: Fri, 12 Sep 2025 10:17:19 +0200 Subject: can: peak: Modification of references to email accounts being deleted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the upcoming deletion of @peak-system.com accounts and following the acquisition of PEAK-System and its brand by HMS-Networks, this fix aims to migrate all address references to @hms-networks.com, as well as to map my personal committer addresses to author addresses, while taking the opportunity to correct the accent on the first ‘e’ of my first name. Signed-off-by: Stéphane Grosjean Link: https://patch.msgid.link/20250912081820.86314-1-stephane.grosjean@free.fr Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev/peak_canfd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h index f38772fd0c07..d3788a3d0942 100644 --- a/include/linux/can/dev/peak_canfd.h +++ b/include/linux/can/dev/peak_canfd.h @@ -2,8 +2,8 @@ /* * CAN driver for PEAK System micro-CAN based adapters * - * Copyright (C) 2003-2011 PEAK System-Technik GmbH - * Copyright (C) 2011-2013 Stephane Grosjean + * Copyright (C) 2003-2025 PEAK System-Technik GmbH + * Author: Stéphane Grosjean */ #ifndef PUCAN_H #define PUCAN_H -- cgit v1.2.3 From 6eb350a2233100a283f882c023e5ad426d0ed63b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Aug 2025 17:02:30 +0200 Subject: rseq: Protect event mask against membarrier IPI rseq_need_restart() reads and clears task::rseq_event_mask with preemption disabled to guard against the scheduler. But membarrier() uses an IPI and sets the PREEMPT bit in the event mask from the IPI, which leaves that RMW operation unprotected. Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that. 
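For context, "irq" and "preempt" name guard classes the kernel already provides (via DEFINE_LOCK_GUARD_0() in irqflags.h and preempt.h), so RSEQ_EVENT_GUARD simply selects one of them by name. A hypothetical helper restating the resulting pattern:

#include <linux/cleanup.h>
#include <linux/rseq.h>
#include <linux/sched.h>

static void set_rseq_event_bit(unsigned int bit)
{
	/*
	 * With CONFIG_MEMBARRIER this is an IRQ-off section, so the
	 * membarrier IPI cannot interleave with the non-atomic RMW.
	 */
	scoped_guard(RSEQ_EVENT_GUARD)
		__set_bit(bit, &current->rseq_event_mask);
}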
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ") Signed-off-by: Thomas Gleixner Reviewed-by: Boqun Feng Reviewed-by: Mathieu Desnoyers Cc: stable@vger.kernel.org --- include/linux/rseq.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rseq.h b/include/linux/rseq.h index bc8af3eb5598..1fbeb61babeb 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -7,6 +7,12 @@ #include #include +#ifdef CONFIG_MEMBARRIER +# define RSEQ_EVENT_GUARD irq +#else +# define RSEQ_EVENT_GUARD preempt +#endif + /* * Map the event mask on the user-space ABI enum rseq_cs_flags * for direct mask checks. @@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig, static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { - preempt_disable(); - __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask); - preempt_enable(); + scoped_guard(RSEQ_EVENT_GUARD) + __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask); rseq_handle_notify_resume(ksig, regs); } -- cgit v1.2.3 From 2da6de30e60dd9bb14600eff1cc99df2fa2ddae3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 8 Sep 2025 15:23:15 -0700 Subject: mm: folio_may_be_lru_cached() unless folio_test_large() mm/swap.c and mm/mlock.c agree to drain any per-CPU batch as soon as a large folio is added: so collect_longterm_unpinnable_folios() just wastes effort when calling lru_add_drain[_all]() on a large folio. But although there is good reason not to batch up PMD-sized folios, we might well benefit from batching a small number of low-order mTHPs (though unclear how that "small number" limitation will be implemented). So ask if folio_may_be_lru_cached() rather than !folio_test_large(), to insulate those particular checks from future change. Name preferred to "folio_is_batchable" because large folios can well be put on a batch: it's just the per-CPU LRU caches, drained much later, which need care. Marked for stable, to counter the increase in lru_add_drain_all()s from "mm/gup: check ref_count instead of lru before migration". Link: https://lkml.kernel.org/r/57d2eaf8-3607-f318-e0c5-be02dce61ad0@google.com Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region") Signed-off-by: Hugh Dickins Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: "Aneesh Kumar K.V" Cc: Axel Rasmussen Cc: Chris Li Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Johannes Weiner Cc: John Hubbard Cc: Keir Fraser Cc: Konstantin Khlebnikov Cc: Li Zhe Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Rik van Riel Cc: Shivank Garg Cc: Vlastimil Babka Cc: Wei Xu Cc: Will Deacon Cc: yangge Cc: Yuanchu Xie Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton --- include/linux/swap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fe6ed2cc3fd..7012a0f758d8 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -385,6 +385,16 @@ void folio_add_lru_vma(struct folio *, struct vm_area_struct *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); +static inline bool folio_may_be_lru_cached(struct folio *folio) +{ + /* + * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting. + * Holding small numbers of low-order mTHP folios in per-CPU LRU cache + * will be sensible, but nobody has implemented and tested that yet. 
+ */ + return !folio_test_large(folio); +} + extern atomic_t lru_disable_count; static inline bool lru_cache_disabled(void) -- cgit v1.2.3 From e6a0deb6fa5b0fc134ee2aa127d1cfc9456d8445 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 8 Sep 2025 13:15:12 -0700 Subject: mm/damon/core: introduce damon_call_control->dealloc_on_cancel Patch series "mm/damon/sysfs: fix refresh_ms control overwriting on multi-kdamonds usages". The automatic essential DAMON/DAMOS status update feature of the DAMON sysfs interface (refresh_ms) is broken [1] for the multiple DAMON contexts (kdamonds) use case, since it uses a single global damon_call_control object for all created DAMON contexts. The fields of the object, particularly the list field, are over-written for the contexts, causing unexpected results including user-space hangs and kernel crashes [2]. Fix it by extending damon_call_control for the use case and updating the usage on the DAMON sysfs interface to use a per-context, dynamically allocated damon_call_control object. This patch (of 2): When damon_call_control->repeat is set, damon_call() is executed asynchronously, and is eventually canceled when kdamond finishes. If the damon_call_control object is dynamically allocated, finding the place to deallocate the object is difficult. Introduce a new damon_call_control field, namely dealloc_on_cancel, to ask the kdamond to deallocate such dynamically allocated objects when they are canceled. Link: https://lkml.kernel.org/r/20250908201513.60802-3-sj@kernel.org Link: https://lkml.kernel.org/r/20250908201513.60802-2-sj@kernel.org Fixes: d809a7c64ba8 ("mm/damon/sysfs: implement refresh_ms file internal work") Signed-off-by: SeongJae Park Cc: Yunjeong Mun Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index f13664c62ddd..9e62b2a85538 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -636,6 +636,7 @@ struct damon_operations { * @data: Data that will be passed to @fn. * @repeat: Repeat invocations. * @return_code: Return code from @fn invocation. + * @dealloc_on_cancel: De-allocate when canceled. * * Control damon_call(), which requests specific kdamond to invoke a given * function. Refer to damon_call() for more details. @@ -645,6 +646,7 @@ struct damon_call_control { void *data; bool repeat; int return_code; + bool dealloc_on_cancel; /* private: internal use only */ /* informs if the kdamond finished handling of the request */ struct completion completion; -- cgit v1.2.3 From 337135e6124b6d37d7ef1cd5a6c0b9681938c5ee Mon Sep 17 00:00:00 2001 From: Ruan Shiyang Date: Tue, 29 Jul 2025 11:51:01 +0800 Subject: mm: memory-tiering: fix PGPROMOTE_CANDIDATE counting Goto-san reported confusing pgpromote statistics where the pgpromote_success count significantly exceeded pgpromote_candidate. On a system with three nodes (nodes 0-1: DRAM 4GB, node 2: NVDIMM 4GB): # Enable demotion only echo 1 > /sys/kernel/mm/numa/demotion_enabled numactl -m 0-1 memhog -r200 3500M >/dev/null & pid=$! sleep 2 numactl memhog -r100 2500M >/dev/null & sleep 10 kill -9 $pid # terminate the 1st memhog # Enable promotion echo 2 > /proc/sys/kernel/numa_balancing After a few seconds, we observed `pgpromote_candidate < pgpromote_success` $ grep -e pgpromote /proc/vmstat pgpromote_success 2579 pgpromote_candidate 0 In this scenario, after terminating the first memhog, the conditions for pgdat_free_space_enough() are quickly met, which triggers promotion. 
However, these migrated pages are only counted in PGPROMOTE_SUCCESS, not in PGPROMOTE_CANDIDATE. To resolve these confusing statistics, introduce PGPROMOTE_CANDIDATE_NRL to count the missed promotion pages. These pages are deliberately not counted in PGPROMOTE_CANDIDATE, to avoid changing the existing algorithm or the performance of the promotion rate limit. Link: https://lkml.kernel.org/r/20250901090122.124262-1-ruansy.fnst@fujitsu.com Link: https://lkml.kernel.org/r/20250729035101.1601407-1-ruansy.fnst@fujitsu.com Fixes: c6833e10008f ("memory tiering: rate limit NUMA migration throughput") Co-developed-by: Li Zhijian Signed-off-by: Li Zhijian Signed-off-by: Ruan Shiyang Reported-by: Yasunori Gotou (Fujitsu) Suggested-by: Huang Ying Acked-by: Vlastimil Babka Reviewed-by: Huang Ying Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Valentin Schneider Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0c5da9141983..9d3ea9085556 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -234,7 +234,21 @@ enum node_stat_item { #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ - PGPROMOTE_CANDIDATE, /* candidate pages to promote */ + /** + * Candidate pages for promotion based on hint fault latency. This + * counter is used to control the promotion rate and adjust the hot + * threshold. + */ + PGPROMOTE_CANDIDATE, + /** + * Not rate-limited (NRL) candidate pages for those can be promoted + * without considering hot threshold because of enough free pages in + * fast-tier node. These promotions bypass the regular hotness checks + * and do NOT influence the promotion rate-limiter or + * threshold-adjustment logic. + * This is for statistics/monitoring purposes. + */ + PGPROMOTE_CANDIDATE_NRL, #endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, -- cgit v1.2.3 From 79e1c24285c40cdfa9eb00fe8131d1ba14b84ef1 Mon Sep 17 00:00:00 2001 From: Ye Liu Date: Fri, 18 Jul 2025 10:41:32 +0800 Subject: mm: replace (20 - PAGE_SHIFT) with common macros for pages<->MB conversion Replace repeated (20 - PAGE_SHIFT) calculations with standard macros: - MB_TO_PAGES(mb) converts MB to page count - PAGES_TO_MB(pages) converts pages to MB No functional change. [akpm@linux-foundation.org: remove arc's private PAGES_TO_MB, remove its unused PAGES_TO_KB] [akpm@linux-foundation.org: don't include mm.h due to include file ordering mess] Link: https://lkml.kernel.org/r/20250718024134.1304745-1-ye.liu@linux.dev Signed-off-by: Ye Liu Acked-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Dev Jain Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Ben Segall Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Dietmar Eggemann Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Josh Triplett Cc: Juri Lelli Cc: Kairui Song Cc: Kemeng Shi Cc: Lai jiangshan Cc: Liam Howlett Cc: Mariano Pache Cc: Mathieu Desnoyers Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Neeraj Upadhyay Cc: Nhat Pham Cc: "Paul E . 
McKenney" Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: "Uladzislau Rezki (Sony)" Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ae97a0b8ec7..b626d1bacef5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -69,6 +69,15 @@ static inline void totalram_pages_add(long count) extern void * high_memory; +/* + * Convert between pages and MB + * 20 is the shift for 1MB (2^20 = 1MB) + * PAGE_SHIFT is the shift for page size (e.g., 12 for 4KB pages) + * So (20 - PAGE_SHIFT) converts between pages and MB + */ +#define PAGES_TO_MB(pages) ((pages) >> (20 - PAGE_SHIFT)) +#define MB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) + #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; #else -- cgit v1.2.3 From cc483b328881bbccb55265a86731384d5176fe85 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 4 Aug 2025 16:33:48 -0700 Subject: mm: limit the scope of vma_start_read() Limit the scope of vma_start_read() as it is used only as a helper for higher-level locking functions implemented inside mmap_lock.c and we are about to introduce more complex RCU rules for this function. The change is pure code refactoring and has no functional changes. Link: https://lkml.kernel.org/r/20250804233349.1278678-1-surenb@google.com Suggested-by: Vlastimil Babka Signed-off-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/mmap_lock.h | 85 ----------------------------------------------- 1 file changed, 85 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 11a078de9150..2c9fffa58714 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -147,91 +147,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) } } -/* - * Try to read-lock a vma. The function is allowed to occasionally yield false - * locked result to avoid performance overhead, in which case we fall back to - * using mmap_lock. The function should never yield false unlocked result. - * False locked result is possible if mm_lock_seq overflows or if vma gets - * reused and attached to a different mm before we lock it. - * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got - * detached. - * - * WARNING! The vma passed to this function cannot be used if the function - * fails to lock it because in certain cases RCU lock is dropped and then - * reacquired. Once RCU lock is dropped the vma can be concurently freed. - */ -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - int oldcnt; - - /* - * Check before locking. A race might cause false locked result. - * We can use READ_ONCE() for the mm_lock_seq here, and don't need - * ACQUIRE semantics, because this is just a lockless check whose result - * we don't rely on for anything - the mm_lock_seq read against which we - * need ordering is below. - */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) - return NULL; - - /* - * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() - * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. 
- * Acquire fence is required here to avoid reordering against later - * vm_lock_seq check and checks inside lock_vma_under_rcu(). - */ - if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, - VMA_REF_LIMIT))) { - /* return EAGAIN if vma got detached from under us */ - return oldcnt ? NULL : ERR_PTR(-EAGAIN); - } - - rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); - - /* - * If vma got attached to another mm from under us, that mm is not - * stable and can be freed in the narrow window after vma->vm_refcnt - * is dropped and before rcuwait_wake_up(mm) is called. Grab it before - * releasing vma->vm_refcnt. - */ - if (unlikely(vma->vm_mm != mm)) { - /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ - struct mm_struct *other_mm = vma->vm_mm; - - /* - * __mmdrop() is a heavy operation and we don't need RCU - * protection here. Release RCU lock during these operations. - * We reinstate the RCU read lock as the caller expects it to - * be held when this function returns even on error. - */ - rcu_read_unlock(); - mmgrab(other_mm); - vma_refcount_put(vma); - mmdrop(other_mm); - rcu_read_lock(); - return NULL; - } - - /* - * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. - * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq - * modification invalidates all existing locks. - * - * We must use ACQUIRE semantics for the mm_lock_seq so that if we are - * racing with vma_end_write_all(), we only start reading from the VMA - * after it has been unlocked. - * This pairs with RELEASE semantics in vma_end_write_all(). - */ - if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { - vma_refcount_put(vma); - return NULL; - } - - return vma; -} - /* * Use only while holding mmap read lock which guarantees that locking will not * fail (nobody can concurrently write-lock the vma). vma_start_read() should -- cgit v1.2.3 From 913fff314547c1922002e655bb25199ee38e8825 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 7 Aug 2025 00:17:47 +0800 Subject: mm, swap: remove fragment clusters counter It was used for calculating the iteration number when the swap allocator wants to scan the whole fragment list. Now the allocator only scans one fragment cluster at a time, so no one uses this counter anymore. 
Remove it as a cleanup; the performance change is marginal: Build linux kernel using 10G ZRAM, make -j96, defconfig with 2G cgroup memory limit, on top of tmpfs, 64kB mTHP enabled: Before: sys time: 6278.45s After: sys time: 6176.34s Change to 8G ZRAM: Before: sys time: 5572.85s After: sys time: 5531.49s Link: https://lkml.kernel.org/r/20250806161748.76651-3-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Nhat Pham Acked-by: Chris Li Cc: Baoquan He Cc: Barry Song Cc: "Huang, Ying" Cc: Kemeng Shi Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fe6ed2cc3fd..a060d102e0d1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -310,7 +310,6 @@ struct swap_info_struct { /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */ - atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int pages; /* total of usable pages of swap */ atomic_long_t inuse_pages; /* number of those currently in use */ struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */ -- cgit v1.2.3 From 61dc4358d37ae0be3220a0fa32cf7f0ccd4f7636 Mon Sep 17 00:00:00 2001 From: "Adrian Huang (Lenovo)" Date: Wed, 6 Aug 2025 22:59:06 +0800 Subject: mm: correct misleading comment on mmap_lock field in mm_struct The comment previously described the offset of mmap_lock as 0x120 (hex), which is misleading. The correct offset is 56 bytes (decimal) from the last cache line boundary. Using '0x120' could confuse readers trying to understand why the count and owner fields reside in separate cachelines. This change also removes an unnecessary space for improved formatting. Link: https://lkml.kernel.org/r/20250806145906.24647-1-adrianhuang0701@gmail.com Signed-off-by: Adrian Huang (Lenovo) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 08bc2442db93..3ed763e7ec6f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1026,10 +1026,10 @@ struct mm_struct { * counters */ /* - * With some kernel config, the current mmap_lock's offset - * inside 'mm_struct' is at 0x120, which is very optimal, as + * Typically the current mmap_lock's offset is 56 bytes from + * the last cacheline boundary, which is very optimal, as * its two hot fields 'count' and 'owner' sit in 2 different - * cachelines, and when mmap_lock is highly contended, both + * cachelines, and when mmap_lock is highly contended, both * of the 2 fields will be accessed frequently, current layout * will help to reduce cache bouncing. * -- cgit v1.2.3 From 4c5d3365882dbbc0784688784904f440d7a4c0f1 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Wed, 6 Aug 2025 14:41:08 +0200 Subject: mm/vmalloc: allow to set node and align in vrealloc Patch series "support large align and nid in Rust allocators", v15. The series provides the ability for Rust allocators to set NUMA node and large alignment. This patch (of 4): Reimplement vrealloc() to be able to set node and alignment should a user need to do so. 
Rename the function to vrealloc_node_align() to better match what it actually does now and introduce macros for vrealloc() and friends for backward compatibility. With that change we also provide the ability for the Rust part of the kernel to set node and alignment in its allocations. Link: https://lkml.kernel.org/r/20250806124034.1724515-1-vitaly.wool@konsulko.se Link: https://lkml.kernel.org/r/20250806124108.1724561-1-vitaly.wool@konsulko.se Signed-off-by: Vitaly Wool Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Vlastimil Babka Cc: Alice Ryhl Cc: Danilo Krummrich Cc: Herbert Xu Cc: Jann Horn Cc: Kent Overstreet Cc: Liam Howlett Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2759dac6be44..eb54b7b3202f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -197,9 +197,15 @@ extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1 extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) -void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags) - __realloc_size(2); -#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) +void *__must_check vrealloc_node_align_noprof(const void *p, size_t size, + unsigned long align, gfp_t flags, int nid) __realloc_size(2); +#define vrealloc_node_noprof(_p, _s, _f, _nid) \ + vrealloc_node_align_noprof(_p, _s, 1, _f, _nid) +#define vrealloc_noprof(_p, _s, _f) \ + vrealloc_node_align_noprof(_p, _s, 1, _f, NUMA_NO_NODE) +#define vrealloc_node_align(...) alloc_hooks(vrealloc_node_align_noprof(__VA_ARGS__)) +#define vrealloc_node(...) alloc_hooks(vrealloc_node_noprof(__VA_ARGS__)) +#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From 2cd8231796b5e7133b1c3d66ad7d2a3c42c97258 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Wed, 6 Aug 2025 14:41:47 +0200 Subject: mm/slub: allow to set node and align in k[v]realloc Reimplement k[v]realloc_node() to be able to set node and alignment should a user need to do so. In order to do that while retaining the maximal backward compatibility, add k[v]realloc_node_align() functions and redefine the rest of the API using these new ones. While doing that, we also keep the number of _noprof variants to a minimum, which implies some changes to the existing users of the older _noprof functions, in practice bcachefs. With that change we also provide the ability for the Rust part of the kernel to set node and alignment in its K[v]xxx [re]allocations. 
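A usage sketch for the extended interface, assuming the macros introduced in the hunks below; the helper and its parameters are hypothetical:

#include <linux/slab.h>

/* Grow a per-node buffer; the vmalloc fallback honours the alignment. */
static void *grow_buf(void *buf, size_t new_size, int nid)
{
	return kvrealloc_node_align(buf, new_size, PAGE_SIZE, GFP_KERNEL, nid);
}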
Link: https://lkml.kernel.org/r/20250806124147.1724658-1-vitaly.wool@konsulko.se Signed-off-by: Vitaly Wool Reviewed-by: Vlastimil Babka Cc: Alice Ryhl Cc: Danilo Krummrich Cc: Herbert Xu Cc: Jann Horn Cc: Kent Overstreet Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- include/linux/bpfptr.h | 2 +- include/linux/slab.h | 39 ++++++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 1af241525a17..f6e0795db484 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -67,7 +67,7 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { - void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); + void *p = kvmalloc_node_align_noprof(len, 1, GFP_USER | __GFP_NOWARN, NUMA_NO_NODE); if (!p) return ERR_PTR(-ENOMEM); diff --git a/include/linux/slab.h b/include/linux/slab.h index d5a8ab98035c..6dc300bac2a1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -465,9 +465,13 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc_noprof(const void *objp, size_t new_size, - gfp_t flags) __realloc_size(2); -#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) +void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size, + unsigned long align, + gfp_t flags, int nid) __realloc_size(2); +#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE) +#define krealloc_node_align(...) alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__)) +#define krealloc_node(_o, _s, _f, _n) krealloc_node_align(_o, _s, 1, _f, _n) +#define krealloc(...) krealloc_node(__VA_ARGS__, NUMA_NO_NODE) void kfree(const void *objp); void kfree_sensitive(const void *objp); @@ -1041,18 +1045,20 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) #define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); -#define kvmalloc_node_noprof(size, flags, node) \ - __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) -#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) - -#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) -#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) +void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, + gfp_t flags, int node) __alloc_size(1); +#define kvmalloc_node_align_noprof(_size, _align, _flags, _node) \ + __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node) +#define kvmalloc_node_align(...) \ + alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__)) +#define kvmalloc_node(_s, _f, _n) kvmalloc_node_align(_s, 1, _f, _n) +#define kvmalloc(...) 
kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE) #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) + #define kmem_buckets_valloc(_b, _size, _flags) \ - alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) + alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE)) static inline __alloc_size(1, 2) void * kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) @@ -1062,7 +1068,7 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - return kvmalloc_node_noprof(bytes, flags, node); + return kvmalloc_node_align_noprof(bytes, 1, flags, node); } #define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) @@ -1073,9 +1079,12 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) - __realloc_size(2); -#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) +void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, + gfp_t flags, int nid) __realloc_size(2); +#define kvrealloc_node_align(...) \ + alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__)) +#define kvrealloc_node(_p, _s, _f, _n) kvrealloc_node_align(_p, _s, 1, _f, _n) +#define kvrealloc(...) kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE) extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) -- cgit v1.2.3 From e6d7d3502e00ed6f86e03dcdb282cb7785e55448 Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Tue, 5 Aug 2025 21:39:40 +0900 Subject: mm/damon: update expired description of damos_action Nowadays, damos operation actions support a greater operation set. But comments (also, generated documentation) weren't updated. So fix the comments with current support status. Link: https://lkml.kernel.org/r/20250805123940.13691-1-ekffu200098@gmail.com Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Honggyu Kim Signed-off-by: Andrew Morton --- include/linux/damon.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index f13664c62ddd..d01bfee80bd6 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -110,7 +110,7 @@ struct damon_target { * * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. - * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_PAGEOUT: Reclaim the region. * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. @@ -121,10 +121,10 @@ struct damon_target { * @NR_DAMOS_ACTIONS: Total number of DAMOS actions * * The support of each action is up to running &struct damon_operations. - * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR supports all actions except - * &enum DAMOS_LRU_PRIO and &enum DAMOS_LRU_DEPRIO. &enum DAMON_OPS_PADDR - * supports only &enum DAMOS_PAGEOUT, &enum DAMOS_LRU_PRIO, &enum - * DAMOS_LRU_DEPRIO, and &DAMOS_STAT. 
+ * Refer to 'Operation Action' section of Documentation/mm/damon/design.rst for + * status of the supports. + * + * Note that DAMOS_PAGEOUT doesn't trigger demotions. */ enum damos_action { DAMOS_WILLNEED, -- cgit v1.2.3 From 95c2908f1a4fd608b1cdbb5acef3572e5d769e1c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 16:39:47 +0200 Subject: mm/migrate: remove MIGRATEPAGE_UNMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migrate_folio_unmap() is the only user of MIGRATEPAGE_UNMAP. We want to remove MIGRATEPAGE_* completely. It's rather weird to have a generic MIGRATEPAGE_UNMAP, documented to be returned from address-space callbacks, when it's only used for an internal helper. Let's start by having only a single "success" return value for migrate_folio_unmap() -- 0 -- by moving the "folio was already freed" check into the single caller. There is a remaining comment for PG_isolated, which we renamed to PG_movable_ops_isolated recently and forgot to update. While we might still run into that case with zsmalloc, it's something we want to get rid of soon. So let's just focus that optimization on real folios only for now by excluding movable_ops pages. Note that concurrent freeing can happen at any time and this "already freed" check is not relevant for correctness. [david@redhat.com: no need to pass "reason" to migrate_folio_unmap(), per Lance] Link: https://lkml.kernel.org/r/3bb725f8-28d7-4aa2-b75f-af40d5cab280@redhat.com Link: https://lkml.kernel.org/r/20250811143949.1117439-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lance Yang Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Benjamin LaHaise Cc: Byungchul Park Cc: Chris Mason Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Kleikamp Cc: David Sterba Cc: Eugenio Pérez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Wang Cc: Jerrin Shaji George Cc: Josef Bacik Cc: Joshua Hahn Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Muchun Song Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Rakie Kim Cc: Sergey Senozhatsky Cc: Xuan Zhuo Cc: Dave Kleikamp Signed-off-by: Andrew Morton --- include/linux/migrate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9009e27b5f44..302f3e95faea 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -18,7 +18,6 @@ struct migration_target_control; * - zero on page migration success; */ #define MIGRATEPAGE_SUCCESS 0 -#define MIGRATEPAGE_UNMAP 1 /** * struct movable_operations - Driver page migration -- cgit v1.2.3 From fb49a4425cfa163faccd91f913773d3401d3a7d4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 16:39:48 +0200 Subject: treewide: remove MIGRATEPAGE_SUCCESS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point MIGRATEPAGE_SUCCESS is misnamed for all folio users, and now that we remove MIGRATEPAGE_UNMAP, it's really the only "success" return value that the code uses and expects. Let's just get rid of MIGRATEPAGE_SUCCESS completely and just use "0" for success. 
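As a sketch of the resulting calling convention for an address_space_operations implementation (the callback name is hypothetical; migrate_folio() is the existing generic helper):

#include <linux/migrate.h>

static int example_migrate_folio(struct address_space *mapping,
				 struct folio *dst, struct folio *src,
				 enum migrate_mode mode)
{
	int ret = migrate_folio(mapping, dst, src, mode);

	if (ret)
		return ret;	/* -EAGAIN, -EBUSY, ... as before */
	return 0;		/* previously spelled MIGRATEPAGE_SUCCESS */
}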
Link: https://lkml.kernel.org/r/20250811143949.1117439-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan [mm] Acked-by: Dave Kleikamp [jfs] Acked-by: David Sterba [btrfs] Acked-by: Greg Kroah-Hartman Reviewed-by: Byungchul Park Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Benjamin LaHaise Cc: Chris Mason Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Kleikamp Cc: Eugenio Pérez Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Wang Cc: Jerrin Shaji George Cc: Josef Bacik Cc: Joshua Hahn Cc: Madhavan Srinivasan Cc: Mathew Brost Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Muchun Song Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Rakie Kim Cc: Sergey Senozhatsky Cc: Xuan Zhuo Cc: Lance Yang Signed-off-by: Andrew Morton --- include/linux/migrate.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 302f3e95faea..1f0ac122c3bf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -12,13 +12,6 @@ typedef void free_folio_t(struct folio *folio, unsigned long private); struct migration_target_control; -/* - * Return values from addresss_space_operations.migratepage(): - * - negative errno on page migration failure; - * - zero on page migration success; - */ -#define MIGRATEPAGE_SUCCESS 0 - /** * struct movable_operations - Driver page migration * @isolate_page: * @src page. The driver should copy the contents of the * @src page to the @dst page and set up the fields of @dst page. * Both pages are locked. - * If page migration is successful, the driver should - * return MIGRATEPAGE_SUCCESS. + * If page migration is successful, the driver should return 0. * If the driver cannot migrate the page at the moment, it can return * -EAGAIN. The VM interprets this as a temporary migration failure and * will retry it later. Any other error value is a permanent migration failure. -- cgit v1.2.3 From b22cc9a9c7ff0ad8998d58fdd7122de6038c46a7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:27 +0200 Subject: mm/rmap: convert "enum rmap_level" to "enum pgtable_level" Let's factor it out, and convert all checks for unsupported levels to BUILD_BUG(). The code is written in a way such that force-inlining will optimize out the levels. 
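The pattern relies on every caller passing a compile-time-constant level into an __always_inline function: dead branches are discarded and BUILD_BUG() turns any unhandled level into a build failure rather than a runtime warning. A minimal, hypothetical sketch:

#include <linux/build_bug.h>
#include <linux/pgtable.h>

static __always_inline void check_leaf_level(enum pgtable_level level)
{
	switch (level) {
	case PGTABLE_LEVEL_PTE:
	case PGTABLE_LEVEL_PMD:
	case PGTABLE_LEVEL_PUD:
		break;
	default:
		/* Unreachable for the constant levels callers pass in. */
		BUILD_BUG();
	}
}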
[nathan@kernel.org: always inline __folio_rmap_sanity_checks()] Link: https://lkml.kernel.org/r/20250814-rmap-fix-build_bug-conversion-v1-1-fb7b10a0b362@kernel.org Link: https://lkml.kernel.org/r/20250811112631.759341-8-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Nathan Chancellor Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: David Vrabel Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Oscar Salvador Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Wei Yang Cc: Zi Yan Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 8 +++++++ include/linux/rmap.h | 62 ++++++++++++++++++++++--------------------------- 2 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2b80fd456c8b..4f88e460eb9c 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1975,6 +1975,14 @@ static inline bool arch_has_pfn_modify_check(void) /* Page-Table Modification Mask */ typedef unsigned int pgtbl_mod_mask; +enum pgtable_level { + PGTABLE_LEVEL_PTE = 0, + PGTABLE_LEVEL_PMD, + PGTABLE_LEVEL_PUD, + PGTABLE_LEVEL_P4D, + PGTABLE_LEVEL_PGD, +}; + #endif /* !__ASSEMBLY__ */ #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 6cd020eea37a..e8aff6d2deda 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -394,18 +394,8 @@ typedef int __bitwise rmap_t; /* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) -/* - * Internally, we're using an enum to specify the granularity. We make the - * compiler emit specialized code for each granularity. - */ -enum rmap_level { - RMAP_LEVEL_PTE = 0, - RMAP_LEVEL_PMD, - RMAP_LEVEL_PUD, -}; - -static inline void __folio_rmap_sanity_checks(const struct folio *folio, - const struct page *page, int nr_pages, enum rmap_level level) +static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio, + const struct page *page, int nr_pages, enum pgtable_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); @@ -427,18 +417,18 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: break; - case RMAP_LEVEL_PMD: + case PGTABLE_LEVEL_PMD: /* * We don't support folios larger than a single PMD yet. So - * when RMAP_LEVEL_PMD is set, we assume that we are creating + * when PGTABLE_LEVEL_PMD is set, we assume that we are creating * a single "entire" mapping of the folio. */ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PUD: /* * Assume that we are creating a single "entire" mapping of the * folio. 
@@ -447,7 +437,7 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); break; default: - VM_WARN_ON_ONCE(true); + BUILD_BUG(); } /* @@ -567,14 +557,14 @@ static inline void hugetlb_remove_rmap(struct folio *folio) static __always_inline void __folio_dup_file_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - enum rmap_level level) + enum pgtable_level level) { const int orig_nr_pages = nr_pages; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (!folio_test_large(folio)) { atomic_inc(&folio->_mapcount); break; @@ -587,11 +577,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, } folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } } @@ -609,13 +601,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, static inline void folio_dup_file_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, PGTABLE_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, PGTABLE_LEVEL_PTE); } /** @@ -632,7 +624,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, PGTABLE_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif @@ -640,7 +632,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, enum rmap_level level) + struct vm_area_struct *src_vma, enum pgtable_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -665,7 +657,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * copying if the folio maybe pinned. 
*/ switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (unlikely(maybe_pinned)) { for (i = 0; i < nr_pages; i++) if (PageAnonExclusive(page + i)) @@ -687,8 +679,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, } while (page++, --nr_pages > 0); folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; @@ -697,6 +689,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } return 0; } @@ -730,7 +724,7 @@ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, - src_vma, RMAP_LEVEL_PTE); + src_vma, PGTABLE_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, @@ -738,7 +732,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, - RMAP_LEVEL_PTE); + PGTABLE_LEVEL_PTE); } /** @@ -770,7 +764,7 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, - src_vma, RMAP_LEVEL_PMD); + src_vma, PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -778,7 +772,7 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, } static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, enum pgtable_level level) { VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); @@ -873,7 +867,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, static inline int folio_try_share_anon_rmap_pte(struct folio *folio, struct page *page) { - return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); + return __folio_try_share_anon_rmap(folio, page, 1, PGTABLE_LEVEL_PTE); } /** @@ -904,7 +898,7 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, - RMAP_LEVEL_PMD); + PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; -- cgit v1.2.3 From ec63a44011dccebca24e7ef7e8a9521306de1bc9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:28 +0200 Subject: mm/memory: convert print_bad_pte() to print_bad_page_map() print_bad_pte() looks like something that should actually be a WARN or similar, but historically it apparently has proven to be useful to detect corruption of page tables even on production systems -- report the issue and keep the system running to make it easier to actually detect what is going wrong (e.g., multiple such messages might shed a light). As we want to unify vm_normal_page_*() handling for PTE/PMD/PUD, we'll have to take care of print_bad_pte() as well. Let's prepare for using print_bad_pte() also for non-PTEs by adjusting the implementation and renaming the function to print_bad_page_map(). Provide print_bad_pte() as a simple wrapper. Document the implicit locking requirements for the page table re-walk. 
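A sketch of the wrapper's shape; the print_bad_page_map() prototype below is an assumption based on this description, not a quotation from the patch:

static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
			  pte_t pte, struct page *page)
{
	/* Assumed prototype: (vma, addr, raw entry value, page). */
	print_bad_page_map(vma, addr, pte_val(pte), page);
}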
To make the function a bit more readable, factor out the ratelimit check into is_bad_page_map_ratelimited() and place the printing of page table content into __print_bad_page_map_pgtable(). We'll now dump information from each level in a single line, and just stop the table walk once we hit something that is not a present page table. The report will now look something like (dumping pgd to pmd values): [ 77.943408] BUG: Bad page map in process XXX pte:80000001233f5867 [ 77.944077] addr:00007fd84bb1c000 vm_flags:08100071 anon_vma: ... [ 77.945186] pgd:10a89f067 p4d:10a89f067 pud:10e5a2067 pmd:105327067 Not using pgdp_get(), because that does not work properly on some arm configs where pgd_t is an array. Note that we are dumping all levels even when levels are folded for simplicity. [david@redhat.com: drop warning] Link: https://lkml.kernel.org/r/923b279c-de33-44dd-a923-2959afad8626@redhat.com Link: https://lkml.kernel.org/r/20250811112631.759341-9-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: David Vrabel Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Oscar Salvador Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Wei Yang Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 4f88e460eb9c..94249e671a7e 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1983,6 +1983,24 @@ enum pgtable_level { PGTABLE_LEVEL_PGD, }; +static inline const char *pgtable_level_to_str(enum pgtable_level level) +{ + switch (level) { + case PGTABLE_LEVEL_PTE: + return "pte"; + case PGTABLE_LEVEL_PMD: + return "pmd"; + case PGTABLE_LEVEL_PUD: + return "pud"; + case PGTABLE_LEVEL_P4D: + return "p4d"; + case PGTABLE_LEVEL_PGD: + return "pgd"; + default: + return "unknown"; + } +} + #endif /* !__ASSEMBLY__ */ #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) -- cgit v1.2.3 From 2db308160b5a191b494746fd167dbbaaead3fb26 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:30 +0200 Subject: mm: introduce and use vm_normal_page_pud() Let's introduce vm_normal_page_pud(), which ends up being fairly simple because of our new common helpers and there not being a PUD-sized zero folio. Use vm_normal_page_pud() in folio_walk_start() to resolve a TODO, structuring the code like the other (pmd/pte) cases. Defer introducing vm_normal_folio_pud() until really used. Note that we can so far get PUDs with hugetlb, daxfs and PFNMAP entries. 
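A sketch of the intended call pattern, mirroring the existing pte/pmd cases in a page-table walker; the helper below is hypothetical:

#include <linux/mm.h>

static struct folio *folio_of_pud(struct vm_area_struct *vma,
				  unsigned long addr, pud_t pud)
{
	struct page *page = vm_normal_page_pud(vma, addr, pud);

	/* NULL for special/PFNMAP entries without a usable struct page. */
	return page ? page_folio(page) : NULL;
}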
Link: https://lkml.kernel.org/r/20250811112631.759341-11-david@redhat.com Reviewed-by: Wei Yang Reviewed-by: Oscar Salvador Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: David Vrabel Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b626d1bacef5..8ca7d2fa7134 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2360,6 +2360,8 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); +struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t pud); void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); -- cgit v1.2.3 From 4c89792ea0a224340ff198abc7caffa211baccd6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:31 +0200 Subject: mm: rename vm_ops->find_special_page() to vm_ops->find_normal_page() ... and hide it behind a kconfig option. There is really no need for any !xen code to perform this check. The naming is a bit off: we want to find the "normal" page when a PTE was marked "special". So it's really not "finding a special" page. Improve the documentation, and add a comment in the code where XEN ends up performing the pte_mkspecial() through a hypercall. More details can be found in commit 923b2919e2c3 ("xen/gntdev: mark userspace PTEs as special on x86 PV guests"). Link: https://lkml.kernel.org/r/20250811112631.759341-12-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Reviewed-by: Lorenzo Stoakes Reviewed-by: Wei Yang Cc: David Vrabel Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8ca7d2fa7134..3868ca1a25f9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -657,13 +657,21 @@ struct vm_operations_struct { struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); #endif +#ifdef CONFIG_FIND_NORMAL_PAGE /* - * Called by vm_normal_page() for special PTEs to find the - * page for @addr. This is useful if the default behavior - * (using pte_page()) would not find the correct page. + * Called by vm_normal_page() for special PTEs in @vma at @addr. 
This + * allows for returning a "normal" page from vm_normal_page() even + * though the PTE indicates that the "struct page" either does not exist + * or should not be touched: "special". + * + * Do not add new users: this really only works when a "normal" page + * was mapped, but then the PTE got changed to something weird (+ + * marked special) that would not make pte_pfn() identify the originally + * inserted page. */ - struct page *(*find_special_page)(struct vm_area_struct *vma, - unsigned long addr); + struct page *(*find_normal_page)(struct vm_area_struct *vma, + unsigned long addr); +#endif /* CONFIG_FIND_NORMAL_PAGE */ }; #ifdef CONFIG_NUMA_BALANCING -- cgit v1.2.3 From 2843408ca971d1472a6a8b32ee4647f55ecab598 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Mon, 11 Aug 2025 10:41:10 +0200 Subject: mm: rename MMF_HUGE_ZERO_PAGE to MMF_HUGE_ZERO_FOLIO As all the helper functions has been renamed from *_page to *_folio, rename the MM flag from MMF_HUGE_ZERO_PAGE to MMF_HUGE_ZERO_FOLIO. No functional changes. Link: https://lkml.kernel.org/r/20250811084113.647267-3-kernel@pankajraghav.com Signed-off-by: Pankaj Raghav Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Hannes Reinecke Cc: Baolin Wang Cc: Christoph Hellwig Cc: "Darrick J. Wong" Cc: Dev Jain Cc: Jens Axboe Cc: Liam Howlett Cc: Luis Chamberalin Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: "Ritesh Harjani (IBM)" Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Thomas Gleinxer Cc: Vlastimil Babka Cc: Kiryl Shutsemau Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3ed763e7ec6f..cf94df4955c7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1758,7 +1758,7 @@ enum { #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ +#define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ -- cgit v1.2.3 From 2d8bd8049e89efe42a5397de4effd899e8dd2249 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Mon, 11 Aug 2025 10:41:11 +0200 Subject: mm: add persistent huge zero folio Many places in the kernel need to zero out larger chunks, but the maximum segment that can be zeroed out at a time by ZERO_PAGE is limited by PAGE_SIZE. This is especially annoying in block devices and filesystems where multiple ZERO_PAGEs are attached to the bio in different bvecs. With multipage bvec support in block layer, it is much more efficient to send out larger zero pages as a part of single bvec. This concern was raised during the review of adding Large Block Size support to XFS[1][2]. Usually huge_zero_folio is allocated on demand, and it will be deallocated by the shrinker if there are no users of it left. At moment, huge_zero_folio infrastructure refcount is tied to the process lifetime that created it. This might not work for bio layer as the completions can be async and the process that created the huge_zero_folio might no longer be alive. 
And, one of the main points that came up during discussion is to have something bigger than the zero page as a drop-in replacement. Add a config option PERSISTENT_HUGE_ZERO_FOLIO that will result in allocating the huge zero folio during early init and never freeing the memory, by disabling the shrinker. This makes it possible to use the huge_zero_folio without having to pass an mm struct, and does not tie the lifetime of the zero folio to anything, making it a drop-in replacement for ZERO_PAGE. If the PERSISTENT_HUGE_ZERO_FOLIO config option is enabled, then mm_get_huge_zero_folio() will simply return the allocated page instead of dynamically allocating a new PMD page. Use this option carefully in resource-constrained systems as it uses one full PMD-sized page for zeroing purposes. [1] https://lore.kernel.org/linux-xfs/20231027051847.GA7885@lst.de/ [2] https://lore.kernel.org/linux-xfs/ZitIK5OnR7ZNY0IG@infradead.org/ Link: https://lkml.kernel.org/r/20250811084113.647267-4-kernel@pankajraghav.com Signed-off-by: David Hildenbrand Signed-off-by: Pankaj Raghav Reviewed-by: Lorenzo Stoakes Co-developed-by: David Hildenbrand Reviewed-by: Hannes Reinecke Cc: Baolin Wang Cc: Christoph Hellwig Cc: "Darrick J. Wong" Cc: Dev Jain Cc: Jens Axboe Cc: Liam Howlett Cc: Luis Chamberalin Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: "Ritesh Harjani (IBM)" Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Thomas Gleinxer Cc: Vlastimil Babka Cc: Zi Yan Cc: Kiryl Shutsemau Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7748489fde1b..bd547857c6c1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -495,6 +495,17 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); +static inline struct folio *get_persistent_huge_zero_folio(void) +{ + if (!IS_ENABLED(CONFIG_PERSISTENT_HUGE_ZERO_FOLIO)) + return NULL; + + if (unlikely(!huge_zero_folio)) + return NULL; + + return huge_zero_folio; +} + static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); @@ -685,6 +696,11 @@ static inline int change_huge_pud(struct mmu_gather *tlb, { return 0; } + +static inline struct folio *get_persistent_huge_zero_folio(void) +{ + return NULL; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, -- cgit v1.2.3 From 415a0fd62f1899fe2bb81d661e427194b1c97201 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Mon, 11 Aug 2025 10:41:12 +0200 Subject: mm: add largest_zero_folio() routine The callers of mm_get_huge_zero_folio() have access to an mm struct, and the lifetime of the huge_zero_folio is tied to the lifetime of that mm struct. largest_zero_folio() will give access to the huge_zero_folio when the PERSISTENT_HUGE_ZERO_FOLIO config option is enabled, for callers that do not want to tie the lifetime to an mm struct. This is very useful for filesystem and block layers where the request completions can be async and there is no guarantee on the mm struct lifetime. This function will return a ZERO_PAGE folio if PERSISTENT_HUGE_ZERO_FOLIO is disabled or if we failed to allocate a huge_zero_folio during early init.
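As a rough, hedged sketch of the intended use (not part of this patch; "bio" and "remaining" are assumed caller context, error handling elided, and bio_add_folio() is the existing block-layer helper):

	struct folio *zfolio = largest_zero_folio();
	size_t len = min(folio_size(zfolio), remaining);

	/* Attach one large zero bvec instead of many PAGE_SIZE-sized ones. */
	bio_add_folio(bio, zfolio, len, 0);

The caller deduces the usable length from folio_size() rather than assuming PMD_SIZE, so the same code works whether the persistent folio was allocated or the fallback ZERO_PAGE folio was returned.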
Link: https://lkml.kernel.org/r/20250811084113.647267-5-kernel@pankajraghav.com Signed-off-by: David Hildenbrand Signed-off-by: Pankaj Raghav Reviewed-by: Lorenzo Stoakes Co-developed-by: David Hildenbrand Reviewed-by: Hannes Reinecke Cc: Baolin Wang Cc: Christoph Hellwig Cc: "Darrick J. Wong" Cc: Dev Jain Cc: Jens Axboe Cc: Liam Howlett Cc: Luis Chamberalin Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: "Ritesh Harjani (IBM)" Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Thomas Gleinxer Cc: Vlastimil Babka Cc: Zi Yan Cc: Kiryl Shutsemau Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index bd547857c6c1..14d424830fa8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -714,4 +714,26 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) return split_folio_to_list_to_order(folio, NULL, new_order); } +/** + * largest_zero_folio - Get the largest zero size folio available + * + * This function shall be used when mm_get_huge_zero_folio() cannot be + * used as there is no appropriate mm lifetime to tie the huge zero folio + * from the caller. + * + * Deduce the size of the folio with folio_size instead of assuming the + * folio size. + * + * Return: pointer to PMD sized zero folio if CONFIG_PERSISTENT_HUGE_ZERO_FOLIO + * is enabled or a single page sized zero folio + */ +static inline struct folio *largest_zero_folio(void) +{ + struct folio *folio = get_persistent_huge_zero_folio(); + + if (folio) + return folio; + + return page_folio(ZERO_PAGE(0)); +} #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From bb6525f2f8c41e89ba3fc506bc1705c68cf845ae Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:10 +0100 Subject: mm: add bitmap mm->flags field Patch series "mm: make mm->flags a bitmap and 64-bit on all arches". We are currently in the bizarre situation where we are constrained on the number of flags we can set in an mm_struct based on whether this is a 32-bit or 64-bit kernel. This is because mm->flags is an unsigned long field, which is 32-bits on a 32-bit system and 64-bits on a 64-bit system. In order to keep things functional across both architectures, we do not permit mm flag bits to be set above flag 31 (i.e. the 32nd bit). This is a silly situation, especially given how profligate we are in storing metadata in mm_struct, so let's convert mm->flags into a bitmap and allow ourselves as many bits as we like. In order to execute this change, we introduce a new opaque type - mm_flags_t - which wraps a bitmap. We go further and mark the bitmap field __private, which forces users to have to use accessors, which allows us to enforce atomicity rules around mm->flags (except on those occasions they are not required - fork, etc.) and makes it far easier to keep track of how mm flags are being utilised. In order to implement this change sensibly and in an iterative way, we start by introducing the type with the same bitsize as the current mm flags (system word size) and place it in union with mm->flags. We are then able to gradually update users as we go without being forced to do everything in a single patch. In the course of working on this series I noticed the MMF_* flag masks encounter a sign extension bug that, due to the 32-bit limit on mm->flags thus far, has not caused any issues in practice, but required fixing for this series.
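To make that sign-extension bug concrete before the fix lands later in the series, here is a minimal standalone illustration (not code from the series; BIT() is the kernel helper from linux/bits.h):

	unsigned long bad  = (1 << 31);	/* int shift: sign-extends to 0xffffffff80000000 */
	unsigned long good = BIT(31);	/* 1UL << 31: 0x0000000080000000 */

Any mask built by OR-ing such int-typed shifts inherits the problem as soon as bit 31 is involved, which is exactly what happens to MMF_INIT_MASK.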
We must make special dispensation for two cases - coredump and initialisation on fork, both of which use masks extensively. Since coredump flags are set in stone, we can safely assume they will remain in the first 32-bits of the flags. We therefore provide special non-atomic accessors for this case that access the first system word of flags, keeping everything there essentially the same. For mm->flags initialisation on fork, we adjust the logic to ensure all bits are cleared correctly, and then adjust the existing initialisation logic, dubbing the implementation utilising flags as legacy. This means we get the same fast operations as we do now, but in future we can also choose to update the forking logic to additionally propagate flags beyond 32-bits across fork. With this change in place we can, in future, decide to have as many bits as we please. Since the size of the bitmap will scale in system word multiples, there should be no issues with changes in alignment in mm_struct. Additionally, the really sensitive field (mmap_lock) is located prior to the flags field so this should have no impact on that either. This patch (of 10): We are currently in the bizarre situation where we are constrained on the number of flags we can set in an mm_struct based on whether this is a 32-bit or 64-bit kernel. This is because mm->flags is an unsigned long field, which is 32-bits on a 32-bit system and 64-bits on a 64-bit system. In order to keep things functional across both architectures, we do not permit mm flag bits to be set above flag 31 (i.e. the 32nd bit). This is a silly situation, especially given how profligate we are in storing metadata in mm_struct, so let's convert mm->flags into a bitmap and allow ourselves as many bits as we like. To keep things manageable, firstly we introduce the bitmap at system word size as a new field mm->_flags, in union. This means the new bitmap mm->_flags is bitwise exactly identical to the existing mm->flags field. We have an opportunity to also introduce some type safety here, so let's wrap the mm flags field as a struct and declare it as an mm_flags_t typedef to keep it consistent with vm_flags_t for VMAs. We make the internal field privately accessible, in order to force the use of helper functions so we can enforce that accesses are bitwise as required. We therefore introduce accessors prefixed with mm_flags_*() for callers to use. We place the bit parameter first so as to match the parameter ordering of the *_bit() functions. Having this temporary union arrangement allows us to incrementally swap over users of mm->flags patch-by-patch rather than having to do everything in one fell swoop. [lorenzo.stoakes@oracle.com: place __private in correct place, const-ify __mm_flags_get_word] Link: https://lkml.kernel.org/r/d4ba117d-6234-4069-b871-254d152d7d21@lucifer.local Link: https://lkml.kernel.org/r/cover.1755012943.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/9de8dfd9de8c95cd31622d6e52051ba0d1848f5a.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H.
Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3868ca1a25f9..4ed4a0b9dad6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -34,6 +34,8 @@ #include #include #include +#include +#include struct mempolicy; struct anon_vma; @@ -720,6 +722,36 @@ static inline void assert_fault_locked(struct vm_fault *vmf) } #endif /* CONFIG_PER_VMA_LOCK */ +static inline bool mm_flags_test(int flag, const struct mm_struct *mm) +{ + return test_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm) +{ + return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm) +{ + return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline void mm_flags_set(int flag, struct mm_struct *mm) +{ + set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline void mm_flags_clear(int flag, struct mm_struct *mm) +{ + clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline void mm_flags_clear_all(struct mm_struct *mm) +{ + bitmap_zero(ACCESS_PRIVATE(&mm->_flags, __mm_flags), NUM_MM_FLAG_BITS); +} + extern const struct vm_operations_struct vma_dummy_vm_ops; static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cf94df4955c7..0e001dbad455 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -927,6 +928,15 @@ struct mm_cid { }; #endif +/* + * Opaque type representing current mm_struct flag state. Must be accessed via + * mm_flags_xxx() helper functions. + */ +#define NUM_MM_FLAG_BITS BITS_PER_LONG +typedef struct { + DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); +} __private mm_flags_t; + struct kioctx_table; struct iommu_mm_data; struct mm_struct { @@ -1109,7 +1119,11 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - unsigned long flags; /* Must use atomic bitops to access */ + /* Temporary union while we convert users to mm_flags_t. */ + union { + unsigned long flags; /* Must use atomic bitops to access */ + mm_flags_t _flags; /* Must use mm_flags_* helpers to access */ + }; #ifdef CONFIG_AIO spinlock_t ioctx_lock; @@ -1219,6 +1233,28 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; +/* Set the first system word of mm flags, non-atomically. 
*/ +static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + + bitmap_copy(bitmap, &value, BITS_PER_LONG); +} + +/* Obtain a read-only view of the bitmap. */ +static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct *mm) +{ + return (const unsigned long *)ACCESS_PRIVATE(&mm->_flags, __mm_flags); +} + +/* Read the first system word of mm flags, non-atomically. */ +static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) +{ + const unsigned long *bitmap = __mm_flags_get_bitmap(mm); + + return bitmap_read(bitmap, 0, BITS_PER_LONG); +} + #define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; -- cgit v1.2.3 From 12e423ba4eaed7b1561b677d32e6599f932d03db Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:11 +0100 Subject: mm: convert core mm to mm_flags_*() accessors As part of the effort to move to mm->flags becoming a bitmap field, convert existing users to make use of the mm_flags_*() accessors which will, when the conversion is complete, be the only means of accessing mm_struct flags. This will result in the debug output being that of a bitmap, a minor change here, but since this is for debug only, this should have no bearing. Otherwise, no functional changes intended. [akpm@linux-foundation.org: fix typo in comment] Link: https://lkml.kernel.org/r/1eb2266f4408798a55bda00cb04545a3203aa572.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Baolin Wang Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 +- include/linux/khugepaged.h | 6 ++++-- include/linux/ksm.h | 6 +++--- include/linux/mm.h | 2 +- include/linux/mman.h | 2 +- include/linux/oom.h | 2 +- 6 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 14d424830fa8..84b7eebe0d68 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -327,7 +327,7 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma, * example, s390 kvm.
*/ return (vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags); + mm_flags_test(MMF_DISABLE_THP, vma->vm_mm); } static inline bool thp_disabled_by_hw(void) diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index ff6120463745..eb1946a70cff 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -2,6 +2,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H +#include + extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; @@ -20,13 +22,13 @@ extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { - if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags)) + if (mm_flags_test(MMF_VM_HUGEPAGE, oldmm)) __khugepaged_enter(mm); } static inline void khugepaged_exit(struct mm_struct *mm) { - if (test_bit(MMF_VM_HUGEPAGE, &mm->flags)) + if (mm_flags_test(MMF_VM_HUGEPAGE, mm)) __khugepaged_exit(mm); } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/include/linux/ksm.h b/include/linux/ksm.h index c17b955e7b0b..22e67ca7cba3 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -56,13 +56,13 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm) static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { /* Adding mm to ksm is best effort on fork. */ - if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) + if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) __ksm_enter(mm); } static inline int ksm_execve(struct mm_struct *mm) { - if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + if (mm_flags_test(MMF_VM_MERGE_ANY, mm)) return __ksm_enter(mm); return 0; @@ -70,7 +70,7 @@ static inline int ksm_execve(struct mm_struct *mm) static inline void ksm_exit(struct mm_struct *mm) { - if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) + if (mm_flags_test(MMF_VM_MERGEABLE, mm)) __ksm_exit(mm); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 4ed4a0b9dad6..34311ebe62cc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1949,7 +1949,7 @@ static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma, { VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1)); - if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) + if (!mm_flags_test(MMF_HAS_PINNED, vma->vm_mm)) return false; return folio_maybe_dma_pinned(folio); diff --git a/include/linux/mman.h b/include/linux/mman.h index de9e8e6229a4..0ba8a7e8b90a 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -201,7 +201,7 @@ static inline bool arch_memory_deny_write_exec_supported(void) static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { /* If MDWE is disabled, we have nothing to deny. */ - if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + if (!mm_flags_test(MMF_HAS_MDWE, current->mm)) return false; /* If the new VMA is not executable, we have nothing to deny. 
*/ diff --git a/include/linux/oom.h b/include/linux/oom.h index 1e0fc6931ce9..7b02bc1d0a7e 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -91,7 +91,7 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) */ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) { - if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) + if (unlikely(mm_flags_test(MMF_UNSTABLE, mm))) return VM_FAULT_SIGBUS; return 0; } -- cgit v1.2.3 From 39f8049cd49f7e88f89a33f97f996c7306e8be0b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:15 +0100 Subject: mm: update coredump logic to correctly use bitmap mm flags The coredump logic is slightly different from other users in that it both stores mm flags and additionally sets and gets using masks. Since the MMF_DUMPABLE_* flags must remain as they are for uABI reasons, and of course these are within the first 32-bits of the flags, it is reasonable to provide access to these in the same fashion so this logic can all still keep working as it has been. Therefore, introduce coredump-specific helpers __mm_flags_get_dumpable() and __mm_flags_set_mask_dumpable() for this purpose, and update all core dump users of mm flags to use these. [lorenzo.stoakes@oracle.com: abstract set_mask_bits() invocation to mm_types.h to satisfy ARC] Link: https://lkml.kernel.org/r/0e7ad263-1ff7-446d-81fe-97cff9c0e7ed@lucifer.local Link: https://lkml.kernel.org/r/2a5075f7e3c5b367d988178c79a3063d12ee53a9.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Christian Brauner Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 12 ++++++++++++ include/linux/sched/coredump.h | 18 +++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e001dbad455..9d224075d895 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1255,6 +1255,18 @@ static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) return bitmap_read(bitmap, 0, BITS_PER_LONG); } +/* + * Update the first system word of mm flags ONLY, applying the specified mask to + * it, then setting all flags specified by bits. 
+ */ +static inline void __mm_flags_set_mask_bits_word(struct mm_struct *mm, + unsigned long mask, unsigned long bits) +{ + unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + + set_mask_bits(bitmap, mask, bits); +} + #define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 6eb65ceed213..b7fafe999073 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -8,6 +8,20 @@ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ +static inline unsigned long __mm_flags_get_dumpable(struct mm_struct *mm) +{ + /* + * By convention, dumpable bits are contained in first 32 bits of the + * bitmap, so we can simply access this first unsigned long directly. + */ + return __mm_flags_get_word(mm); +} + +static inline void __mm_flags_set_mask_dumpable(struct mm_struct *mm, int value) +{ + __mm_flags_set_mask_bits_word(mm, MMF_DUMPABLE_MASK, value); +} + extern void set_dumpable(struct mm_struct *mm, int value); /* * This returns the actual value of the suid_dumpable flag. For things @@ -22,7 +36,9 @@ static inline int __get_dumpable(unsigned long mm_flags) static inline int get_dumpable(struct mm_struct *mm) { - return __get_dumpable(mm->flags); + unsigned long flags = __mm_flags_get_dumpable(mm); + + return __get_dumpable(flags); } #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 01f86753a05a3b971d69147d6d074c1a8d29b57d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:16 +0100 Subject: mm: correct sign-extension issue in MMF_* flag masks There is an issue with the mask declarations in linux/mm_types.h, which naively do (1 << bit) operations. Unfortunately this results in the 1 defaulting to a signed (32-bit) integer. When the compiler expands the MMF_INIT_MASK bitmask it comes up with: (((1 << 2) - 1) | (((1 << 9) - 1) << 2) | (1 << 24) | (1 << 28) | (1 << 30) | (1 << 31)) Which overflows the signed integer to -788,527,105. Implicitly converting this to an unsigned long results in sign-extension, and thus this value becomes 0xffffffffd10007ff, rather than the intended 0xd10007ff. While we're limited to a maximum of 32 bits in mm->flags, this isn't an issue as the remaining bits being masked will always be zero. However, now that we are moving towards having more bits in this field, this becomes an issue. Simply resolve this by using the _BITUL() helper to cast the shifted value to an unsigned long. [lorenzo.stoakes@oracle.com: prefer BIT() to _BITUL()] Link: https://lkml.kernel.org/r/a0290c77-cd88-46d6-8d9a-073be7600d88@lucifer.local Link: https://lkml.kernel.org/r/f92194bee8c92a04fd4c9b2c14c7e65229639300.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Hildenbrand Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H.
Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9d224075d895..de09ae2a0de6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1767,7 +1767,7 @@ enum { * the modes are SUID_DUMP_* defined in linux/sched/coredump.h */ #define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +#define MMF_DUMPABLE_MASK (BIT(MMF_DUMPABLE_BITS) - 1) /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 @@ -1782,13 +1782,13 @@ enum { #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS #define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) + ((BIT(MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + (BIT(MMF_DUMP_ANON_PRIVATE) | BIT(MMF_DUMP_ANON_SHARED) | \ + BIT(MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +# define MMF_DUMP_MASK_DEFAULT_ELF BIT(MMF_DUMP_ELF_HEADERS) #else # define MMF_DUMP_MASK_DEFAULT_ELF 0 #endif @@ -1808,7 +1808,7 @@ enum { #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DISABLE_THP_MASK BIT(MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* @@ -1821,16 +1821,15 @@ enum { #define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ #define MMF_HAS_MDWE 28 -#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) - +#define MMF_HAS_MDWE_MASK BIT(MMF_HAS_MDWE) #define MMF_HAS_MDWE_NO_INHERIT 29 #define MMF_VM_MERGE_ANY 30 -#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) +#define MMF_VM_MERGE_ANY_MASK BIT(MMF_VM_MERGE_ANY) #define MMF_TOPDOWN 31 /* mm searches top down by default */ -#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) +#define MMF_TOPDOWN_MASK BIT(MMF_TOPDOWN) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ -- cgit v1.2.3 From 19148a19da86f1b7d1a1b067c9f656b0f3a60fb1 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:17 +0100 Subject: mm: update fork mm->flags initialisation to use bitmap We now need to account for flag initialisation on fork. We retain the existing logic as much as we can, but dub the existing flag mask legacy. 
These flags are therefore required to fit in the first 32-bits of the flags field. However, further flag propagation upon fork can be implemented in mm_init() on a per-flag basis. We ensure we clear the entire bitmap prior to setting it, and use __mm_flags_get_word() and __mm_flags_set_word() to manipulate these legacy fields efficiently. Link: https://lkml.kernel.org/r/9fb8954a7a0f0184f012a8e66f8565bcbab014ba.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index de09ae2a0de6..69ce407b4343 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1831,16 +1831,23 @@ enum { #define MMF_TOPDOWN 31 /* mm searches top down by default */ #define MMF_TOPDOWN_MASK BIT(MMF_TOPDOWN) -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ +#define MMF_INIT_LEGACY_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) -static inline unsigned long mmf_init_flags(unsigned long flags) +/* Legacy flags must fit within 32 bits. */ +static_assert((u64)MMF_INIT_LEGACY_MASK <= (u64)UINT_MAX); + +/* + * Initialise legacy flags according to masks, propagating selected flags on + * fork. Further flag manipulation can be performed by the caller. + */ +static inline unsigned long mmf_init_legacy_flags(unsigned long flags) { if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) flags &= ~((1UL << MMF_HAS_MDWE) | (1UL << MMF_HAS_MDWE_NO_INHERIT)); - return flags & MMF_INIT_MASK; + return flags & MMF_INIT_LEGACY_MASK; } #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 8166353fb8841390bf3ffe1b923a5ddeb348e4f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:19 +0100 Subject: mm: replace mm->flags with bitmap entirely and set to 64 bits Now that we have updated all users of mm->flags to use the bitmap accessors, replace it with the bitmap version entirely. We are then able to move to having 64 bits of mm->flags on both 32-bit and 64-bit architectures. We also update the VMA userland tests to ensure that everything remains functional there. No functional changes intended, other than there now being 64 bits of available mm_struct flags.
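The shape of the finished conversion at a typical call site, as a hedged sketch (the flag and handle_pinned() are illustrative, not from the series):

	/* Before: raw atomic bitops on the unsigned long field. */
	if (test_bit(MMF_HAS_PINNED, &mm->flags))
		handle_pinned(mm);

	/* After: the opaque accessors; identical semantics, and bits above 31 now work too. */
	if (mm_flags_test(MMF_HAS_PINNED, mm))
		handle_pinned(mm);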
Link: https://lkml.kernel.org/r/e1f6654e016d36c43959764b01355736c5cbcdf8.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 ++++++------ include/linux/mm_types.h | 14 +++++--------- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 34311ebe62cc..b61e2d4858cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -724,32 +724,32 @@ static inline void assert_fault_locked(struct vm_fault *vmf) static inline bool mm_flags_test(int flag, const struct mm_struct *mm) { - return test_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm) { - return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm) { - return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline void mm_flags_set(int flag, struct mm_struct *mm) { - set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline void mm_flags_clear(int flag, struct mm_struct *mm) { - clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + clear_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline void mm_flags_clear_all(struct mm_struct *mm) { - bitmap_zero(ACCESS_PRIVATE(&mm->_flags, __mm_flags), NUM_MM_FLAG_BITS); + bitmap_zero(ACCESS_PRIVATE(&mm->flags, __mm_flags), NUM_MM_FLAG_BITS); } extern const struct vm_operations_struct vma_dummy_vm_ops; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 69ce407b4343..05475b5fd516 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -932,7 +932,7 @@ struct mm_cid { * Opaque type representing current mm_struct flag state. Must be accessed via * mm_flags_xxx() helper functions. 
*/ -#define NUM_MM_FLAG_BITS BITS_PER_LONG +#define NUM_MM_FLAG_BITS (64) typedef struct { DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); } __private mm_flags_t; @@ -1119,11 +1119,7 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - /* Temporary union while we convert users to mm_flags_t. */ - union { - unsigned long flags; /* Must use atomic bitops to access */ - mm_flags_t _flags; /* Must use mm_flags_* helpers to access */ - }; + mm_flags_t flags; /* Must use mm_flags_* helpers to access */ #ifdef CONFIG_AIO spinlock_t ioctx_lock; @@ -1232,7 +1232,7 @@ struct mm_struct { /* Set the first system word of mm flags, non-atomically. */ static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value) { - unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + unsigned long *bitmap = ACCESS_PRIVATE(&mm->flags, __mm_flags); bitmap_copy(bitmap, &value, BITS_PER_LONG); } @@ -1244,7 +1240,7 @@ static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value /* Obtain a read-only view of the bitmap. */ static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct *mm) { - return (const unsigned long *)ACCESS_PRIVATE(&mm->_flags, __mm_flags); + return (const unsigned long *)ACCESS_PRIVATE(&mm->flags, __mm_flags); } /* Read the first system word of mm flags, non-atomically. */ @@ -1262,7 +1258,7 @@ static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) static inline void __mm_flags_set_mask_bits_word(struct mm_struct *mm, unsigned long mask, unsigned long bits) { - unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + unsigned long *bitmap = ACCESS_PRIVATE(&mm->flags, __mm_flags); set_mask_bits(bitmap, mask, bits); } -- cgit v1.2.3 From 0f9ab62a6e44ef51ea3e7f1c552b447cf4eb20ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Aug 2025 10:30:08 +0200 Subject: mempool: rename struct mempool_s to struct mempool Drop the pointless _s prefix and align to the usual struct naming, to prepare for actually using the struct instead of the typedef, so that random headers don't need to include mempool.h just for having a pointer to the mempool.
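The payoff for headers, as a small hedged sketch (the header and struct names here are made up):

	/* frob.h -- hypothetical header: no #include of mempool.h needed. */
	struct mempool;

	struct frob_ctx {
		struct mempool *pool;	/* a forward declaration now suffices */
	};

With the struct carrying the conventional name, any header that only stores a pointer can forward-declare struct mempool instead of pulling in the full header.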
Link: https://lkml.kernel.org/r/20250812083105.371295-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Harry Yoo Reviewed-by: Vlastimil Babka Cc: Christoph Lameter (Ampere) Cc: David Rientjes Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/blkdev.h | 2 +- include/linux/mempool.h | 2 +- include/linux/netfs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fe1797bbec42..28ceaeffc0c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -199,7 +199,7 @@ struct gendisk { unsigned int zone_wplugs_hash_bits; atomic_t nr_zone_wplugs; spinlock_t zone_wplugs_lock; - struct mempool_s *zone_wplugs_pool; + struct mempool *zone_wplugs_pool; struct hlist_head *zone_wplugs_hash; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 7b151441341b..34941a4b9026 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -15,7 +15,7 @@ struct kmem_cache; typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); -typedef struct mempool_s { +typedef struct mempool { spinlock_t lock; int min_nr; /* nr of elements at *elements */ int curr_nr; /* Current nr of elements at *elements */ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 98c96d649bf9..72ee7d210a74 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -21,7 +21,7 @@ #include enum netfs_sreq_ref_trace; -typedef struct mempool_s mempool_t; +typedef struct mempool mempool_t; struct folio_queue; /** -- cgit v1.2.3 From 85b8cec15034e07500a6e5b8a5aea8185a3d775a Mon Sep 17 00:00:00 2001 From: Chris Li Date: Tue, 12 Aug 2025 00:10:59 -0700 Subject: mm: swap.h: Remove deleted field from comments The comment for struct swap_info_struct.lock incorrectly mentions fields that have already been deleted from the structure. Update the comments to accurately reflect the current struct swap_info_struct. There is no functional change. Link: https://lkml.kernel.org/r/20250812-swap-scan-list-v3-2-6d73504d267b@kernel.org Signed-off-by: Chris Li Reviewed-by: Kairui Song Acked-by: Nhat Pham Reviewed-by: Barry Song Cc: Baoquan He Cc: "Huang, Ying" Cc: Kemeng Shi Signed-off-by: Andrew Morton --- include/linux/swap.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a060d102e0d1..c2da85cb7fe7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,11 +320,8 @@ struct swap_info_struct { struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like - * swap_map, lowest_bit, highest_bit, - * inuse_pages, cluster_next, - * cluster_nr, lowest_alloc, - * highest_alloc, free/discard cluster - * list. other fields are only changed + * swap_map, inuse_pages and all cluster + * lists. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If -- cgit v1.2.3 From ec45783fce52f358c9e8680d2837bc0d477f16ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Aug 2025 16:57:55 +0200 Subject: memcg: optimize exit to user space memcg uses TIF_NOTIFY_RESUME to handle reclaiming on exit to user space. TIF_NOTIFY_RESUME is a multiplexing TIF bit, which is utilized by other entities as well. 
This results in an unconditional mem_cgroup_handle_over_high() call for every invocation of resume_user_mode_work(), which is a pointless exercise as most of the time there is no reclaim work to do. Especially since RSEQ is used by glibc, TIF_NOTIFY_RESUME is raised quite frequently and the empty calls show up in exit path profiling. Optimize this by doing a quick check of the reclaim condition before invoking it. [akpm@linux-foundation.org: remove now-unneeded test of memcg_nr_pages_over_high==0, per Shakeel] Link: https://lkml.kernel.org/r/87tt2b6zgs.ffs@tglx Signed-off-by: Thomas Gleixner Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Shakeel Butt Cc: Michal Hocko Cc: Muchun Song Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..9fa3afc90dd5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,7 +900,13 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(gfp_t gfp_mask); +void __mem_cgroup_handle_over_high(gfp_t gfp_mask); + +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) +{ + if (unlikely(current->memcg_nr_pages_over_high)) + __mem_cgroup_handle_over_high(gfp_mask); +} unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); -- cgit v1.2.3 From 9dc21bbd62edeae6f63e6f25e1edb7167452457b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 Aug 2025 14:54:53 +0100 Subject: prctl: extend PR_SET_THP_DISABLE to optionally exclude VM_HUGEPAGE Patch series "prctl: extend PR_SET_THP_DISABLE to only provide THPs when advised", v5. This will allow individual processes to opt-out of THP = "always" into THP = "madvise", without affecting other workloads on the system. This has been extensively discussed on the mailing list and has been summarized very well by David in the first patch, which also includes the links to alternatives; please refer to the first patch commit message for the motivation for this series. Patch 1 adds the PR_THP_DISABLE_EXCEPT_ADVISED flag to implement this, along with the MMF changes. Patch 2 is a cleanup patch for tva_flags that will allow the forced collapse case to be transmitted to vma_thp_disabled (which is done in patch 3). Patch 4 adds documentation for PR_SET_THP_DISABLE/PR_GET_THP_DISABLE. Patches 6-7 implement the selftests for PR_SET_THP_DISABLE for completely disabling THPs (old behaviour) and only enabling them when advised (PR_THP_DISABLE_EXCEPT_ADVISED). This patch (of 7): People want to make use of more THPs, for example, moving from the "never" system policy to "madvise", or from "madvise" to "always". While this is great news for every THP desperately waiting to get allocated out there, apparently there are some workloads that require a bit of care during that transition: individual processes may need to opt out of this behavior for various reasons, and this should be permitted without needing to make all other workloads on the system similarly opt out. The following scenarios are imaginable: (1) Switch from "none" system policy to "madvise"/"always", but keep THPs disabled for selected workloads. (2) Stay at "none" system policy, but enable THPs for selected workloads, making only these workloads use the "madvise" or "always" policy.
(3) Switch from "madvise" system policy to "always", but keep the "madvise" policy for selected workloads: allocate THPs only when advised. (4) Stay at "madvise" system policy, but enable THPs even when not advised for selected workloads -- "always" policy. Once can emulate (2) through (1), by setting the system policy to "madvise"/"always" while disabling THPs for all processes that don't want THPs. It requires configuring all workloads, but that is a user-space problem to sort out. (4) can be emulated through (3) in a similar way. Back when (1) was relevant in the past, as people started enabling THPs, we added PR_SET_THP_DISABLE, so relevant workloads that were not ready yet (i.e., used by Redis) were able to just disable THPs completely. Redis still implements the option to use this interface to disable THPs completely. With PR_SET_THP_DISABLE, we added a way to force-disable THPs for a workload -- a process, including fork+exec'ed process hierarchy. That essentially made us support (1): simply disable THPs for all workloads that are not ready for THPs yet, while still enabling THPs system-wide. The quest for handling (3) and (4) started, but current approaches (completely new prctl, options to set other policies per process, alternatives to prctl -- mctrl, cgroup handling) don't look particularly promising. Likely, the future will use bpf or something similar to implement better policies, in particular to also make better decisions about THP sizes to use, but this will certainly take a while as that work just started. Long story short: a simple enable/disable is not really suitable for the future, so we're not willing to add completely new toggles. While we could emulate (3)+(4) through (1)+(2) by simply disabling THPs completely for these processes, this is a step backwards, because these processes can no longer allocate THPs in regions where THPs were explicitly advised: regions flagged as VM_HUGEPAGE. Apparently, that imposes a problem for relevant workloads, because "not THPs" is certainly worse than "THPs only when advised". Could we simply relax PR_SET_THP_DISABLE, to "disable THPs unless not explicitly advised by the app through MAD_HUGEPAGE"? *maybe*, but this would change the documented semantics quite a bit, and the versatility to use it for debugging purposes, so I am not 100% sure that is what we want -- although it would certainly be much easier. So instead, as an easy way forward for (3) and (4), add an option to make PR_SET_THP_DISABLE disable *less* THPs for a process. In essence, this patch: (A) Adds PR_THP_DISABLE_EXCEPT_ADVISED, to be used as a flag in arg3 of prctl(PR_SET_THP_DISABLE) when disabling THPs (arg2 != 0). prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED). (B) Makes prctl(PR_GET_THP_DISABLE) return 3 if PR_THP_DISABLE_EXCEPT_ADVISED was set while disabling. Previously, it would return 1 if THPs were disabled completely. Now it returns the set flags as well: 3 if PR_THP_DISABLE_EXCEPT_ADVISED was set. (C) Renames MMF_DISABLE_THP to MMF_DISABLE_THP_COMPLETELY, to express the semantics clearly. Fortunately, there are only two instances outside of prctl() code. (D) Adds MMF_DISABLE_THP_EXCEPT_ADVISED to express "no THP except for VMAs with VM_HUGEPAGE" -- essentially "thp=madvise" behavior Fortunately, we only have to extend vma_thp_disabled(). (E) Indicates "THP_enabled: 0" in /proc/pid/status only if THPs are disabled completely Only indicating that THPs are disabled when they are really disabled completely, not only partially. 
For now, we don't add another interface to query whether THPs are disabled only partially (PR_THP_DISABLE_EXCEPT_ADVISED was set). If ever required, we could add a new entry. The documented semantics in the man page for PR_SET_THP_DISABLE "is inherited by a child created via fork(2) and is preserved across execve(2)" are maintained. This behavior, for example, allows for disabling THPs for a workload through the launching process (e.g., systemd where we fork() a helper process to then exec()). For now, MADV_COLLAPSE will *fail* in regions without VM_HUGEPAGE and VM_NOHUGEPAGE. As MADV_COLLAPSE is clear advice that user space thinks a THP is a good idea, we'll enable that separately next (requiring a bit of cleanup first). There is currently no way to prevent a process from issuing PR_SET_THP_DISABLE itself to re-enable THP. There are no real known users of re-enabling it, and it's against the purpose of the original interface. So if ever required, we could investigate just forbidding re-enabling, or make this somehow configurable. Link: https://lkml.kernel.org/r/20250815135549.130506-1-usamaarif642@gmail.com Link: https://lkml.kernel.org/r/20250815135549.130506-2-usamaarif642@gmail.com Acked-by: Zi Yan Acked-by: Usama Arif Tested-by: Usama Arif Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Signed-off-by: Usama Arif Cc: Arnd Bergmann Cc: Baolin Wang Cc: Barry Song Cc: Dev Jain Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Liam Howlett Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yafang Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 20 +++++++++++++++----- include/linux/mm_types.h | 13 +++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 84b7eebe0d68..22b8b067b295 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -318,16 +318,26 @@ struct thpsize { (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) static inline bool vma_thp_disabled(struct vm_area_struct *vma, vm_flags_t vm_flags) { + /* Are THPs disabled for this VMA? */ + if (vm_flags & VM_NOHUGEPAGE) + return true; + /* Are THPs disabled for all VMAs in the whole process? */ + if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, vma->vm_mm)) + return true; /* - * Explicitly disabled through madvise or prctl, or some - * architectures may disable THP for some mappings, for - * example, s390 kvm. + * Are THPs disabled only for VMAs where we didn't get an explicit + * advise to use them?
*/ - return (vm_flags & VM_NOHUGEPAGE) || - mm_flags_test(MMF_DISABLE_THP, vma->vm_mm); + if (vm_flags & VM_HUGEPAGE) + return false; + return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm); } static inline bool thp_disabled_by_hw(void) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 05475b5fd516..d247da2fdb52 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1792,19 +1792,16 @@ enum { #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HUGE_ZERO_FOLIO 18 /* mm has ever used the global huge zero folio */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */ -#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK BIT(MMF_DISABLE_THP) +#define MMF_DISABLE_THP_EXCEPT_ADVISED 23 /* no THP except when advised (e.g., VM_HUGEPAGE) */ +#define MMF_DISABLE_THP_COMPLETELY 24 /* no THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (BIT(MMF_DISABLE_THP_COMPLETELY) | \ + BIT(MMF_DISABLE_THP_EXCEPT_ADVISED)) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* -- cgit v1.2.3 From 1f1c061089dcd274befa0c76fb9f6e253a8368c0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 Aug 2025 14:54:54 +0100 Subject: mm/huge_memory: convert "tva_flags" to "enum tva_type" When determining which THP orders are eligible for a VMA mapping, we have previously specified tva_flags, however it turns out it is really not necessary to treat these as flags. Rather, we distinguish between distinct modes. The only case where we previously combined flags was with TVA_ENFORCE_SYSFS, but we can avoid this by observing that this is the default, except for MADV_COLLAPSE or an edge cases in collapse_pte_mapped_thp() and hugepage_vma_revalidate(), and adding a mode specifically for this case - TVA_FORCED_COLLAPSE. We have: * smaps handling for showing "THPeligible" * Pagefault handling * khugepaged handling * Forced collapse handling: primarily MADV_COLLAPSE, but also for an edge case in collapse_pte_mapped_thp() Disregarding the edge cases, we only want to ignore sysfs settings only when we are forcing a collapse through MADV_COLLAPSE, otherwise we want to enforce it, hence this patch does the following flag to enum conversions: * TVA_SMAPS | TVA_ENFORCE_SYSFS -> TVA_SMAPS * TVA_IN_PF | TVA_ENFORCE_SYSFS -> TVA_PAGEFAULT * TVA_ENFORCE_SYSFS -> TVA_KHUGEPAGED * 0 -> TVA_FORCED_COLLAPSE With this change, we immediately know if we are in the forced collapse case, which will be valuable next. 
Link: https://lkml.kernel.org/r/20250815135549.130506-3-usamaarif642@gmail.com Signed-off-by: David Hildenbrand Signed-off-by: Usama Arif Acked-by: Usama Arif Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Arnd Bergmann Cc: Barry Song Cc: Dev Jain Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Liam Howlett Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yafang Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 22b8b067b295..92ea0b9771fa 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -94,12 +94,15 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define THP_ORDERS_ALL \ (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT) -#define TVA_SMAPS (1 << 0) /* Will be used for procfs */ -#define TVA_IN_PF (1 << 1) /* Page fault handler */ -#define TVA_ENFORCE_SYSFS (1 << 2) /* Obey sysfs configuration */ +enum tva_type { + TVA_SMAPS, /* Exposing "THPeligible:" in smaps. */ + TVA_PAGEFAULT, /* Serving a page fault. */ + TVA_KHUGEPAGED, /* Khugepaged collapse. */ + TVA_FORCED_COLLAPSE, /* Forced collapse (e.g. MADV_COLLAPSE). */ +}; -#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ - (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) +#define thp_vma_allowable_order(vma, vm_flags, type, order) \ + (!!thp_vma_allowable_orders(vma, vm_flags, type, BIT(order))) #define split_folio(f) split_folio_to_list(f, NULL) @@ -264,14 +267,14 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders); /** * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma * @vma: the vm area to check * @vm_flags: use these vm_flags instead of vma->vm_flags - * @tva_flags: Which TVA flags to honour + * @type: TVA type * @orders: bitfield of all orders to consider * * Calculates the intersection of the requested hugepage orders and the allowed @@ -285,11 +288,14 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders) { - /* Optimization to check if required orders are enabled early. */ - if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) { + /* + * Optimization to check if required orders are enabled early. Only + * forced collapse ignores sysfs configs. 
+ */ + if (type != TVA_FORCED_COLLAPSE && vma_is_anonymous(vma)) { unsigned long mask = READ_ONCE(huge_anon_orders_always); if (vm_flags & VM_HUGEPAGE) @@ -303,7 +309,7 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, return 0; } - return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders); + return __thp_vma_allowable_orders(vma, vm_flags, type, orders); } struct thpsize { @@ -547,7 +553,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders) { return 0; -- cgit v1.2.3 From 8cdc4d27019356b0304308eb799484c899b62a87 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 Aug 2025 14:54:55 +0100 Subject: mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED Let's allow MADV_COLLAPSE to succeed on areas that have neither VM_HUGEPAGE nor VM_NOHUGEPAGE when we have THP disabled unless explicitly advised (PR_THP_DISABLE_EXCEPT_ADVISED). MADV_COLLAPSE is clear advice that we want to collapse. Note that we still respect the VM_NOHUGEPAGE flag, just like MADV_COLLAPSE always does. Consequently, MADV_COLLAPSE is now refused only on VM_NOHUGEPAGE with PR_THP_DISABLE_EXCEPT_ADVISED, including for shmem. Link: https://lkml.kernel.org/r/20250815135549.130506-4-usamaarif642@gmail.com Co-developed-by: Usama Arif Signed-off-by: Usama Arif Signed-off-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Arnd Bergmann Cc: Barry Song Cc: Dev Jain Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Liam Howlett Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yafang Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 92ea0b9771fa..1ac0d06fb3c1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -329,7 +329,7 @@ struct thpsize { * through madvise or prctl. */ static inline bool vma_thp_disabled(struct vm_area_struct *vma, - vm_flags_t vm_flags) + vm_flags_t vm_flags, bool forced_collapse) { /* Are THPs disabled for this VMA? */ if (vm_flags & VM_NOHUGEPAGE) @@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma, */ if (vm_flags & VM_HUGEPAGE) return false; + /* + * Forcing a collapse (e.g., madv_collapse) is clear advice to + * use THPs. + */ + if (forced_collapse) + return false; return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm); } -- cgit v1.2.3 From 53fbef56e07df822ea3029109ffca25328c2e5ac Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:51 +0100 Subject: mm: introduce memdesc_flags_t Patch series "Add and use memdesc_flags_t". At some point struct page will be separated from struct slab and struct folio. This is a step towards that by introducing a type for the 'flags' word of all three structures. This gives us a certain amount of type safety by establishing that some of these unsigned longs are different from other unsigned longs in that they contain things like node ID, section number and zone number in the upper bits.
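The core of the series is a small wrapper; a simplified sketch of the typedef and the kind of accessor it enables (memdesc_nid_sketch() is an illustrative name, the real accessors arrive in later patches of the series):

typedef struct {
	unsigned long f;
} memdesc_flags_t;

/*
 * Because a struct does not implicitly convert to or from unsigned long,
 * passing a random flags word where memdesc_flags_t is expected is now a
 * compile error.
 */
static inline int memdesc_nid_sketch(memdesc_flags_t mdf)
{
	return (mdf.f >> NODES_PGSHIFT) & NODES_MASK;
}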
That lets us have functions that can be easily called by anyone who has a slab, folio or page (but not easily by anyone else) to get the node or zone. There are going to be some unusual merge problems with this, as some odd bits of the kernel decide they want to print out the flags value or something similar by writing page->flags; they will now need to write page->flags.f instead. That's most of the churn here. Maybe we should be removing these things from the debug output? This patch (of 11): Wrap the unsigned long flags in a typedef. In upcoming patches, this will provide a strong hint that you can't just pass a random unsigned long to functions which take this as an argument. [willy@infradead.org: s/flags/flags.f/ in several architectures] Link: https://lkml.kernel.org/r/aKMgPRLD-WnkPxYm@casper.infradead.org [nicola.vetrini@gmail.com: mips: fix compilation error] Link: https://lore.kernel.org/lkml/CA+G9fYvkpmqGr6wjBNHY=dRp71PLCoi2341JxOudi60yqaeUdg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20250825214245.1838158-1-nicola.vetrini@gmail.com Link: https://lkml.kernel.org/r/20250805172307.1302730-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250805172307.1302730-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mm.h | 32 ++++++++++++++++---------------- include/linux/mm_inline.h | 12 ++++++------ include/linux/mm_types.h | 8 ++++++-- include/linux/mmzone.h | 2 +- include/linux/page-flags.h | 40 ++++++++++++++++++++-------------------- include/linux/pgalloc_tag.h | 7 ++++--- 6 files changed, 53 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b61e2d4858cf..da562f23f50c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1024,7 +1024,7 @@ static inline unsigned int compound_order(struct page *page) { struct folio *folio = (struct folio *)page; - if (!test_bit(PG_head, &folio->flags)) + if (!test_bit(PG_head, &folio->flags.f)) return 0; return folio_large_order(folio); } @@ -1554,7 +1554,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) */ static inline int page_zone_id(struct page *page) { - return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; + return (page->flags.f >> ZONEID_PGSHIFT) & ZONEID_MASK; } #ifdef NODE_NOT_IN_PAGE_FLAGS @@ -1562,7 +1562,7 @@ int page_to_nid(const struct page *page); #else static inline int page_to_nid(const struct page *page) { - return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK; + return (PF_POISONED_CHECK(page)->flags.f >> NODES_PGSHIFT) & NODES_MASK; } #endif @@ -1637,14 +1637,14 @@ static inline void page_cpupid_reset_last(struct page *page) #else static inline int folio_last_cpupid(struct folio *folio) { - return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; + return (folio->flags.f >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { - page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; + page->flags.f |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ @@ -1740,7 +1740,7 @@ static inline u8 page_kasan_tag(const struct page *page) u8 tag = KASAN_TAG_KERNEL; if (kasan_enabled()) { - tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + tag = (page->flags.f >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; tag ^= 0xff; } @@ -1755,12
+1755,12 @@ static inline void page_kasan_tag_set(struct page *page, u8 tag) return; tag ^= 0xff; - old_flags = READ_ONCE(page->flags); + old_flags = READ_ONCE(page->flags.f); do { flags = old_flags; flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; - } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) @@ -1804,13 +1804,13 @@ static inline pg_data_t *folio_pgdat(const struct folio *folio) #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { - page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); - page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; + page->flags.f &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); + page->flags.f |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } static inline unsigned long page_to_section(const struct page *page) { - return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; + return (page->flags.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } #endif @@ -2015,14 +2015,14 @@ static inline bool folio_is_longterm_pinnable(struct folio *folio) static inline void set_page_zone(struct page *page, enum zone_type zone) { - page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); - page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; + page->flags.f &= ~(ZONES_MASK << ZONES_PGSHIFT); + page->flags.f |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } static inline void set_page_node(struct page *page, unsigned long node) { - page->flags &= ~(NODES_MASK << NODES_PGSHIFT); - page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; + page->flags.f &= ~(NODES_MASK << NODES_PGSHIFT); + page->flags.f |= (node & NODES_MASK) << NODES_PGSHIFT; } static inline void set_page_links(struct page *page, enum zone_type zone, @@ -2064,7 +2064,7 @@ static inline long compound_nr(struct page *page) { struct folio *folio = (struct folio *)page; - if (!test_bit(PG_head, &folio->flags)) + if (!test_bit(PG_head, &folio->flags.f)) return 1; return folio_large_nr_pages(folio); } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 89b518ff097e..150302b4a905 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -143,7 +143,7 @@ static inline int lru_tier_from_refs(int refs, bool workingset) static inline int folio_lru_refs(struct folio *folio) { - unsigned long flags = READ_ONCE(folio->flags); + unsigned long flags = READ_ONCE(folio->flags.f); if (!(flags & BIT(PG_referenced))) return 0; @@ -156,7 +156,7 @@ static inline int folio_lru_refs(struct folio *folio) static inline int folio_lru_gen(struct folio *folio) { - unsigned long flags = READ_ONCE(folio->flags); + unsigned long flags = READ_ONCE(folio->flags.f); return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; } @@ -268,7 +268,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + set_mask_bits(&folio->flags.f, LRU_GEN_MASK | BIT(PG_active), flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ @@ -293,7 +293,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, /* for folio_migrate_flags() */ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? 
BIT(PG_active) : 0; - flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags); + flags = set_mask_bits(&folio->flags.f, LRU_GEN_MASK, flags); gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; lru_gen_update_size(lruvec, folio, gen, -1); @@ -304,9 +304,9 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, static inline void folio_migrate_refs(struct folio *new, struct folio *old) { - unsigned long refs = READ_ONCE(old->flags) & LRU_REFS_MASK; + unsigned long refs = READ_ONCE(old->flags.f) & LRU_REFS_MASK; - set_mask_bits(&new->flags, LRU_REFS_MASK, refs); + set_mask_bits(&new->flags.f, LRU_REFS_MASK, refs); } #else /* !CONFIG_LRU_GEN */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d247da2fdb52..d934a3a5b443 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -34,6 +34,10 @@ struct address_space; struct futex_private_hash; struct mem_cgroup; +typedef struct { + unsigned long f; +} memdesc_flags_t; + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -72,7 +76,7 @@ struct mem_cgroup; #endif struct page { - unsigned long flags; /* Atomic flags, some possibly + memdesc_flags_t flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. @@ -383,7 +387,7 @@ struct folio { union { struct { /* public: */ - unsigned long flags; + memdesc_flags_t flags; union { struct list_head lru; /* private: avoid cluttering the output */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9d3ea9085556..990560cd99ee 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1186,7 +1186,7 @@ static inline bool zone_is_empty(struct zone *zone) static inline enum zone_type page_zonenum(const struct page *page) { ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); - return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; + return (page->flags.f >> ZONES_PGSHIFT) & ZONES_MASK; } static inline enum zone_type folio_zonenum(const struct folio *folio) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8d3fa3a91ce4..d53a86e68c89 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -217,7 +217,7 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page * cold cacheline in some cases. 
*/ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && - test_bit(PG_head, &page->flags)) { + test_bit(PG_head, &page->flags.f)) { /* * We can safely access the field of the @page[1] with PG_head * because the @page is a compound page composed with at least @@ -325,14 +325,14 @@ static __always_inline int PageTail(const struct page *page) static __always_inline int PageCompound(const struct page *page) { - return test_bit(PG_head, &page->flags) || + return test_bit(PG_head, &page->flags.f) || READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { - return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; + return READ_ONCE(page->flags.f) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM @@ -349,8 +349,8 @@ static const unsigned long *const_folio_flags(const struct folio *folio, const struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); - VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); - return &page[n].flags; + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); + return &page[n].flags.f; } static unsigned long *folio_flags(struct folio *folio, unsigned n) @@ -358,8 +358,8 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); - VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); - return &page[n].flags; + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); + return &page[n].flags.f; } /* @@ -449,37 +449,37 @@ FOLIO_CLEAR_FLAG(name, page) #define TESTPAGEFLAG(uname, lname, policy) \ FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ static __always_inline int Page##uname(const struct page *page) \ -{ return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags.f); } #define SETPAGEFLAG(uname, lname, policy) \ FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ -{ set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define CLEARPAGEFLAG(uname, lname, policy) \ FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ -{ clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __SETPAGEFLAG(uname, lname, policy) \ __FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ -{ __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __CLEARPAGEFLAG(uname, lname, policy) \ __FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ -{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTSETFLAG(uname, lname, policy) \ FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ -{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTCLEARFLAG(uname, lname, policy) \ FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ -{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define 
PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -846,7 +846,7 @@ static __always_inline bool folio_test_head(const struct folio *folio) static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); - return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); + return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page); } __SETPAGEFLAG(Head, head, PF_ANY) @@ -1170,28 +1170,28 @@ static __always_inline int PageAnonExclusive(const struct page *page) */ if (PageHuge(page)) page = compound_head(page); - return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void SetPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void __ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } #ifdef CONFIG_MMU @@ -1241,7 +1241,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ static inline int folio_has_private(const struct folio *folio) { - return !!(folio->flags & PAGE_FLAGS_PRIVATE); + return !!(folio->flags.f & PAGE_FLAGS_PRIVATE); } #undef PF_ANY diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 8a7f4f802c57..38a82d65e58e 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -107,7 +107,8 @@ static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref, if (static_key_enabled(&mem_profiling_compressed)) { pgalloc_tag_idx idx; - idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask; + idx = (page->flags.f >> alloc_tag_ref_offs) & + alloc_tag_ref_mask; idx_to_ref(idx, ref); handle->page = page; } else { @@ -149,11 +150,11 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code idx = (unsigned long)ref_to_idx(ref); idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs; do { - old_flags = READ_ONCE(page->flags); + old_flags = READ_ONCE(page->flags.f); flags = old_flags; flags &= ~(alloc_tag_ref_mask << alloc_tag_ref_offs); flags |= idx; - } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags))); } else { if (WARN_ON(!handle.ref || !ref)) return; -- cgit v1.2.3 From 56d578c1300f7efe9605b75714173dd3fda16fe2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:52 +0100 Subject: mm: convert page_to_section() to memdesc_section() Pass in the memdesc_flags_t instead of a pointer to the page. This will allow us to remove a few conversions to struct page in upcoming patches. 
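At a call site the conversion is mechanical; a sketch of a hypothetical caller, passing the flags word the page already carries:

/* Before: unsigned long sec = page_to_section(page); */
unsigned long sec = memdesc_section(page->flags);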
Link: https://lkml.kernel.org/r/20250805172307.1302730-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index da562f23f50c..82617c4cfa24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1808,9 +1808,9 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags.f |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline unsigned long page_to_section(const struct page *page) +static inline unsigned long memdesc_section(memdesc_flags_t mdf) { - return (page->flags.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; + return (mdf.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } #endif -- cgit v1.2.3 From eb00fdd84ddabd6948d26595bb5e8c1302220d37 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:53 +0100 Subject: mm: introduce memdesc_nid() Remove a conversion from folio to page by passing the folio->flags (which are a copy of the page->flags) to the new memdesc_nid() function. Link: https://lkml.kernel.org/r/20250805172307.1302730-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mm.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 82617c4cfa24..00c8a54127d3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1558,17 +1558,22 @@ static inline int page_zone_id(struct page *page) } #ifdef NODE_NOT_IN_PAGE_FLAGS -int page_to_nid(const struct page *page); +int memdesc_nid(memdesc_flags_t mdf); #else -static inline int page_to_nid(const struct page *page) +static inline int memdesc_nid(memdesc_flags_t mdf) { - return (PF_POISONED_CHECK(page)->flags.f >> NODES_PGSHIFT) & NODES_MASK; + return (mdf.f >> NODES_PGSHIFT) & NODES_MASK; } #endif +static inline int page_to_nid(const struct page *page) +{ + return memdesc_nid(PF_POISONED_CHECK(page)->flags); +} + static inline int folio_nid(const struct folio *folio) { - return page_to_nid(&folio->page); + return memdesc_nid(folio->flags); } #ifdef CONFIG_NUMA_BALANCING @@ -1791,14 +1796,14 @@ static inline pg_data_t *page_pgdat(const struct page *page) return NODE_DATA(page_to_nid(page)); } -static inline struct zone *folio_zone(const struct folio *folio) +static inline pg_data_t *folio_pgdat(const struct folio *folio) { - return page_zone(&folio->page); + return NODE_DATA(folio_nid(folio)); } -static inline pg_data_t *folio_pgdat(const struct folio *folio) +static inline struct zone *folio_zone(const struct folio *folio) { - return page_pgdat(&folio->page); + return &folio_pgdat(folio)->node_zones[folio_zonenum(folio)]; } #ifdef SECTION_IN_PAGE_FLAGS -- cgit v1.2.3 From 4aff03fbe508780394039053bebfc4f4800b286e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:54 +0100 Subject: mm: introduce memdesc_zonenum() Remove a conversion from folio to page by passing the folio->flags (which are a copy of the page->flags) to the new memdesc_zonenum() function. 
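Together with memdesc_nid() above, a folio's node and zone can now be derived purely from its flags word; an illustrative sketch of the chain (memdesc_zonenum() is added in the hunk just below):

int nid = memdesc_nid(folio->flags);
enum zone_type zt = memdesc_zonenum(folio->flags);
struct zone *zone = &NODE_DATA(nid)->node_zones[zt];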
Link: https://lkml.kernel.org/r/20250805172307.1302730-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 990560cd99ee..80a3b6642603 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1183,15 +1183,20 @@ static inline bool zone_is_empty(struct zone *zone) #define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) +static inline enum zone_type memdesc_zonenum(memdesc_flags_t flags) +{ + ASSERT_EXCLUSIVE_BITS(flags.f, ZONES_MASK << ZONES_PGSHIFT); + return (flags.f >> ZONES_PGSHIFT) & ZONES_MASK; +} + static inline enum zone_type page_zonenum(const struct page *page) { - ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); - return (page->flags.f >> ZONES_PGSHIFT) & ZONES_MASK; + return memdesc_zonenum(page->flags); } static inline enum zone_type folio_zonenum(const struct folio *folio) { - return page_zonenum(&folio->page); + return memdesc_zonenum(folio->flags); } #ifdef CONFIG_ZONE_DEVICE -- cgit v1.2.3 From 89ef6ad6fa849b780b5a5caae9068261603e1738 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:57 +0100 Subject: mm: introduce memdesc_is_zone_device() Remove the conversion from folio to page in folio_is_zone_device() by introducing memdesc_is_zone_device() which takes a memdesc_flags_t from either a page or a folio. Link: https://lkml.kernel.org/r/20250805172307.1302730-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 80a3b6642603..fe13ad175fed 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1200,14 +1200,14 @@ static inline enum zone_type folio_zonenum(const struct folio *folio) } #ifdef CONFIG_ZONE_DEVICE -static inline bool is_zone_device_page(const struct page *page) +static inline bool memdesc_is_zone_device(memdesc_flags_t mdf) { - return page_zonenum(page) == ZONE_DEVICE; + return memdesc_zonenum(mdf) == ZONE_DEVICE; } static inline struct dev_pagemap *page_pgmap(const struct page *page) { - VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page); + VM_WARN_ON_ONCE_PAGE(!memdesc_is_zone_device(page->flags), page); return page_folio(page)->pgmap; } @@ -1222,9 +1222,9 @@ static inline struct dev_pagemap *page_pgmap(const struct page *page) static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { - if (is_zone_device_page(a) != is_zone_device_page(b)) + if (memdesc_is_zone_device(a->flags) != memdesc_is_zone_device(b->flags)) return false; - if (!is_zone_device_page(a)) + if (!memdesc_is_zone_device(a->flags)) return true; return page_pgmap(a) == page_pgmap(b); } @@ -1232,7 +1232,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else -static inline bool is_zone_device_page(const struct page *page) +static inline bool memdesc_is_zone_device(memdesc_flags_t mdf) { return false; } @@ -1247,9 +1247,14 @@ static inline struct dev_pagemap *page_pgmap(const 
struct page *page) } #endif +static inline bool is_zone_device_page(const struct page *page) +{ + return memdesc_is_zone_device(page->flags); +} + static inline bool folio_is_zone_device(const struct folio *folio) { - return is_zone_device_page(&folio->page); + return memdesc_is_zone_device(folio->flags); } static inline bool is_zone_movable_page(const struct page *page) -- cgit v1.2.3 From 7cfe9cafb6adebc13a246bebafcd69cd37add4e6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:58 +0100 Subject: mm: reimplement folio_is_device_private() For callers of folio_is_device_private(), we save a folio->page->folio conversion. Callers of is_device_private_page() simply move the conversion of page->folio from the implementation of page_pgmap() to is_device_private_page(). Link: https://lkml.kernel.org/r/20250805172307.1302730-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/memremap.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 4aa151914eab..5d18cb7a70e5 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -157,16 +157,17 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) return 1 << pgmap->vmemmap_shift; } -static inline bool is_device_private_page(const struct page *page) +static inline bool folio_is_device_private(const struct folio *folio) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && - is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; + folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_PRIVATE; } -static inline bool folio_is_device_private(const struct folio *folio) +static inline bool is_device_private_page(const struct page *page) { - return is_device_private_page(&folio->page); + return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && + folio_is_device_private(page_folio(page)); } static inline bool is_pci_p2pdma_page(const struct page *page) -- cgit v1.2.3 From bd0dbbb3fd902c7eea7eb166d91bda4530a8de96 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:59 +0100 Subject: mm: reimplement folio_is_device_coherent() For callers of folio_is_device_coherent(), we save a folio->page->folio conversion. Callers of is_device_coherent_page() simply move the conversion of page->folio from the implementation of page_pgmap() to is_device_coherent_page(). 
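Schematically, the folio helper becomes the primary implementation and the page helper a thin wrapper around it; a sketch of the resulting call pattern with a hypothetical caller:

/* A caller that already has a folio tests it directly - no &folio->page detour: */
if (folio_is_device_coherent(folio))
	folio_get(folio);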
Link: https://lkml.kernel.org/r/20250805172307.1302730-10-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/memremap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 5d18cb7a70e5..06d29794abe6 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -177,15 +177,15 @@ static inline bool is_pci_p2pdma_page(const struct page *page) page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; } -static inline bool is_device_coherent_page(const struct page *page) +static inline bool folio_is_device_coherent(const struct folio *folio) { - return is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; + return folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_COHERENT; } -static inline bool folio_is_device_coherent(const struct folio *folio) +static inline bool is_device_coherent_page(const struct page *page) { - return is_device_coherent_page(&folio->page); + return folio_is_device_coherent(page_folio(page)); } static inline bool is_fsdax_page(const struct page *page) -- cgit v1.2.3 From c995ac3aa3747ec2a0373e5f319a22e0cb31d613 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:23:00 +0100 Subject: mm: reimplement folio_is_fsdax() For callers of folio_is_fsdax(), we save a folio->page->folio conversion. Callers of is_fsdax_page() simply move the conversion of page->folio from the implementation of page_pgmap() to is_fsdax_page(). Link: https://lkml.kernel.org/r/20250805172307.1302730-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/memremap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 06d29794abe6..450d4bb6835c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -188,15 +188,15 @@ static inline bool is_device_coherent_page(const struct page *page) return folio_is_device_coherent(page_folio(page)); } -static inline bool is_fsdax_page(const struct page *page) +static inline bool folio_is_fsdax(const struct folio *folio) { - return is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; + return folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_FS_DAX; } -static inline bool folio_is_fsdax(const struct folio *folio) +static inline bool is_fsdax_page(const struct page *page) { - return is_fsdax_page(&folio->page); + return folio_is_fsdax(page_folio(page)); } #ifdef CONFIG_ZONE_DEVICE -- cgit v1.2.3 From 88df6ab2f34b60837ebdab64b2514f356d5ebb65 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:23:01 +0100 Subject: mm: add folio_is_pci_p2pdma() Reimplement is_pci_p2pdma_page() in terms of folio_is_pci_p2pdma(). Moves the page_folio() call from inside page_pgmap() to is_pci_p2pdma_page(). This removes a page_folio() call from try_grab_folio() which already has a folio and can pass it in. 
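For example, the P2PDMA rejection in the GUP path can now take the folio it already holds; a rough sketch under that assumption, not the exact kernel code:

/* In try_grab_folio(), which already has the folio in hand: */
if (unlikely(!(flags & FOLL_PCI_P2PDMA) && folio_is_pci_p2pdma(folio)))
	return -EREMOTEIO;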
Link: https://lkml.kernel.org/r/20250805172307.1302730-12-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Shakeel Butt Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/memremap.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 450d4bb6835c..aa1b6aa877a0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -170,11 +170,17 @@ static inline bool is_device_private_page(const struct page *page) folio_is_device_private(page_folio(page)); } +static inline bool folio_is_pci_p2pdma(const struct folio *folio) +{ + return IS_ENABLED(CONFIG_PCI_P2PDMA) && + folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; +} + static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && - is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; + folio_is_pci_p2pdma(page_folio(page)); } static inline bool folio_is_device_coherent(const struct folio *folio) -- cgit v1.2.3 From 7bebb41b96b5a898134b757fda520b7b990a91fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 08:10:10 +0200 Subject: mm: remove write_cache_pages No users left. Link: https://lkml.kernel.org/r/20250818061017.1526853-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: David Hildenbrand Cc: Kent Overstreet Cc: Konstantin Komarov Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/writeback.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a2848d731a46..2a7e134d03ee 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -360,12 +360,6 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb); struct folio *writeback_iter(struct address_space *mapping, struct writeback_control *wbc, struct folio *folio, int *error); -typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, - void *data); - -int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, -- cgit v1.2.3 From ef49b7b39d50b9e4f9d63e64f5d8acafe3c71158 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 26 Aug 2025 15:13:44 +0000 Subject: maple_tree: fix MAPLE_PARENT_RANGE32 and parent pointer docs MAPLE_PARENT_RANGE32 should be 0x02 as a 32 bit node is indicated by the bit pattern 0b010 which is the hex value 0x02. There are no users currently, so there is no associated bug with this wrong value. Fix typo Note -> Node and replace x with b to indicate binary values. Link: https://lkml.kernel.org/r/20250826151344.403286-1-sidhartha.kumar@oracle.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index bafe143b1f78..41e633264e51 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -57,17 +57,17 @@ * MT_FLAGS_ALLOC_RANGE flag. 
 * * Node types: - * 0x??1 = Root - * 0x?00 = 16 bit nodes - * 0x010 = 32 bit nodes - * 0x110 = 64 bit nodes + * 0b??1 = Root + * 0b?00 = 16 bit nodes + * 0b010 = 32 bit nodes + * 0b110 = 64 bit nodes * * Slot size and location in the parent pointer: * type : slot location - * 0x??1 : Root - * 0x?00 : 16 bit values, type in 0-1, slot in 2-6 - * 0x010 : 32 bit values, type in 0-2, slot in 3-6 - * 0x110 : 64 bit values, type in 0-2, slot in 3-6 + * 0b??1 : Root + * 0b?00 : 16 bit values, type in 0-1, slot in 2-6 + * 0b010 : 32 bit values, type in 0-2, slot in 3-6 + * 0b110 : 64 bit values, type in 0-2, slot in 3-6 */ /* -- cgit v1.2.3 From cf1dec76ba8a00b20e51d205f3c9f5c45bc96df2 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Aug 2025 14:55:35 -0700 Subject: mm/filemap: add AS_KERNEL_FILE Patch series "introduce kernel file mapped folios", v4. Btrfs currently tracks its metadata pages in the page cache, using a fake inode (fs_info->btree_inode) with offsets corresponding to where the metadata is stored in the filesystem's full logical address space. A consequence of this is that when btrfs uses filemap_add_folio(), this usage is charged to the cgroup of whichever task happens to be running at the time. These folios don't belong to any particular user cgroup, so I don't think it makes much sense for them to be charged in that way. Some negative consequences as a result: - A task can be holding some important btrfs locks, then need to look up some metadata and go into reclaim, extending how long it holds those locks and unfairly pushing its own reclaim pain onto other cgroups. - If that cgroup goes into reclaim, it might reclaim these folios, which a different, non-reclaiming cgroup might need soon. This is naturally offset by LRU reclaim, but still. We have two options for how to manage such file pages: 1. charge them to the root cgroup. 2. don't charge them to any cgroup at all. Option 2 breaks the invariant that every mapped page has a cgroup. This is workable, but unnecessarily risky. Therefore, go with option 1. A very similar proposal to use the root cgroup was previously made by Qu, where he eventually proposed the idea of setting it per address_space. This makes good sense for the btrfs use case, as the behavior should apply to all use of the address_space, not just to select allocations. I.e., if someone adds another filemap_add_folio() call using btrfs's btree_inode, we would almost certainly want to account that to the root cgroup as well. This patch (of 3): Add the flag AS_KERNEL_FILE to the address_space to indicate that this mapping's memory is exempt from the usual memcg accounting.
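A sketch of how a filesystem would opt a mapping in; the btree_inode name follows the btrfs use case above, and using set_bit() directly (rather than a dedicated helper) is an assumption of this sketch:

/*
 * At mount time, mark the metadata inode's mapping as a kernel file;
 * subsequent filemap_add_folio() calls on it are then charged to the
 * root cgroup instead of whichever task happens to be running.
 */
struct address_space *mapping = fs_info->btree_inode->i_mapping;
set_bit(AS_KERNEL_FILE, &mapping->flags);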
[boris@bur.io: fix CONFIG_MEMCG build for AS_KERNEL_FILE] Link: https://lkml.kernel.org/r/6de59ddeec81b5c294d337c001ba0061631d4ec6.1755816635.git.boris@bur.io Link: https://lore.kernel.org/linux-mm/b5fef5372ae454a7b6da4f2f75c427aeab6a07d6.1727498749.git.wqu@suse.com/ Link: https://lkml.kernel.org/r/f09c4e2c90351d4cb30a1969f7a863b9238bd291.1755812945.git.boris@bur.io Signed-off-by: Boris Burkov Suggested-by: Qu Wenruo Suggested-by: Shakeel Butt Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 ++ include/linux/pagemap.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9fa3afc90dd5..e693978b2022 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1059,6 +1059,8 @@ extern int mem_cgroup_init(void); #define MEM_CGROUP_ID_SHIFT 0 +#define root_mem_cgroup (NULL) + static inline struct mem_cgroup *folio_memcg(struct folio *folio) { return NULL; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 12a12dae727d..f0dfdfb13cd9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -211,6 +211,8 @@ enum mapping_flags { folio contents */ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, + AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't + account usage to user cgroups */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, -- cgit v1.2.3 From e3a9ac4e866ea746475b4026819a8c08ec1142e6 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Aug 2025 14:55:36 -0700 Subject: mm: add vmstat for kernel_file pages Kernel file pages are tricky to track because they are indistinguishable from the pages of files whose usage is accounted to the root cgroup. To maintain good accounting, introduce a vmstat counter tracking kernel file pages. Confirmed that these work as expected at a high level by mounting a btrfs using AS_KERNEL_FILE for metadata pages, and seeing the counter rise with fs usage, then go back to a minimal level after drop_caches, and finally down to 0 after unmounting the fs. Link: https://lkml.kernel.org/r/08ff633e3a005ed5f7691bfd9f58a5df8e474339.1755812945.git.boris@bur.io Signed-off-by: Boris Burkov Suggested-by: Shakeel Butt Acked-by: Shakeel Butt Tested-by: syzbot@syzkaller.appspotmail.com Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Qu Wenruo Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fe13ad175fed..f3272ef5131b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -259,6 +259,7 @@ enum node_stat_item { NR_HUGETLB, #endif NR_BALLOON_PAGES, + NR_KERNEL_FILE_PAGES, NR_VM_NODE_STAT_ITEMS }; -- cgit v1.2.3 From 98c94f1035fc0c82ab008854a165df2c20c0cb6a Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 27 Aug 2025 07:01:05 +0000 Subject: mm/pageblock-flags: remove PB_migratetype_bits/PB_migrate_end enum pageblock_bits defines the meaning of pageblock bits. Currently PB_migratetype_bits says the lowest 3 bits represent the migratetype, and the definitions of PB_migrate_end/MIGRATETYPE_MASK rely on it via magical computation. Remove the definition of PB_migratetype_bits/PB_migrate_end.
Use PB_migrate_[0|1|2] to represent the lowest bits for the migratetype. Then we can simplify the related definitions. Also, MIGRATETYPE_AND_ISO_MASK is MIGRATETYPE_MASK plus the isolation bit. Using MIGRATETYPE_MASK in the definition of MIGRATETYPE_AND_ISO_MASK looks cleaner. No functional change intended. Link: https://lkml.kernel.org/r/20250827070105.16864-3-richard.weiyang@gmail.com Signed-off-by: Wei Yang Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Zi Yan Cc: Vlastimil Babka Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/pageblock-flags.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 6a44be0f39f4..e046278a01fa 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -13,12 +13,11 @@ #include -#define PB_migratetype_bits 3 /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_migrate, - PB_migrate_end = PB_migrate + PB_migratetype_bits - 1, - /* 3 bits required for migrate types */ + PB_migrate_0, + PB_migrate_1, + PB_migrate_2, PB_compact_skip,/* If set the block is skipped by compaction */ #ifdef CONFIG_MEMORY_ISOLATION @@ -37,11 +36,10 @@ enum pageblock_bits { #define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS)) -#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1) +#define MIGRATETYPE_MASK (BIT(PB_migrate_0)|BIT(PB_migrate_1)|BIT(PB_migrate_2)) #ifdef CONFIG_MEMORY_ISOLATION -#define MIGRATETYPE_AND_ISO_MASK \ - (((1UL << (PB_migrate_end + 1)) - 1) | BIT(PB_migrate_isolate)) +#define MIGRATETYPE_AND_ISO_MASK (MIGRATETYPE_MASK | BIT(PB_migrate_isolate)) #else #define MIGRATETYPE_AND_ISO_MASK MIGRATETYPE_MASK #endif -- cgit v1.2.3 From 09a616cbb371e6b843e536f00e38d6b43d796ac4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 28 Aug 2025 10:12:32 -0700 Subject: mm/damon/core: add damon_ctx->addr_unit Patch series "mm/damon: support ARM32 with LPAE", v3. Previously, DAMON's physical address space monitoring only supported memory ranges below 4GB on LPAE-enabled systems. This was due to the use of 'unsigned long' in 'struct damon_addr_range', which is 32-bit on ARM32 even with LPAE enabled[1]. To add DAMON support for ARM32 with LPAE enabled, a new core layer parameter called 'addr_unit' was introduced[2]. The operations set layer can translate a core layer address to the real address by multiplying the core layer address by the parameter value. Support of the parameter is up to each operations layer implementation, though. For example, operations set implementations for virtual address space can simply ignore the parameter. Add the support to paddr, which is the DAMON operations set implementation for the physical address space, as we have a clear use case for that. This patch (of 11): In some cases, some of the real addresses handled by the underlying operations set cannot be handled by DAMON, since it uses only 'unsigned long' as the address type. Using DAMON for physical address space monitoring of 32 bit ARM devices with large physical address extension (LPAE) is one example[1]. Add a parameter named 'addr_unit' to the core layer to help with such cases. DAMON core API callers can set it as the scale factor that will be used by the operations set for translating the core layer's addresses to real addresses, by multiplying the core layer address by the parameter value. Support of the parameter is up to each operations set layer.
The support from the physical address space operations set (paddr) will be added by the following commits. Link: https://lkml.kernel.org/r/20250828171242.59810-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250828171242.59810-2-sj@kernel.org Link: https://lore.kernel.org/20250408075553.959388-1-zuoze1@huawei.com [1] Link: https://lore.kernel.org/all/20250416042551.158131-1-sj@kernel.org/ [2] Signed-off-by: SeongJae Park Signed-off-by: Quanmin Yan Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Kefeng Wang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: ze zuo Signed-off-by: Andrew Morton --- include/linux/damon.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index d01bfee80bd6..6fa52f7495d9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -746,7 +746,7 @@ struct damon_attrs { * Accesses to other fields must be protected by themselves. * * @ops: Set of monitoring operations for given use cases. - * + * @addr_unit: Scale factor for core to ops address conversion. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. */ @@ -788,6 +788,7 @@ struct damon_ctx { struct mutex kdamond_lock; struct damon_operations ops; + unsigned long addr_unit; struct list_head adaptive_targets; struct list_head schemes; -- cgit v1.2.3 From d8f867fa0825fb3e358457566d7326d8aab2406a Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Thu, 28 Aug 2025 10:12:42 -0700 Subject: mm/damon: add damon_ctx->min_sz_region Adopting addr_unit would make DAMON_MIN_REGION 'addr_unit * 4096' bytes and cause data alignment issues[1]. Add damon_ctx->min_sz_region to change DAMON_MIN_REGION from a global macro value to a per-context variable. Link: https://lkml.kernel.org/r/20250828171242.59810-12-sj@kernel.org Link: https://lore.kernel.org/all/527714dd-0e33-43ab-bbbd-d89670ba79e7@huawei.com [1] Signed-off-by: Quanmin Yan Signed-off-by: SeongJae Park Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Kefeng Wang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: ze zuo Signed-off-by: Andrew Morton --- include/linux/damon.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 6fa52f7495d9..ec8716292c09 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -747,6 +747,7 @@ struct damon_attrs { * * @ops: Set of monitoring operations for given use cases. * @addr_unit: Scale factor for core to ops address conversion. + * @min_sz_region: Minimum region size. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list.
*/ @@ -789,6 +790,7 @@ struct damon_ctx { struct damon_operations ops; unsigned long addr_unit; + unsigned long min_sz_region; struct list_head adaptive_targets; struct list_head schemes; @@ -877,7 +879,7 @@ static inline void damon_insert_region(struct damon_region *r, void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, - unsigned int nr_ranges); + unsigned int nr_ranges, unsigned long min_sz_region); void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damon_attrs *attrs); -- cgit v1.2.3 From 1e332f303ae93ba4d38b480b1bb5a08f833306f6 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 28 Aug 2025 15:03:11 +0200 Subject: pagevec.h: add `const` to pointer parameters of getter functions For improved const-correctness. Link: https://lkml.kernel.org/r/20250828130311.772993-1-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: SeongJae Park Reviewed-by: Vishal Moola (Oracle) Cc: David Hildenbrand Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pagevec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 5d3a0cccc6bf..63be5a451627 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -51,12 +51,12 @@ static inline void folio_batch_reinit(struct folio_batch *fbatch) fbatch->i = 0; } -static inline unsigned int folio_batch_count(struct folio_batch *fbatch) +static inline unsigned int folio_batch_count(const struct folio_batch *fbatch) { return fbatch->nr; } -static inline unsigned int folio_batch_space(struct folio_batch *fbatch) +static inline unsigned int folio_batch_space(const struct folio_batch *fbatch) { return PAGEVEC_SIZE - fbatch->nr; } -- cgit v1.2.3 From 39b44c8c73312ac535ffdf7c8ecd37ea07d4ef86 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 28 Aug 2025 10:48:20 +0200 Subject: huge_mm.h: disallow is_huge_zero_folio(NULL) Calling is_huge_zero_folio(NULL) should not be legal - it makes no sense, and a different (theoretical) implementation may dereference the pointer. But currently, lacking any explicit documentation, this call is possible. But if somebody really passes NULL, the function should not return true - this isn't the huge zero folio after all! However, if the `huge_zero_folio` hasn't been allocated yet, it's NULL, and is_huge_zero_folio(NULL) just happens to return true, which is a lie. This weird side effect prevented me from reproducing a kernel crash that occurred when the elements of a folio_batch were NULL - since folios_put_refs() skips huge zero folios, this sometimes causes a crash, but sometimes does not. For debugging, it is better to reveal such bugs reliably and not hide them behind random preconditions like "has the huge zero folio already been created?" To improve detection of such bugs, David Hildenbrand suggested adding a VM_WARN_ON_ONCE(). 
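A compressed sketch of the failure mode described above (illustrative only):

/* huge_zero_folio is a global that stays NULL until first allocated, so previously: */
is_huge_zero_folio(NULL);	/* READ_ONCE(huge_zero_folio) == NULL -> true, a lie */
/* With the VM_WARN_ON_ONCE(!folio) added below, such callers now warn reliably. */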
Link: https://lkml.kernel.org/r/20250828084820.570118-1-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: Dev Jain Cc: Kairui Song Cc: Kemeng Shi Cc: Liam Howlett Cc: Mariano Pache Cc: Nhat Pham Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1ac0d06fb3c1..29ef70022da1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -501,6 +501,8 @@ extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_folio(const struct folio *folio) { + VM_WARN_ON_ONCE(!folio); + return READ_ONCE(huge_zero_folio) == folio; } -- cgit v1.2.3 From f367474b5884edbc42661e7fecf784cb131dd25d Mon Sep 17 00:00:00 2001 From: Brian Mak Date: Tue, 5 Aug 2025 14:15:27 -0700 Subject: x86/kexec: carry forward the boot DTB on kexec Currently, the kexec_file_load syscall on x86 does not support passing a device tree blob to the new kernel. Some embedded x86 systems use device trees. On these systems, failing to pass a device tree to the new kernel causes a boot failure. To add support for this, we follow the behavior of ARM64 and PowerPC and carry over the current boot's device tree blob for use in the new kernel. We do this on x86 by passing the device tree blob as a setup_data entry in accordance with the x86 boot protocol. This behavior is gated behind the KEXEC_FILE_FORCE_DTB flag. Link: https://lkml.kernel.org/r/20250805211527.122367-3-makb@juniper.net Signed-off-by: Brian Mak Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Petkov Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Rob Herring Cc: Saravana Kannan Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/kexec.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 39fe3e6cd282..ff7e231b0485 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -395,6 +395,9 @@ struct kimage { /* Information for loading purgatory */ struct purgatory_info purgatory_info; + + /* Force carrying over the DTB from the current boot */ + bool force_dtb; #endif #ifdef CONFIG_CRASH_HOTPLUG @@ -461,7 +464,7 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ - KEXEC_FILE_NO_CMA) + KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -- cgit v1.2.3 From c8a09fc9664f79eeb66cdf4a2a34d5b6a239b727 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Jul 2025 10:17:09 +0200 Subject: ida: remove the ida_simple_xxx() API All users of the ida_simple_xxx() API have been converted. In Linux 6.11-rc2, the only callers are in tools/testing/. So it is now time to remove the definitions of the old and deprecated ida_simple_get() and ida_simple_remove().
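For anyone still converting out-of-tree callers, the removed wrappers map onto the current API as follows; note that ida_alloc_range() takes an inclusive maximum, which is what the '(end) - 1' in the old compatibility macro accounted for:

/* Old, deprecated (end was exclusive): */
id = ida_simple_get(&ida, 0, 128, GFP_KERNEL);
ida_simple_remove(&ida, id);

/* New (max is inclusive): */
id = ida_alloc_range(&ida, 0, 127, GFP_KERNEL);
ida_free(&ida, id);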
Link: https://lkml.kernel.org/r/aa205f45fef70a9c948b6a98bad06da58e4de776.1752480043.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/idr.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 2267902d29a7..789e23e67444 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -334,14 +334,6 @@ static inline void ida_init(struct ida *ida) xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } -/* - * ida_simple_get() and ida_simple_remove() are deprecated. Use - * ida_alloc() and ida_free() instead respectively. - */ -#define ida_simple_get(ida, start, end, gfp) \ - ida_alloc_range(ida, start, (end) - 1, gfp) -#define ida_simple_remove(ida, id) ida_free(ida, id) - static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); -- cgit v1.2.3 From baa96bcb180e979a821ce3ade87a3d2349b2d640 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Jul 2025 10:17:10 +0200 Subject: nvmem: update a comment related to struct nvmem_config Update a comment to match the function used in nvmem_register(). ida_simple_get() was replaced by ida_alloc() in commit 1eb51d6a4fce ("nvmem: switch to simpler IDA interface") Link: https://lkml.kernel.org/r/27a9dec93a9f79140b11a77df38b1b45bd342e09.1752480043.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/nvmem-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 615a560d9edb..f3b13da78aac 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -103,7 +103,7 @@ struct nvmem_cell_info { * * Note: A default "nvmem" name will be assigned to the device if * no name is specified in its configuration. In such case "" is - * generated with ida_simple_get() and provided id field is ignored. + * generated with ida_alloc() and provided id field is ignored. * * Note: Specifying name and setting id to -1 implies a unique device * whose name is provided as-is (kept unaltered). -- cgit v1.2.3 From 2a8c51bc9391ff3c701f06ae1b678419f843dc1a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Aug 2025 00:55:07 -0700 Subject: list.h: add missing kernel-doc for basic macros kernel-doc for the basic LIST_HEAD() and LIST_HEAD_INIT() macros has been missing forever (i.e., since git). Add them for completeness. Link: https://lkml.kernel.org/r/20250819075507.113639-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Nicolas Frattaroli Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/list.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..ca63bdea6c1a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -20,8 +20,16 @@ * using the generic single-entry routines. 
*/ +/** + * LIST_HEAD_INIT - initialize a &struct list_head's links to point to itself + * @name: name of the list_head + */ #define LIST_HEAD_INIT(name) { &(name), &(name) } +/** + * LIST_HEAD - definition of a &struct list_head with initialization values + * @name: name of the list_head + */ #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) -- cgit v1.2.3 From 2683df6539cbc3f0eeeba11154bc0cbf042a5cee Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 25 Aug 2025 10:57:00 +0800 Subject: panic: add note that 'panic_print' parameter is deprecated Just like for 'panic_print's sysctl interface, add a similar note for the setup of the kernel cmdline parameter and the parameter under /sys/module/kernel/. Also add the __core_param_cb() macro, which makes it possible to attach a special get/set operation to a kernel parameter. Link: https://lkml.kernel.org/r/20250825025701.81921-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Reviewed-by: Petr Mladek Cc: Askar Safin Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/moduleparam.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3a25122d83e2..6907aedc4f74 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -349,6 +349,19 @@ static inline void kernel_param_unlock(struct module *mod) __module_param_call("", name, &param_ops_##type, &var, perm, \ -1, KERNEL_PARAM_FL_UNSAFE) +/** + * __core_param_cb - similar like core_param, with a set/get ops instead of type. + * @name: the name of the cmdline and sysfs parameter (often the same as var) + * @var: the variable + * @ops: the set & get operations for this parameter. + * @perm: visibility in sysfs + * + * Ideally this should be called 'core_param_cb', but the name has been + * used for module core parameter, so add the '__' prefix + */ +#define __core_param_cb(name, ops, arg, perm) \ + __module_param_call("", name, ops, arg, perm, -1, 0) + #endif /* !MODULE */ /** -- cgit v1.2.3 From d0d9c7235548f1d772f1e48c9d5742c65d81c705 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Mon, 25 Aug 2025 10:29:29 +0800 Subject: panic: introduce helper functions for panic state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "panic: introduce panic status function family", v2. This series introduces a family of helper functions to manage panic state and updates existing code to use them. Before this series, panic state helpers were scattered and inconsistent. For example, panic_in_progress() was defined in printk/printk.c, not in panic.c or panic.h. As a result, developers had to look in unexpected places to understand or re-use panic state logic. Other checks were open-coded, duplicating logic across panic, crash, and watchdog paths. The new helpers centralize the functionality in panic.c/panic.h: - panic_try_start() - panic_reset() - panic_in_progress() - panic_on_this_cpu() - panic_on_other_cpu() Patches 1–8 add the helpers and convert panic/crash and printk/nbcon code to use them. Patch 9 fixes a bug in the watchdog subsystem by skipping checks when a panic is in progress, avoiding interference with the panic CPU. Together, this makes panic state handling simpler, more discoverable, and more robust. This patch (of 9): This patch introduces four new helper functions to abstract the management of the panic_cpu variable.
These functions will be used in subsequent patches to refactor existing code. The direct use of panic_cpu can be error-prone and ambiguous, as it requires manual checks to determine which CPU is handling the panic. The new helpers clarify intent: panic_try_start(): Atomically sets the current CPU as the panicking CPU. panic_reset(): Resets panic_cpu to PANIC_CPU_INVALID. panic_in_progress(): Checks if a panic has been triggered. panic_on_this_cpu(): Returns true if the current CPU is the panic originator. panic_on_other_cpu(): Returns true if a panic is in progress on another CPU. This change lays the groundwork for improved code readability and robustness in the panic handling subsystem. Link: https://lkml.kernel.org/r/20250825022947.1596226-1-wangjinchao600@gmail.com Link: https://lkml.kernel.org/r/20250825022947.1596226-2-wangjinchao600@gmail.com Signed-off-by: Jinchao Wang Cc: Anna Schumaker Cc: Baoquan He Cc: "Darrick J. Wong" Cc: Dave Young Cc: Doug Anderson Cc: "Guilherme G. Piccoli" Cc: Helge Deller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Jonathan Cameron Cc: Joel Granados Cc: John Ogness Cc: Kees Cook Cc: Li Huafei Cc: "Luck, Tony" Cc: Luo Gengkun Cc: Max Kellermann Cc: Nam Cao Cc: oushixiong Cc: Petr Mladek Cc: Qianqiang Liu Cc: Sergey Senozhatsky Cc: Sohil Mehta Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Thomas Zimmermann Cc: Thorsten Blum Cc: Ville Syrjala Cc: Vivek Goyal Cc: Yicong Yang Cc: Yunhui Cui Cc: Yury Norov (NVIDIA) Signed-off-by: Andrew Morton --- include/linux/panic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/panic.h b/include/linux/panic.h index 7be742628c25..6f972a66c13e 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -43,6 +43,12 @@ void abort(void); extern atomic_t panic_cpu; #define PANIC_CPU_INVALID -1 +bool panic_try_start(void); +void panic_reset(void); +bool panic_in_progress(void); +bool panic_on_this_cpu(void); +bool panic_on_other_cpu(void); + /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. -- cgit v1.2.3 From c6be36e2997662f423edfa3979a63935873ff648 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Mon, 25 Aug 2025 10:29:35 +0800 Subject: panic/printk: replace this_cpu_in_panic() with panic_on_this_cpu() The helper this_cpu_in_panic() duplicated logic already provided by panic_on_this_cpu(). Remove this_cpu_in_panic() and switch all users to panic_on_this_cpu(). This simplifies the code and avoids having two helpers for the same check. Link: https://lkml.kernel.org/r/20250825022947.1596226-8-wangjinchao600@gmail.com Signed-off-by: Jinchao Wang Cc: Anna Schumaker Cc: Baoquan He Cc: "Darrick J. Wong" Cc: Dave Young Cc: Doug Anderson Cc: "Guilherme G. Piccoli" Cc: Helge Deller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Jonathan Cameron Cc: Joel Granados Cc: John Ogness Cc: Kees Cook Cc: Li Huafei Cc: "Luck, Tony" Cc: Luo Gengkun Cc: Max Kellermann Cc: Nam Cao Cc: oushixiong Cc: Petr Mladek Cc: Qianqiang Liu Cc: Sergey Senozhatsky Cc: Sohil Mehta Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Thomas Zimmermann Cc: Thorsten Blum Cc: Ville Syrjala Cc: Vivek Goyal Cc: Yicong Yang Cc: Yunhui Cui Cc: Yury Norov (NVIDIA) Signed-off-by: Andrew Morton --- include/linux/printk.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 5d22b803f51e..45c663124c9b 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -330,8 +330,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) #endif -bool this_cpu_in_panic(void); - #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); -- cgit v1.2.3 From 7b1e502eb17c23fa6459a19bfe5974bffdb95574 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 6 Sep 2025 21:38:57 -0700 Subject: kernel.h: add comments for enum system_states Provide some basic comments about the system_states and what they imply. Also convert the comments to kernel-doc format. Split the enum declaration from the definition of the system_state variable so that kernel-doc notation works cleanly with it. This is picked up by Documentation/driver-api/basics.rst so it does not need further inclusion in the kernel docbooks. Link: https://lkml.kernel.org/r/20250907043857.2941203-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Acked-by: Rafael J. Wysocki # v1 Reviewed-by: Mauro Carvalho Chehab [v5] Cc: "Brown, Len" Cc: Greg Kroah-Hartman Cc: James Bottomley Cc: Jani Nikula Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Cc: Pavel Machek Signed-off-by: Andrew Morton --- include/linux/kernel.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 989315dabb86..5b46924fdff5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -164,11 +164,23 @@ extern int root_mountflags; extern bool early_boot_irqs_disabled; -/* - * Values used for system_state. Ordering of the states must not be changed +/** + * enum system_states - Values used for system_state. + * + * @SYSTEM_BOOTING: %0, no init needed + * @SYSTEM_SCHEDULING: system is ready for scheduling; OK to use RCU + * @SYSTEM_FREEING_INITMEM: system is freeing all of initmem; almost running + * @SYSTEM_RUNNING: system is up and running + * @SYSTEM_HALT: system entered clean system halt state + * @SYSTEM_POWER_OFF: system entered shutdown/clean power off state + * @SYSTEM_RESTART: system entered emergency power off or normal restart + * @SYSTEM_SUSPEND: system entered suspend or hibernate state + * + * Note: + * Ordering of the states must not be changed * as code checks for <, <=, >, >= STATE.
*/ -extern enum system_states { +enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, SYSTEM_FREEING_INITMEM, @@ -177,7 +189,8 @@ extern enum system_states { SYSTEM_POWER_OFF, SYSTEM_RESTART, SYSTEM_SUSPEND, -} system_state; +}; +extern enum system_states system_state; /* * General tracing related utility functions - trace_printk(), -- cgit v1.2.3 From d6d5116391857fc78fad9aa42317b36e4ce17b58 Mon Sep 17 00:00:00 2001 From: Evangelos Petrongonas Date: Thu, 21 Aug 2025 17:58:59 +0000 Subject: kexec: introduce is_kho_boot() Patch series "efi: Fix EFI boot with kexec handover (KHO)", v3. This patch series fixes a kernel panic that occurs when booting with both EFI and KHO (Kexec HandOver) enabled. The issue arises because EFI's `reserve_regions()` clears all memory regions with `memblock_remove(0, PHYS_ADDR_MAX)` before rebuilding them from EFI data. This destroys KHO scratch regions that were set up early during device tree scanning, causing a panic as the kernel has no valid memory regions for early allocations. The first patch introduces `is_kho_boot()` to allow early boot components to reliably detect if the kernel was booted via KHO-enabled kexec. The existing `kho_is_enabled()` only checks the command line and doesn't verify if an actual KHO FDT was passed. The second patch modifies EFI's `reserve_regions()` to selectively remove only non-KHO memory regions when KHO is active, preserving the critical scratch regions while still allowing EFI to rebuild its memory map. This patch (of 3): During early initialisation, after a kexec, other components, like EFI, need to know whether a KHO-enabled kexec was performed. The `kho_is_enabled` function is not enough: in the early stages it only reflects whether the cmdline has KHO enabled, not whether an actual KHO FDT exists. Extend the KHO API with `is_kho_boot()` to provide a way for components to check whether a KHO-enabled kexec was performed. Link: https://lkml.kernel.org/r/cover.1755721529.git.epetron@amazon.de Link: https://lkml.kernel.org/r/7dc6674a76bf6e68cca0222ccff32427699cc02e.1755721529.git.epetron@amazon.de Signed-off-by: Evangelos Petrongonas Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Ard Biesheuvel Cc: Baoquan He Cc: Changyuan Lyu Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 348844cffb13..559d13a3bc44 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -40,6 +40,7 @@ struct kho_serialization; #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); +bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); int kho_preserve_phys(phys_addr_t phys, size_t size); @@ -60,6 +61,11 @@ static inline bool kho_is_enabled(void) return false; } +static inline bool is_kho_boot(void) +{ + return false; +} + static inline int kho_preserve_folio(struct folio *folio) { return -EOPNOTSUPP; -- cgit v1.2.3 From e19ceeb1c0f63e3e15b197c5f34797134b51ba0e Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 28 Aug 2025 08:35:58 +0000 Subject: platform/chrome: Centralize common cros_ec_device initialization Move the common initialization from protocol device drivers into central cros_ec_device_alloc(). This removes duplicated code from each driver's probe function.
The buffer sizes are now calculated once, using the maximum possible overhead required by any of the transport protocols, ensuring the allocated buffers are sufficient for all cases. Link: https://lore.kernel.org/r/20250828083601.856083-3-tzungbi@kernel.org Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 3ec24f445c29..4d96cffbb9e3 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -33,12 +33,18 @@ /* * Max bus-specific overhead incurred by request/responses. - * I2C requires 1 additional byte for requests. - * I2C requires 2 additional bytes for responses. - * SPI requires up to 32 additional bytes for responses. + * + * Request: + * - I2C requires 1 byte (see struct ec_host_request_i2c). + * - ISHTP requires 4 bytes (see struct cros_ish_out_msg). + * + * Response: + * - I2C requires 2 bytes (see struct ec_host_response_i2c). + * - ISHTP requires 4 bytes (see struct cros_ish_in_msg). + * - SPI requires 32 bytes (see EC_MSG_PREAMBLE_COUNT). */ #define EC_PROTO_VERSION_UNKNOWN 0 -#define EC_MAX_REQUEST_OVERHEAD 1 +#define EC_MAX_REQUEST_OVERHEAD 4 #define EC_MAX_RESPONSE_OVERHEAD 32 /* -- cgit v1.2.3 From 56cb557279d70397cefb497e0f06bdd6fd685f8e Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 28 Aug 2025 08:36:00 +0000 Subject: platform/chrome: cros_ec: Add a flag to track registration state Introduce a `registered` flag to the `struct cros_ec_device` to allow callers to determine if the device has been fully registered and is ready for use. This is a preparatory step to prevent race conditions where other drivers might try to access the device before it is fully registered or after it has been unregistered. Link: https://lore.kernel.org/r/20250828083601.856083-5-tzungbi@kernel.org Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4d96cffbb9e3..de14923720a5 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -128,6 +128,7 @@ struct cros_ec_command { * @dout_size: Size of dout buffer to allocate (zero to use static dout). * @wake_enabled: True if this device can wake the system from sleep. * @suspended: True if this device had been suspended. + * @registered: True if this device had been registered. * @cmd_xfer: Send command to EC and get response. * Returns the number of bytes received if the communication * succeeded, but that doesn't mean the EC was happy with the @@ -186,6 +187,7 @@ struct cros_ec_device { int dout_size; bool wake_enabled; bool suspended; + bool registered; int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); int (*pkt_xfer)(struct cros_ec_device *ec, @@ -278,6 +280,8 @@ int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void int cros_ec_get_cmd_versions(struct cros_ec_device *ec_dev, u16 cmd); +bool cros_ec_device_registered(struct cros_ec_device *ec_dev); + /** * cros_ec_get_time_ns() - Return time in ns. 
* -- cgit v1.2.3 From e68f150bc11d0a05cbe984a4e5c0f72a95cae07d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 18 Aug 2025 09:46:15 +0300 Subject: memblock: drop for_each_free_mem_pfn_range_in_zone_from() for_each_free_mem_pfn_range_in_zone_from() and its "backend" implementation __next_mem_pfn_range_in_zone() were only used by deferred initialization of the memory map. Remove them as they are not used anymore. Reviewed-by: Wei Yang Reviewed-by: David Hildenbrand Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fcda8481de9a..221118b5a16e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -324,28 +324,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, - unsigned long *out_spfn, - unsigned long *out_epfn); - -/** - * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific - * free memblock areas from a given point - * @i: u64 used as loop variable - * @zone: zone in which all of the memory blocks reside - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL - * - * Walks over free (memory && !reserved) areas of memblock in a specific - * zone, continuing from current position. Available as soon as memblock is - * initialized. - */ -#define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \ - for (; i != U64_MAX; \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) - -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ /** * for_each_free_mem_range - iterate through free memblock areas -- cgit v1.2.3 From fdae0ab67d57d480dc61e9fb45678bbdc3786711 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Sep 2025 12:19:42 +0000 Subject: net: use NUMA drop counters for softnet_data.dropped Hosts under DoS attack can suffer from false sharing in enqueue_to_backlog(): atomic_inc(&sd->dropped). This is because sd->dropped can be touched from many CPUs, possibly residing on different NUMA nodes. Generalize the sk_drop_counters infrastructure added in commit c51613fa276f ("net: add sk->sk_drop_counters") and use it to replace softnet_data.dropped with the NUMA-friendly softnet_data.drop_counters. This adds 64 bytes per CPU, maybe more in the future if we increase the number of counters (currently 2) per 'struct numa_drop_counters'.
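With the helpers added below, a drop is then counted roughly like this (a sketch, not the exact patched hunks):

    /* NUMA-aware replacement for atomic_inc(&sd->dropped) */
    numa_drop_add(&sd->drop_counters, 1);

    /* readers fold the two per-node halves back together */
    int total = numa_drop_read(&sd->drop_counters);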
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 2 +- include/linux/netdevice.h | 28 +++++++++++++++++++++++++++- include/linux/udp.h | 2 +- 3 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 261d02efb615..f43314517396 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -295,7 +295,7 @@ struct raw6_sock { __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; - struct socket_drop_counters drop_counters; + struct numa_drop_counters drop_counters; struct ipv6_pinfo inet6; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f3a3b761abfb..f5a840c07cf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3459,6 +3459,32 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } +struct numa_drop_counters { + atomic_t drops0 ____cacheline_aligned_in_smp; + atomic_t drops1 ____cacheline_aligned_in_smp; +}; + +static inline int numa_drop_read(const struct numa_drop_counters *ndc) +{ + return atomic_read(&ndc->drops0) + atomic_read(&ndc->drops1); +} + +static inline void numa_drop_add(struct numa_drop_counters *ndc, int val) +{ + int n = numa_node_id() % 2; + + if (n) + atomic_add(val, &ndc->drops1); + else + atomic_add(val, &ndc->drops0); +} + +static inline void numa_drop_reset(struct numa_drop_counters *ndc) +{ + atomic_set(&ndc->drops0, 0); + atomic_set(&ndc->drops1, 0); +} + /* * Incoming packets are placed on per-CPU queues */ @@ -3504,7 +3530,7 @@ struct softnet_data { struct sk_buff_head input_pkt_queue; struct napi_struct backlog; - atomic_t dropped ____cacheline_aligned_in_smp; + struct numa_drop_counters drop_counters; /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; diff --git a/include/linux/udp.h b/include/linux/udp.h index 981506be1e15..6ed008ab1665 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -108,7 +108,7 @@ struct udp_sock { * the last UDP socket cacheline. */ struct hlist_node tunnel_list; - struct socket_drop_counters drop_counters; + struct numa_drop_counters drop_counters; }; #define udp_test_bit(nr, sk) \ -- cgit v1.2.3 From ae1c658b33d4bec20c037aebba583a68375d4773 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 11 Sep 2025 15:08:31 +0200 Subject: net: phy: introduce phy_id_compare_model() PHY ID helper Similar to phy_id_compare_vendor(), introduce the equivalent phy_id_compare_model() helper for the generic PHY ID Model mask. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Christian Marangi Link: https://patch.msgid.link/20250911130840.23569-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 04553419adc3..6f3b25cb7f4e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1308,6 +1308,19 @@ static inline bool phy_id_compare_vendor(u32 id, u32 vendor_mask) return phy_id_compare(id, vendor_mask, PHY_ID_MATCH_VENDOR_MASK); } +/** + * phy_id_compare_model - compare @id with @model mask + * @id: PHY ID + * @model_mask: PHY Model mask + * + * Return: true if the bits from @id match @model using the + * generic PHY Model mask. 
+ */ +static inline bool phy_id_compare_model(u32 id, u32 model_mask) +{ + return phy_id_compare(id, model_mask, PHY_ID_MATCH_MODEL_MASK); +} + /** * phydev_id_compare - compare @id with the PHY's Clause 22 ID * @phydev: the PHY device -- cgit v1.2.3 From 3afaff7a0ce97457c8ab46862f2c06603a89962e Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Mon, 11 Aug 2025 17:42:53 +0530 Subject: include/linux/rv.h: remove redundant include file Remove redundant include to clean up the code. Move all unique include files inside CONFIG_RV as they are only needed when CONFIG_RV is enabled. Arrange include files alphabetically. Fixes: 24cbfe18d55a ("rv: Merge struct rv_monitor_def into struct rv_monitor") [1] Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202507312017.oyD08TL5-lkp@intel.com/ Signed-off-by: Akhilesh Patil Reviewed-by: Gabriele Monaco Link: https://lore.kernel.org/r/aJneRbHGlNFg7lr9@bhairav-test.ee.iitb.ac.in Signed-off-by: Gabriele Monaco --- include/linux/rv.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rv.h b/include/linux/rv.h index 14410a42faef..9520aab34bcb 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -7,16 +7,14 @@ #ifndef _LINUX_RV_H #define _LINUX_RV_H -#include -#include - #define MAX_DA_NAME_LEN 32 #define MAX_DA_RETRY_RACING_EVENTS 3 #ifdef CONFIG_RV +#include #include +#include #include -#include /* * Deterministic automaton per-object variables. -- cgit v1.2.3 From e7c9b66b106989aeb17b167f5bbea9a108d26c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 17 Jul 2025 17:11:16 +0200 Subject: pwm: Provide a gpio device for waveform drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A PWM is a more general concept than an output-only GPIO. When using duty_length = period_length the PWM looks like an active GPIO, with duty_length = 0 like an inactive GPIO. With the waveform abstraction there is enough control over the configuration to ensure that PWMs that cannot generate a constant signal at both levels error out. The pwm-pca9685 driver already provides a gpio chip. When this driver is converted to the waveform callbacks, the gpio part can just be dropped. 
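As a rough sketch of the GPIO mapping, assuming the pwm_waveform API (the period is an arbitrary example value and error handling is omitted):

    struct pwm_waveform wf = {
    	.period_length_ns = 1000000,
    	.duty_length_ns = 1000000,	/* duty == period: constant active, like a set GPO */
    };

    pwm_set_waveform_might_sleep(pwm, &wf, false);

    wf.duty_length_ns = 0;		/* duty == 0: constant inactive, like a cleared GPO */
    pwm_set_waveform_might_sleep(pwm, &wf, false);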
Signed-off-by: Uwe Kleine-König Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20250717151117.1828585-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 8cafc483db53..549ac4aaad59 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -321,6 +322,7 @@ struct pwm_ops { * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier * @atomic: can the driver's ->apply() be called in atomic context + * @gpio: &struct gpio_chip to operate this PWM chip's lines as GPO * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip * @operational: signals if the chip can be used (or is already deregistered) * @nonatomic_lock: mutex for nonatomic chips @@ -340,6 +342,7 @@ struct pwm_chip { bool atomic; /* only used internally by the PWM framework */ + struct gpio_chip gpio; bool uses_pwmchip_alloc; bool operational; union { -- cgit v1.2.3 From 09f37134464cc03baf5cb8eab2d99db27ee73217 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:00 -0500 Subject: x86,fs/resctrl: Consolidate monitor event descriptions There are currently only three monitor events, all associated with the RDT_RESOURCE_L3 resource. Growing support for additional events will be easier with some restructuring to have a single point in file system code where all attributes of all events are defined. Place all event descriptions into an array mon_event_all[]. Doing this has the beneficial side effect of removing the need for rdt_resource::evt_list. Add resctrl_event_id::QOS_FIRST_EVENT for a lower bound on range checks for event ids and as the starting index to scan mon_event_all[]. Drop the code that builds evt_list and change the two places where the list is scanned to scan mon_event_all[] instead using a new helper macro for_each_mon_event(). Architecture code now informs file system code which events are available with resctrl_enable_mon_event(). Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 4 ++-- include/linux/resctrl_types.h | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 6fb4894b8cfd..2944042bd84c 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -269,7 +269,6 @@ enum resctrl_schema_fmt { * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @schema_fmt: Which format string and parser is used for this schema. - * @evt_list: List of monitoring events * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth * monitoring events can be configured. 
* @cdp_capable: Is the CDP feature available on this resource @@ -287,7 +286,6 @@ struct rdt_resource { struct list_head mon_domains; char *name; enum resctrl_schema_fmt schema_fmt; - struct list_head evt_list; unsigned int mbm_cfg_mask; bool cdp_capable; }; @@ -372,6 +370,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +void resctrl_enable_mon_event(enum resctrl_event_id eventid); + bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index a25fb9c4070d..2dadbc54e4b3 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,11 +34,15 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) -/* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. - */ +/* Event IDs */ enum resctrl_event_id { + /* Must match value of first event below */ + QOS_FIRST_EVENT = 0x01, + + /* + * These values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ QOS_L3_OCCUP_EVENT_ID = 0x01, QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, -- cgit v1.2.3 From d257cc2e5c8bb8236cb161360d6c0529fd442712 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:01 -0500 Subject: x86,fs/resctrl: Replace architecture event enabled checks The resctrl file system now has complete knowledge of the status of every event. So there is no need for per-event function calls to check. Replace each of the resctrl_arch_is_{event}enabled() calls with resctrl_is_mon_event_enabled(QOS_{EVENT}). No functional change. Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 2944042bd84c..40aba6b5d4f0 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -372,6 +372,8 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); void resctrl_enable_mon_event(enum resctrl_event_id eventid); +bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); + bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** -- cgit v1.2.3 From 7788255aba6545a27b8d143c5256536f8dfb2c0a Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 9 Sep 2025 10:00:06 +0000 Subject: KVM: Implement barriers before accessing kvm->buses[] on SRCU read paths This ensures that, if a VCPU has "observed" that an IO registration has occurred, the instruction currently being trapped or emulated will also observe the IO registration. At the same time, enforce that kvm_get_bus() is used only on the update side, ensuring that a long-term reference cannot be obtained by an SRCU reader. 
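Update-side code therefore takes roughly this shape (illustrative sketch; the existing call sites already hold slots_lock):

    struct kvm_io_bus *bus;

    mutex_lock(&kvm->slots_lock);
    bus = kvm_get_bus(kvm, KVM_MMIO_BUS);	/* checked by rcu_dereference_protected() */
    /* ... modify or replace the bus ... */
    mutex_unlock(&kvm->slots_lock);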
Signed-off-by: Keir Fraser Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..e7d6111cf254 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -966,11 +966,15 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); } +/* + * Get a bus reference under the update-side lock. No long-term SRCU reader + * references are permitted, to avoid stale reads vs concurrent IO + * registrations. + */ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { - return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock) || - !refcount_read(&kvm->users_count)); + return rcu_dereference_protected(kvm->buses[idx], + lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) -- cgit v1.2.3 From 7d9a0273c45962e9a6bc06f3b87eef7c431c1853 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 9 Sep 2025 10:00:07 +0000 Subject: KVM: Avoid synchronize_srcu() in kvm_io_bus_register_dev() Device MMIO registration may happen quite frequently during VM boot, and the SRCU synchronization each time has a measurable effect on VM startup time. In our experiments it can account for around 25% of a VM's startup time. Replace the synchronization with a deferred free of the old kvm_io_bus structure. Tested-by: Li RongQing Signed-off-by: Keir Fraser Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e7d6111cf254..103be35caf0d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -206,6 +206,7 @@ struct kvm_io_range { struct kvm_io_bus { int dev_count; int ioeventfd_count; + struct rcu_head rcu; struct kvm_io_range range[]; }; -- cgit v1.2.3 From 83b039877310ae1eb614eef17b780df1e10d9fb5 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:03 -0500 Subject: x86,fs/resctrl: Prepare for more monitor events There's a rule in computer programming that objects appear zero, once, or many times. So code accordingly. There are two MBM events, and resctrl is coded with a lot of "if (local) do one thing; if (total) do a different thing". Change the rdt_mon_domain and rdt_hw_mon_domain structures to hold arrays of pointers to per-event data instead of explicit fields for total and local bandwidth. Simplify by coding for many events using loops over the enabled events. Move resctrl_is_mbm_event() to <linux/resctrl.h> so it can be used more widely. Also provide a for_each_mbm_event_id() helper macro. Clean up variable names in the functions touched to consistently use "eventid" for those with type enum resctrl_event_id.
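The per-event special cases then collapse into loops of roughly this shape (illustrative, built on the helpers added below):

    enum resctrl_event_id eventid;

    for_each_mbm_event_id(eventid) {
    	if (!resctrl_is_mon_event_enabled(eventid))
    		continue;
    	/* act on this enabled MBM event */
    }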
Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 23 +++++++++++++++++++---- include/linux/resctrl_types.h | 3 +++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 40aba6b5d4f0..478d7a935ca3 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -161,8 +161,9 @@ struct rdt_ctrl_domain { * @hdr: common header for different domain types * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold - * @mbm_total: saved state for MBM total bandwidth - * @mbm_local: saved state for MBM local bandwidth + * @mbm_states: Per-event pointer to the MBM event's saved state. + * An MBM event's state is an array of struct mbm_state + * indexed by RMID on x86 or combined CLOSID, RMID on Arm. * @mbm_over: worker to periodically read MBM h/w counters * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters @@ -172,8 +173,7 @@ struct rdt_mon_domain { struct rdt_domain_hdr hdr; unsigned int ci_id; unsigned long *rmid_busy_llc; - struct mbm_state *mbm_total; - struct mbm_state *mbm_local; + struct mbm_state *mbm_states[QOS_NUM_L3_MBM_EVENTS]; struct delayed_work mbm_over; struct delayed_work cqm_limbo; int mbm_work_cpu; @@ -376,6 +376,21 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid) +{ + return (eventid >= QOS_L3_MBM_TOTAL_EVENT_ID && + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + +/* Iterate over all memory bandwidth events */ +#define for_each_mbm_event_id(eventid) \ + for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \ + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID; eventid++) + +/* Iterate over memory bandwidth arrays in domain structures */ +#define for_each_mbm_idx(idx) \ + for (idx = 0; idx < QOS_NUM_L3_MBM_EVENTS; idx++) + /** * resctrl_arch_mon_event_config_write() - Write the config for an event. * @config_info: struct resctrl_mon_config_info describing the resource, domain diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index 2dadbc54e4b3..d98351663c2c 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -51,4 +51,7 @@ enum resctrl_event_id { QOS_NUM_EVENTS, }; +#define QOS_NUM_L3_MBM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID - QOS_L3_MBM_TOTAL_EVENT_ID + 1) +#define MBM_STATE_IDX(evt) ((evt) - QOS_L3_MBM_TOTAL_EVENT_ID) + #endif /* __LINUX_RESCTRL_TYPES_H */ -- cgit v1.2.3 From 5ad68c8f965fed78c61f2ac7aea933f06bb50032 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:06 -0500 Subject: x86,fs/resctrl: Consolidate monitoring related data from rdt_resource The cache allocation and memory bandwidth allocation feature properties are consolidated into struct resctrl_cache and struct resctrl_membw respectively. In preparation for more monitoring properties that will clobber the existing resource struct more, re-organize the monitoring specific properties to also be in a separate structure. Also convert "bandwidth sources" terminology to "memory transactions" to have consistency within resctrl for related monitoring features. [ bp: Massage commit message. 
] Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 478d7a935ca3..fe2af6cb96d4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -255,38 +255,46 @@ enum resctrl_schema_fmt { RESCTRL_SCHEMA_RANGE, }; +/** + * struct resctrl_mon - Monitoring related data of a resctrl resource. + * @num_rmid: Number of RMIDs available. + * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth + * monitoring events can be configured. + */ +struct resctrl_mon { + int num_rmid; + unsigned int mbm_cfg_mask; +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine - * @num_rmid: Number of RMIDs available * @ctrl_scope: Scope of this resource for control functions * @mon_scope: Scope of this resource for monitor functions * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. + * @mon: Monitoring related data. * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @schema_fmt: Which format string and parser is used for this schema. - * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth - * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; - int num_rmid; enum resctrl_scope ctrl_scope; enum resctrl_scope mon_scope; struct resctrl_cache cache; struct resctrl_membw membw; + struct resctrl_mon mon; struct list_head ctrl_domains; struct list_head mon_domains; char *name; enum resctrl_schema_fmt schema_fmt; - unsigned int mbm_cfg_mask; bool cdp_capable; }; -- cgit v1.2.3 From 13390861b426e936db20d675804a5b405622bc79 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:07 -0500 Subject: x86,fs/resctrl: Detect Assignable Bandwidth Monitoring feature details ABMC feature details are reported via CPUID Fn8000_0020_EBX_x5, where bits 15:0 (MAX_ABMC) hold the Maximum Supported Assignable Bandwidth Monitoring Counter ID + 1. The ABMC feature details are documented in APM [1] available from [2]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). Detect the feature and the number of assignable counters supported. For backward compatibility, upon detecting the assignable counter feature, enable the mbm_total_bytes and mbm_local_bytes events that users are familiar with as part of the original L3 MBM support.
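A sketch of the enumeration described above (illustrative only, not the actual detection code):

    unsigned int eax, ebx, ecx, edx, num_mbm_cntrs;

    cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx);
    /* EBX bits 15:0 hold the maximum counter ID + 1, i.e. the counter count */
    num_mbm_cntrs = ebx & GENMASK(15, 0);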
Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- include/linux/resctrl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index fe2af6cb96d4..eb80cc233be4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -260,10 +260,14 @@ enum resctrl_schema_fmt { * @num_rmid: Number of RMIDs available. * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth * monitoring events can be configured. + * @num_mbm_cntrs: Number of assignable counters. + * @mbm_cntr_assignable:Is system capable of supporting counter assignment? */ struct resctrl_mon { int num_rmid; unsigned int mbm_cfg_mask; + int num_mbm_cntrs; + bool mbm_cntr_assignable; }; -- cgit v1.2.3 From faebbc58cde9d8f6050ac152c34c88195ed4abaa Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:08 -0500 Subject: x86/resctrl: Add support to enable/disable AMD ABMC feature Add the functionality to enable/disable the AMD ABMC feature. The AMD ABMC feature is enabled by setting the enable bit (bit 0) in the L3_QOS_EXT_CFG MSR. When the state of ABMC is changed, the MSR needs to be updated on all the logical processors in the QOS domain. Hardware counters will reset when the ABMC state is changed. [ bp: Massage commit message. ] Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- include/linux/resctrl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index eb80cc233be4..919806122c50 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -445,6 +445,26 @@ static inline u32 resctrl_get_config_index(u32 closid, bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); +/** + * resctrl_arch_mbm_cntr_assign_enabled() - Check if MBM counter assignment + * mode is enabled. + * @r: Pointer to the resource structure. + * + * Return: + * true if the assignment mode is enabled, false otherwise. + */ +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r); + +/** + * resctrl_arch_mbm_cntr_assign_set() - Configure the MBM counter assignment mode. + * @r: Pointer to the resource structure. + * @enable: Set to true to enable, false to disable the assignment mode. + * + * Return: + * 0 on success, < 0 on error. + */ +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. -- cgit v1.2.3 From 4d32c24a74f2c12ff440d381ba01de574f6631ce Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:11 -0500 Subject: fs/resctrl: Introduce mbm_cntr_cfg to track assignable counters per domain The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Counters are assigned/unassigned at monitoring domain level.
Manage a monitoring domain's hardware counters using a per monitoring domain array of struct mbm_cntr_cfg that is indexed by the hardware counter ID. A hardware counter's configuration contains the MBM event ID and points to the monitoring group that it is assigned to, with a NULL pointer meaning that the hardware counter is available for assignment. There is no direct way to determine which hardware counters are assigned to a particular monitoring group. Check every entry of every hardware counter configuration array in every monitoring domain to query which MBM events of a monitoring group is tracked by hardware. Such queries are acceptable because of a very small number of assignable counters (32 to 64). Suggested-by: Peter Newman Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 919806122c50..e013caba6641 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -156,6 +156,18 @@ struct rdt_ctrl_domain { u32 *mbps_val; }; +/** + * struct mbm_cntr_cfg - Assignable counter configuration. + * @evtid: MBM event to which the counter is assigned. Only valid + * if @rdtgroup is not NULL. + * @rdtgrp: resctrl group assigned to the counter. NULL if the + * counter is free. + */ +struct mbm_cntr_cfg { + enum resctrl_event_id evtid; + struct rdtgroup *rdtgrp; +}; + /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types @@ -168,6 +180,8 @@ struct rdt_ctrl_domain { * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters + * @cntr_cfg: array of assignable counters' configuration (indexed + * by counter ID) */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; @@ -178,6 +192,7 @@ struct rdt_mon_domain { struct delayed_work cqm_limbo; int mbm_work_cpu; int cqm_work_cpu; + struct mbm_cntr_cfg *cntr_cfg; }; /** -- cgit v1.2.3 From ebebda853633de389ba2c6737f8ca38405713e90 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:14 -0500 Subject: fs/resctrl: Introduce event configuration field in struct mon_evt When supported, mbm_event counter assignment mode allows the user to configure events to track specific types of memory transactions. Introduce an evt_cfg field in struct mon_evt to define the type of memory transactions tracked by a monitoring event. Also add a helper function to get the evt_cfg value. 
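A caller interested in what an event currently tracks could then query the configuration along these lines (illustrative):

    /* memory transactions configured for the total-bandwidth event */
    u32 evt_cfg = resctrl_get_mon_evt_cfg(QOS_L3_MBM_TOTAL_EVENT_ID);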
Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index e013caba6641..87daa4ca312d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -409,6 +409,8 @@ static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid) eventid <= QOS_L3_MBM_LOCAL_EVENT_ID); } +u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id eventid); + /* Iterate over all memory bandwidth events */ #define for_each_mbm_event_id(eventid) \ for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \ -- cgit v1.2.3 From f7a4fb22312646329ba21bc58958fd83fb9fc15d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:15 -0500 Subject: x86,fs/resctrl: Implement resctrl_arch_config_cntr() to assign a counter with ABMC The ABMC feature allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Implement an x86 architecture-specific handler to configure a counter. This architecture specific handler is called by resctrl fs when a counter is assigned or unassigned as well as when an already assigned counter's configuration should be updated. Configure counters by writing to the L3_QOS_ABMC_CFG MSR, specifying the counter ID, bandwidth source (RMID), and event configuration. The ABMC feature details are documented in APM [1] available from [2]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- include/linux/resctrl.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 87daa4ca312d..50e38445183a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -594,6 +594,25 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * */ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); +/** + * resctrl_arch_config_cntr() - Configure the counter with its new RMID + * and event details. + * @r: Resource structure. + * @d: The domain in which counter with ID @cntr_id should be configured. + * @evtid: Monitoring event type (e.g., QOS_L3_MBM_TOTAL_EVENT_ID + * or QOS_L3_MBM_LOCAL_EVENT_ID). + * @rmid: RMID. + * @closid: CLOSID. + * @cntr_id: Counter ID to configure. + * @assign: True to assign the counter or update an existing assignment, + * false to unassign the counter. + * + * This can be called from any CPU. 
+ */ +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- cgit v1.2.3 From 862314fd1f93d96eddb0559a807c66cb1f6ee520 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:19 -0500 Subject: fs/resctrl: Introduce counter ID read, reset calls in mbm_event mode When supported, "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor the bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Introduce the architecture calls resctrl_arch_cntr_read() and resctrl_arch_reset_cntr() to read and reset event counters when "mbm_event" mode is supported. Function names match existing resctrl_arch_rmid_read() and resctrl_arch_reset_rmid(). Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 50e38445183a..04152654827d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -613,6 +613,44 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign); +/** + * resctrl_arch_cntr_read() - Read the event data corresponding to the counter ID + * assigned to the RMID, event pair for this resource + * and domain. + * @r: Resource that the counter should be read from. + * @d: Domain that the counter should be read from. + * @closid: CLOSID that matches the RMID. + * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to read. + * @eventid: The MBM event to which @cntr_id is assigned. + * @val: Result of the counter read in bytes. + * + * Called on a CPU that belongs to domain @d when "mbm_event" mode is enabled. + * Called from a non-migrateable process context via smp_call_on_cpu() unless all + * CPUs are nohz_full, in which case it is called via IPI (smp_call_function_any()). + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val); + +/** + * resctrl_arch_reset_cntr() - Reset any private state associated with counter ID. + * @r: The domain's resource. + * @d: The counter ID's domain. + * @closid: CLOSID that matches the RMID. + * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to reset. + * @eventid: The MBM event to which @cntr_id is assigned. + * + * This can be called from any CPU. 
+ */ +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- cgit v1.2.3 From ea274cbeaf8f0667267b347e3f84797439cdab4e Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:23 -0500 Subject: fs/resctrl: Add event configuration directory under info/L3_MON/ The "mbm_event" counter assignment mode allows the user to assign a hardware counter to an RMID, event pair and monitor the bandwidth as long as it is assigned. The user can specify the memory transaction(s) for the counter to track. When this mode is supported, the /sys/fs/resctrl/info/L3_MON/event_configs directory contains a sub-directory for each MBM event that can be assigned to a counter. The MBM event sub-directory contains a file named "event_filter" that is used to view and modify which memory transactions the MBM event is configured with. Create /sys/fs/resctrl/info/L3_MON/event_configs directory on resctrl mount and pre-populate it with directories for the two existing MBM events: mbm_total_bytes and mbm_local_bytes. Create the "event_filter" file within each MBM event directory with the needed *show() that displays the memory transactions with which the MBM event is configured. Example: $ mount -t resctrl resctrl /sys/fs/resctrl $ cd /sys/fs/resctrl/ $ cat info/L3_MON/event_configs/mbm_total_bytes/event_filter local_reads,remote_reads,local_non_temporal_writes, remote_non_temporal_writes,local_reads_slow_memory, remote_reads_slow_memory,dirty_victim_writes_all $ cat info/L3_MON/event_configs/mbm_local_bytes/event_filter local_reads,local_non_temporal_writes,local_reads_slow_memory Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index d98351663c2c..acfe07860b34 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,6 +34,9 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) +/* Number of memory transactions that an MBM event can be configured with */ +#define NUM_MBM_TRANSACTIONS 7 + /* Event IDs */ enum resctrl_event_id { /* Must match value of first event below */ -- cgit v1.2.3 From ac1df9bb0ba3ae94137fb494cd9efc598f65d826 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:25 -0500 Subject: fs/resctrl: Introduce mbm_assign_on_mkdir to enable assignments on mkdir The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor the bandwidth as long as it is assigned. Introduce a user-configurable option that determines if a counter will automatically be assigned to an RMID, event pair when its associated monitor group is created via mkdir. Accessible when "mbm_event" counter assignment mode is enabled. 
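Assuming the option is exposed as a file of the same name under info/L3_MON/ (consistent with the other files added by this series), usage would look like:

    $ echo 1 > /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir
    $ cat /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir
    1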
Suggested-by: Peter Newman Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 04152654827d..a7d92718b653 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -277,12 +277,15 @@ enum resctrl_schema_fmt { * monitoring events can be configured. * @num_mbm_cntrs: Number of assignable counters. * @mbm_cntr_assignable:Is system capable of supporting counter assignment? + * @mbm_assign_on_mkdir:True if counters should automatically be assigned to MBM + * events of monitor groups created via mkdir. */ struct resctrl_mon { int num_rmid; unsigned int mbm_cfg_mask; int num_mbm_cntrs; bool mbm_cntr_assignable; + bool mbm_assign_on_mkdir; }; -- cgit v1.2.3 From 3c17001b21b9f168c957ced9384abe969019b609 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:24 +0200 Subject: pidfs: validate extensible ioctls Validate extensible ioctls more strictly than we do now. Reviewed-by: Aleksa Sarai Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..2f2edc53bf3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -4023,4 +4023,18 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); +static inline bool extensible_ioctl_valid(unsigned int cmd_a, + unsigned int cmd_b, size_t min_size) +{ + if (_IOC_DIR(cmd_a) != _IOC_DIR(cmd_b)) + return false; + if (_IOC_TYPE(cmd_a) != _IOC_TYPE(cmd_b)) + return false; + if (_IOC_NR(cmd_a) != _IOC_NR(cmd_b)) + return false; + if (_IOC_SIZE(cmd_a) < min_size) + return false; + return true; +} + #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From be975448a45cd024e2b98598eefc0e164ad93f09 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Jul 2025 14:25:35 -0700 Subject: srcu: Document __srcu_read_{,un}lock_fast() implicit RCU readers This commit documents the implicit RCU readers that are implied by the this_cpu_inc() and atomic_long_inc() operations in __srcu_read_lock_fast() and __srcu_read_unlock_fast(). While in the area, fix the documentation of the memory pairing of atomic_long_inc() in __srcu_read_lock_fast(). [ paulmck: Apply Joel Fernandes feedback. ] Signed-off-by: Paul E. McKenney Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior --- include/linux/srcutree.h | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 4d2fee4d3828..42098e0fa0b7 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -232,9 +232,27 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_read_unlock_fast(). * * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
+ * critical sections either because they disables interrupts, because + * they are a single instruction, or because they are read-modify-write + * atomic operations, depending on the whims of the architecture. + * This matters because the SRCU-fast grace-period mechanism uses either + * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU, + * *not* SRCU, in order to eliminate the need for the read-side smp_mb() + * invocations that are used by srcu_read_lock() and srcu_read_unlock(). + * The __srcu_read_unlock_fast() function also relies on this same RCU + * (again, *not* SRCU) trick to eliminate the need for smp_mb(). + * + * The key point behind this RCU trick is that if any part of a given + * RCU reader precedes the beginning of a given RCU grace period, then + * the entirety of that RCU reader and everything preceding it happens + * before the end of that same RCU grace period. Similarly, if any part + * of a given RCU reader follows the end of a given RCU grace period, + * then the entirety of that RCU reader and everything following it + * happens after the beginning of that same RCU grace period. Therefore, + * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z + * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU + * read-side critical section from the viewpoint of the SRCU grace period. + * This is all the ordering that is required, hence no calls to smp_mb(). * * This means that __srcu_read_lock_fast() is not all that fast * on architectures that support NMIs but do not supply NMI-safe @@ -245,9 +263,9 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_locks.counter); /* Y */ + this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */ + atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. barrier(); /* Avoid leaking the critical section. */ return scp; } @@ -258,23 +276,17 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src * different CPU than that which was incremented by the corresponding * srcu_read_lock_fast(), but it must be within the same task. * - * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because they - * are a single instruction, or because they are a read-modify-write atomic - * operation, depending on the whims of the architecture. - * - * This means that __srcu_read_unlock_fast() is not all that fast - * on architectures that support NMIs but do not supply NMI-safe - * implementations of this_cpu_inc(). + * Please see the __srcu_read_lock_fast() function's header comment for + * information on implicit RCU readers and NMI safety. */ static inline void notrace __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ + atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader. 
} void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -- cgit v1.2.3 From 5e0ae59159e3a07391a35865bb79ff335473fa79 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:42 +0000 Subject: HID: add haptics page defines Introduce haptic usages as defined in HID Usage Tables specification. Add HID units for newton and gram. Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..10f113c758fe 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -156,6 +156,7 @@ struct hid_item { #define HID_UP_TELEPHONY 0x000b0000 #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 +#define HID_UP_HAPTIC 0x000e0000 #define HID_UP_PID 0x000f0000 #define HID_UP_BATTERY 0x00850000 #define HID_UP_CAMERA 0x00900000 @@ -316,6 +317,28 @@ struct hid_item { #define HID_DG_TOOLSERIALNUMBER 0x000d005b #define HID_DG_LATENCYMODE 0x000d0060 +#define HID_HP_SIMPLECONTROLLER 0x000e0001 +#define HID_HP_WAVEFORMLIST 0x000e0010 +#define HID_HP_DURATIONLIST 0x000e0011 +#define HID_HP_AUTOTRIGGER 0x000e0020 +#define HID_HP_MANUALTRIGGER 0x000e0021 +#define HID_HP_AUTOTRIGGERASSOCIATEDCONTROL 0x000e0022 +#define HID_HP_INTENSITY 0x000e0023 +#define HID_HP_REPEATCOUNT 0x000e0024 +#define HID_HP_RETRIGGERPERIOD 0x000e0025 +#define HID_HP_WAVEFORMVENDORPAGE 0x000e0026 +#define HID_HP_WAVEFORMVENDORID 0x000e0027 +#define HID_HP_WAVEFORMCUTOFFTIME 0x000e0028 +#define HID_HP_WAVEFORMNONE 0x000e1001 +#define HID_HP_WAVEFORMSTOP 0x000e1002 +#define HID_HP_WAVEFORMCLICK 0x000e1003 +#define HID_HP_WAVEFORMBUZZCONTINUOUS 0x000e1004 +#define HID_HP_WAVEFORMRUMBLECONTINUOUS 0x000e1005 +#define HID_HP_WAVEFORMPRESS 0x000e1006 +#define HID_HP_WAVEFORMRELEASE 0x000e1007 +#define HID_HP_VENDORWAVEFORMMIN 0x000e2001 +#define HID_HP_VENDORWAVEFORMMAX 0x000e2fff + #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 #define HID_BAT_CHARGING 0x00850044 @@ -423,6 +446,12 @@ struct hid_item { #define HID_REPORT_PROTOCOL 1 #define HID_BOOT_PROTOCOL 0 +/* + * HID units + */ +#define HID_UNIT_GRAM 0x0101 +#define HID_UNIT_NEWTON 0xe111 + /* * This is the global environment of the parser. This information is * persistent for main-items. The global environment can be saved and -- cgit v1.2.3 From 4e584ac737884ef0fd80f6836b917972fad86b17 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:50 +0000 Subject: Input: MT - add INPUT_MT_TOTAL_FORCE flags Add a flag to generate ABS_PRESSURE as sum of ABS_MT_PRESSURE across all slots. This flag should be set if one knows a device reports true force and would like to report total force to the userspace. 
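A minimal sketch of how a touch driver might opt in (the device pointer and slot count are hypothetical; only the new flag comes from this patch):

  /* Report true per-contact force; the MT core sums the per-slot
   * ABS_MT_PRESSURE values into ABS_PRESSURE for userspace. */
  error = input_mt_init_slots(input_dev, 10,
                              INPUT_MT_DIRECT | INPUT_MT_TOTAL_FORCE);
  if (error)
          return error;
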
Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Acked-by: Dmitry Torokhov Signed-off-by: Benjamin Tissoires --- include/linux/input/mt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 2cf89a538b18..d30286298a00 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -17,6 +17,7 @@ #define INPUT_MT_DROP_UNUSED 0x0004 /* drop contacts not seen in frame */ #define INPUT_MT_TRACK 0x0008 /* use in-kernel tracking */ #define INPUT_MT_SEMI_MT 0x0010 /* semi-mt device, finger count handled manually */ +#define INPUT_MT_TOTAL_FORCE 0x0020 /* calculate total force from slots pressure */ /** * struct input_mt_slot - represents the state of an input MT slot -- cgit v1.2.3 From 8aa1e3a6f0ffbcfdf3bd7d87feb9090f96c54bc4 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:40 -0700 Subject: firmware: qcom: tzmem: export shm_bridge create/delete Anyone with access to contiguous physical memory should be able to share memory with QTEE using shm_bridge. Tested-by: Neil Armstrong Tested-by: Harshal Dev Reviewed-by: Kuldeep Singh Signed-off-by: Amirreza Zarrabi Link: https://lore.kernel.org/r/20250911-qcom-tee-using-tee-ss-without-mem-obj-v12-1-17f07a942b8d@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_tzmem.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/qcom/qcom_tzmem.h b/include/linux/firmware/qcom/qcom_tzmem.h index b83b63a0c049..48ac0e5454c7 100644 --- a/include/linux/firmware/qcom/qcom_tzmem.h +++ b/include/linux/firmware/qcom/qcom_tzmem.h @@ -53,4 +53,19 @@ DEFINE_FREE(qcom_tzmem, void *, if (_T) qcom_tzmem_free(_T)) phys_addr_t qcom_tzmem_to_phys(void *ptr); +#if IS_ENABLED(CONFIG_QCOM_TZMEM_MODE_SHMBRIDGE) +int qcom_tzmem_shm_bridge_create(phys_addr_t paddr, size_t size, u64 *handle); +void qcom_tzmem_shm_bridge_delete(u64 handle); +#else +static inline int qcom_tzmem_shm_bridge_create(phys_addr_t paddr, + size_t size, u64 *handle) +{ + return 0; +} + +static inline void qcom_tzmem_shm_bridge_delete(u64 handle) +{ +} +#endif + #endif /* __QCOM_TZMEM */ -- cgit v1.2.3 From 4b700098c0fc4a76c5c1e54465c8f35e13755294 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:41 -0700 Subject: firmware: qcom: scm: add support for object invocation Qualcomm TEE (QTEE) hosts Trusted Applications (TAs) and services in the secure world, accessed via objects. A QTEE client can invoke these objects to request services. Similarly, QTEE can request services from the nonsecure world using objects exported to the secure world. Add low-level primitives to facilitate the invocation of objects hosted in QTEE, as well as those hosted in the nonsecure world. If support for object invocation is available, the qcom_scm allocates a dedicated child platform device. The driver for this device communicates with QTEE using low-level primitives. 
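A hedged sketch of the resulting invocation flow (buffer setup is elided and all variable names are assumptions; the two primitives are declared in the hunk below):

  u64 result, resp_type;
  int ret;

  ret = qcom_scm_qtee_invoke_smc(inbuf_paddr, inbuf_size,
                                 outbuf_paddr, outbuf_size,
                                 &result, &resp_type);
  /* If QTEE requested a callback, answer it and resume; the
   * "done" check below is a placeholder, not the real protocol. */
  if (!ret && resp_type != 0)
          ret = qcom_scm_qtee_callback_response(cb_paddr, cb_size,
                                                &result, &resp_type);
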
Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Link: https://lore.kernel.org/r/20250911-qcom-tee-using-tee-ss-without-mem-obj-v12-2-17f07a942b8d@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 0f667bf1d4d9..a55ca771286b 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -175,4 +175,10 @@ static inline int qcom_scm_qseecom_app_send(u32 app_id, #endif /* CONFIG_QCOM_QSEECOM */ +int qcom_scm_qtee_invoke_smc(phys_addr_t inbuf, size_t inbuf_size, + phys_addr_t outbuf, size_t outbuf_size, + u64 *result, u64 *response_type); +int qcom_scm_qtee_callback_response(phys_addr_t buf, size_t buf_size, + u64 *result, u64 *response_type); + #endif -- cgit v1.2.3 From 54fd6bd42e7bd351802ff1d193a2e33e4bfb1836 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 8 Sep 2025 17:26:45 +0530 Subject: cdx: Split mcdi.h and reorganize headers Move bitfield.h from the CDX controller directory to include/linux/cdx to make them accessible to other drivers. As part of this refactoring, split mcdi.h into two headers: - mcdi.h: retains interface-level declarations - mcdid.h: contains internal definitions and macros This is in preparation for VersalNET EDAC driver that relies on it. Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Acked-by: Nikhil Agarwal Link: https://lore.kernel.org/20250908115649.22903-1-shubhrajyoti.datta@amd.com --- include/linux/cdx/bitfield.h | 90 ++++++++++++++++++++ include/linux/cdx/mcdi.h | 192 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+) create mode 100644 include/linux/cdx/bitfield.h create mode 100644 include/linux/cdx/mcdi.h (limited to 'include/linux') diff --git a/include/linux/cdx/bitfield.h b/include/linux/cdx/bitfield.h new file mode 100644 index 000000000000..567f8ec47582 --- /dev/null +++ b/include/linux/cdx/bitfield.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2013 Solarflare Communications Inc. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef CDX_BITFIELD_H +#define CDX_BITFIELD_H + +#include + +/* Lowest bit numbers and widths */ +#define CDX_DWORD_LBN 0 +#define CDX_DWORD_WIDTH 32 + +/* Specified attribute (e.g. LBN) of the specified field */ +#define CDX_VAL(field, attribute) field ## _ ## attribute +/* Low bit number of the specified field */ +#define CDX_LOW_BIT(field) CDX_VAL(field, LBN) +/* Bit width of the specified field */ +#define CDX_WIDTH(field) CDX_VAL(field, WIDTH) +/* High bit number of the specified field */ +#define CDX_HIGH_BIT(field) (CDX_LOW_BIT(field) + CDX_WIDTH(field) - 1) + +/* A doubleword (i.e. 4 byte) datatype - little-endian in HW */ +struct cdx_dword { + __le32 cdx_u32; +}; + +/* Value expanders for printk */ +#define CDX_DWORD_VAL(dword) \ + ((unsigned int)le32_to_cpu((dword).cdx_u32)) + +/* + * Extract bit field portion [low,high) from the 32-bit little-endian + * element which contains bits [min,max) + */ +#define CDX_DWORD_FIELD(dword, field) \ + (FIELD_GET(GENMASK(CDX_HIGH_BIT(field), CDX_LOW_BIT(field)), \ + le32_to_cpu((dword).cdx_u32))) + +/* + * Creates the portion of the named bit field that lies within the + * range [min,max). 
+ */ +#define CDX_INSERT_FIELD(field, value) \ + (FIELD_PREP(GENMASK(CDX_HIGH_BIT(field), \ + CDX_LOW_BIT(field)), value)) + +/* + * Creates the portion of the named bit fields that lie within the + * range [min,max). + */ +#define CDX_INSERT_FIELDS(field1, value1, \ + field2, value2, \ + field3, value3, \ + field4, value4, \ + field5, value5, \ + field6, value6, \ + field7, value7) \ + (CDX_INSERT_FIELD(field1, (value1)) | \ + CDX_INSERT_FIELD(field2, (value2)) | \ + CDX_INSERT_FIELD(field3, (value3)) | \ + CDX_INSERT_FIELD(field4, (value4)) | \ + CDX_INSERT_FIELD(field5, (value5)) | \ + CDX_INSERT_FIELD(field6, (value6)) | \ + CDX_INSERT_FIELD(field7, (value7))) + +#define CDX_POPULATE_DWORD(dword, ...) \ + (dword).cdx_u32 = cpu_to_le32(CDX_INSERT_FIELDS(__VA_ARGS__)) + +/* Populate a dword field with various numbers of arguments */ +#define CDX_POPULATE_DWORD_7 CDX_POPULATE_DWORD +#define CDX_POPULATE_DWORD_6(dword, ...) \ + CDX_POPULATE_DWORD_7(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_5(dword, ...) \ + CDX_POPULATE_DWORD_6(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_4(dword, ...) \ + CDX_POPULATE_DWORD_5(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_3(dword, ...) \ + CDX_POPULATE_DWORD_4(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_2(dword, ...) \ + CDX_POPULATE_DWORD_3(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_1(dword, ...) \ + CDX_POPULATE_DWORD_2(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_SET_DWORD(dword) \ + CDX_POPULATE_DWORD_1(dword, CDX_DWORD, 0xffffffff) + +#endif /* CDX_BITFIELD_H */ diff --git a/include/linux/cdx/mcdi.h b/include/linux/cdx/mcdi.h new file mode 100644 index 000000000000..46e3f63b062a --- /dev/null +++ b/include/linux/cdx/mcdi.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2008-2013 Solarflare Communications Inc. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef CDX_MCDI_H +#define CDX_MCDI_H + +#include +#include +#include + +#include "linux/cdx/bitfield.h" + +/** + * enum cdx_mcdi_mode - MCDI transaction mode + * @MCDI_MODE_EVENTS: wait for an mcdi response callback. + * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls + */ +enum cdx_mcdi_mode { + MCDI_MODE_EVENTS, + MCDI_MODE_FAIL, +}; + +#define MCDI_RPC_TIMEOUT (10 * HZ) +#define MCDI_RPC_LONG_TIMEOU (60 * HZ) +#define MCDI_RPC_POST_RST_TIME (10 * HZ) + +/** + * enum cdx_mcdi_cmd_state - State for an individual MCDI command + * @MCDI_STATE_QUEUED: Command not started and is waiting to run. + * @MCDI_STATE_RETRY: Command was submitted and MC rejected with no resources, + * as MC have too many outstanding commands. Command will be retried once + * another command returns. + * @MCDI_STATE_RUNNING: Command was accepted and is running. + * @MCDI_STATE_RUNNING_CANCELLED: Command is running but the issuer cancelled + * the command. + * @MCDI_STATE_FINISHED: Processing of this command has completed. + */ + +enum cdx_mcdi_cmd_state { + MCDI_STATE_QUEUED, + MCDI_STATE_RETRY, + MCDI_STATE_RUNNING, + MCDI_STATE_RUNNING_CANCELLED, + MCDI_STATE_FINISHED, +}; + +/** + * struct cdx_mcdi - CDX MCDI Firmware interface, to interact + * with CDX controller. 
+ * @mcdi: MCDI interface + * @mcdi_ops: MCDI operations + * @r5_rproc : R5 Remoteproc device handle + * @rpdev: RPMsg device + * @ept: RPMsg endpoint + * @work: Post probe work + */ +struct cdx_mcdi { + /* MCDI interface */ + struct cdx_mcdi_data *mcdi; + const struct cdx_mcdi_ops *mcdi_ops; + + struct rproc *r5_rproc; + struct rpmsg_device *rpdev; + struct rpmsg_endpoint *ept; + struct work_struct work; +}; + +struct cdx_mcdi_ops { + void (*mcdi_request)(struct cdx_mcdi *cdx, + const struct cdx_dword *hdr, size_t hdr_len, + const struct cdx_dword *sdu, size_t sdu_len); + unsigned int (*mcdi_rpc_timeout)(struct cdx_mcdi *cdx, unsigned int cmd); +}; + +typedef void cdx_mcdi_async_completer(struct cdx_mcdi *cdx, + unsigned long cookie, int rc, + struct cdx_dword *outbuf, + size_t outlen_actual); + +/** + * struct cdx_mcdi_cmd - An outstanding MCDI command + * @ref: Reference count. There will be one reference if the command is + * in the mcdi_iface cmd_list, another if it's on a cleanup list, + * and a third if it's queued in the work queue. + * @list: The data for this entry in mcdi->cmd_list + * @cleanup_list: The data for this entry in a cleanup list + * @work: The work item for this command, queued in mcdi->workqueue + * @mcdi: The mcdi_iface for this command + * @state: The state of this command + * @inlen: inbuf length + * @inbuf: Input buffer + * @quiet: Whether to silence errors + * @reboot_seen: Whether a reboot has been seen during this command, + * to prevent duplicates + * @seq: Sequence number + * @started: Jiffies this command was started at + * @cookie: Context for completion function + * @completer: Completion function + * @handle: Command handle + * @cmd: Command number + * @rc: Return code + * @outlen: Length of output buffer + * @outbuf: Output buffer + */ +struct cdx_mcdi_cmd { + struct kref ref; + struct list_head list; + struct list_head cleanup_list; + struct work_struct work; + struct cdx_mcdi_iface *mcdi; + enum cdx_mcdi_cmd_state state; + size_t inlen; + const struct cdx_dword *inbuf; + bool quiet; + bool reboot_seen; + u8 seq; + unsigned long started; + unsigned long cookie; + cdx_mcdi_async_completer *completer; + unsigned int handle; + unsigned int cmd; + int rc; + size_t outlen; + struct cdx_dword *outbuf; + /* followed by inbuf data if necessary */ +}; + +/** + * struct cdx_mcdi_iface - MCDI protocol context + * @cdx: The associated NIC + * @iface_lock: Serialise access to this structure + * @outstanding_cleanups: Count of cleanups + * @cmd_list: List of outstanding and running commands + * @workqueue: Workqueue used for delayed processing + * @cmd_complete_wq: Waitqueue for command completion + * @db_held_by: Command the MC doorbell is in use by + * @seq_held_by: Command each sequence number is in use by + * @prev_handle: The last used command handle + * @mode: Poll for mcdi completion, or wait for an mcdi_event + * @prev_seq: The last used sequence number + * @new_epoch: Indicates start of day or start of MC reboot recovery + */ +struct cdx_mcdi_iface { + struct cdx_mcdi *cdx; + /* Serialise access */ + struct mutex iface_lock; + unsigned int outstanding_cleanups; + struct list_head cmd_list; + struct workqueue_struct *workqueue; + wait_queue_head_t cmd_complete_wq; + struct cdx_mcdi_cmd *db_held_by; + struct cdx_mcdi_cmd *seq_held_by[16]; + unsigned int prev_handle; + enum cdx_mcdi_mode mode; + u8 prev_seq; + bool new_epoch; +}; + +/** + * struct cdx_mcdi_data - extra state for NICs that implement MCDI + * @iface: Interface/protocol state + * @fn_flags: 
Flags for this function, as returned by %MC_CMD_DRV_ATTACH. + */ +struct cdx_mcdi_data { + struct cdx_mcdi_iface iface; + u32 fn_flags; +}; + +/* + * We expect that 16- and 32-bit fields in MCDI requests and responses + * are appropriately aligned, but 64-bit fields are only + * 32-bit-aligned. + */ +#define MCDI_DECLARE_BUF(_name, _len) struct cdx_dword _name[DIV_ROUND_UP(_len, 4)] = {{0}} +#define _MCDI_PTR(_buf, _offset) \ + ((u8 *)(_buf) + (_offset)) +#define MCDI_PTR(_buf, _field) \ + _MCDI_PTR(_buf, MC_CMD_ ## _field ## _OFST) +#define _MCDI_CHECK_ALIGN(_ofst, _align) \ + ((void)BUILD_BUG_ON_ZERO((_ofst) & ((_align) - 1)), \ + (_ofst)) +#define _MCDI_DWORD(_buf, _field) \ + ((_buf) + (_MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, 4) >> 2)) + +#define MCDI_SET_DWORD(_buf, _field, _value) \ + CDX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), CDX_DWORD, _value) +#define MCDI_DWORD(_buf, _field) \ + CDX_DWORD_FIELD(*_MCDI_DWORD(_buf, _field), CDX_DWORD) +#endif /* CDX_MCDI_H */ -- cgit v1.2.3 From f99b3917789d83ea89b24b722d784956f8289f45 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 15 Sep 2025 14:57:29 +0200 Subject: fs: rename generic_delete_inode() and generic_drop_inode() generic_delete_inode() is rather misleading for what the routine is doing. inode_just_drop() should be much clearer. The new naming is inconsistent with generic_drop_inode(), so rename that one as well with inode_ as the suffix. No functional changes. Signed-off-by: Mateusz Guzik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4daf9b30a641..724b9af67f35 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3312,8 +3312,8 @@ extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); -extern int generic_delete_inode(struct inode *inode); -static inline int generic_drop_inode(struct inode *inode) +extern int inode_just_drop(struct inode *inode); +static inline int inode_generic_drop(struct inode *inode) { return !inode->i_nlink || inode_unhashed(inode); } -- cgit v1.2.3 From 8b0d03129b6165bbf8c9494897489c6da6fadd58 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 8 Sep 2025 17:26:46 +0530 Subject: cdx: Export Symbols for MCDI RPC and Initialization The cdx_mcdi_init(), cdx_mcdi_process_cmd(), and cdx_mcdi_rpc() functions are needed by the VersalNET EDAC module that interact with the MCDI (Management Controller Direct Interface) framework. These functions facilitate communication between different hardware components by enabling command execution and status management. 
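For orientation, a hedged sketch of one RPC round trip through the newly exported helper (command and field names are borrowed from the EDAC pcol header added later in this series; error handling and buffer-length conventions are assumptions):

  MCDI_DECLARE_BUF(inbuf, MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_LEN);
  MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_DDR_CONFIG_LEN);
  size_t outlen;
  int ret;

  /* Select DDR controller 0 in the request payload. */
  MCDI_SET_DWORD(inbuf, EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX, 0);
  ret = cdx_mcdi_rpc(cdx, MC_CMD_EDAC_GET_DDR_CONFIG,
                     inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
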
Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Acked-by: Nikhil Agarwal Link: https://lore.kernel.org/20250908115649.22903-1-shubhrajyoti.datta@amd.com --- include/linux/cdx/mcdi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cdx/mcdi.h b/include/linux/cdx/mcdi.h index 46e3f63b062a..74075305cba4 100644 --- a/include/linux/cdx/mcdi.h +++ b/include/linux/cdx/mcdi.h @@ -169,6 +169,13 @@ struct cdx_mcdi_data { u32 fn_flags; }; +void cdx_mcdi_finish(struct cdx_mcdi *cdx); +int cdx_mcdi_init(struct cdx_mcdi *cdx); +void cdx_mcdi_process_cmd(struct cdx_mcdi *cdx, struct cdx_dword *outbuf, int len); +int cdx_mcdi_rpc(struct cdx_mcdi *cdx, unsigned int cmd, + const struct cdx_dword *inbuf, size_t inlen, + struct cdx_dword *outbuf, size_t outlen, size_t *outlen_actual); + /* * We expect that 16- and 32-bit fields in MCDI requests and responses * are appropriately aligned, but 64-bit fields are only -- cgit v1.2.3 From d5fe2fec6c40dda03df8cc9b4a97de0b7e39f984 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 8 Sep 2025 17:26:49 +0530 Subject: EDAC: Add a driver for the AMD Versal NET DDR controller Add a driver for the AMD Versal NET DDR memory controller which supports single bit error correction, double bit error detection and other system errors from various IP subsystems (e.g., RPU, NOCs, HNICX, PL). The driver listens for notifications from the NMC (Network management controller) using RPMsg (Remote Processor Messaging). The channel used for communicating to RPMsg is named "error_edac". Upon receipt of a notification, the driver sends a RAS event trace. [ bp: - Fixup title - Rewrite commit message - Fixup Kconfig text - Zap unused defines and align them - Simplify rpmsg_cb() considerably - Drop silly double-brackets in conditionals - Use proper void * type in mcdi_request() - Do not clear chinfo in rpmsg_probe() unnecessarily - Fix indentation - Do a proper err unwind path in init_versalnet() - Redo the error unwind path in mc_probe() properly - Fix the ordering in mc_remove() ] Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250908115649.22903-1-shubhrajyoti.datta@amd.com Link: https://lore.kernel.org/r/20250703173105.GLaGa-WQCESDNsqygm@fat_crate.local --- include/linux/cdx/edac_cdx_pcol.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/cdx/edac_cdx_pcol.h (limited to 'include/linux') diff --git a/include/linux/cdx/edac_cdx_pcol.h b/include/linux/cdx/edac_cdx_pcol.h new file mode 100644 index 000000000000..749db33bb482 --- /dev/null +++ b/include/linux/cdx/edac_cdx_pcol.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Driver for AMD network controllers and boards + * + * Copyright (C) 2021, Xilinx, Inc. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef MC_CDX_PCOL_H +#define MC_CDX_PCOL_H +#include + +#define MC_CMD_EDAC_GET_DDR_CONFIG_OUT_WORD_LENGTH_LEN 4 +/* Number of registers for the DDR controller */ +#define MC_CMD_GET_DDR_CONFIG_OFST 4 +#define MC_CMD_GET_DDR_CONFIG_LEN 4 + +/***********************************/ +/* MC_CMD_EDAC_GET_DDR_CONFIG + * Provides detailed configuration for the DDR controller of the given index. 
+ */ +#define MC_CMD_EDAC_GET_DDR_CONFIG 0x3 + +/* MC_CMD_EDAC_GET_DDR_CONFIG_IN msgrequest */ +#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_OFST 0 +#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_LEN 4 + +#endif /* MC_CDX_PCOL_H */ -- cgit v1.2.3 From 1b3aa3900782707ec2f4cc1651bc82c628f25d2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Sep 2025 17:45:36 -0600 Subject: io_uring/uring_cmd: correct signature for io_uring_mshot_cmd_post_cqe() The !CONFIG_IO_URING signature is wrong, fix that up. The non stub signature got updated for the io_br_sel changes that happened before this patch went in, but the stub one did not. Fixes: 620a50c92700 ("io_uring: uring_cmd: add multishot support") Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 1350af846ddd..c8185f54fde9 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -126,7 +126,7 @@ io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group, return (struct io_br_sel) { .val = -EOPNOTSUPP }; } static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, - ssize_t ret, unsigned int issue_flags) + struct io_br_sel *sel, unsigned int issue_flags) { return true; } -- cgit v1.2.3 From 0cbaf65c91db0e40a577e8919979dac1963cfcc0 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:43 -0700 Subject: tee: add close_context to TEE driver operation The tee_context can be used to manage TEE user resources, including those allocated by the driver for the TEE on behalf of the user. The release() callback is invoked only when all resources, such as tee_shm, are released and there are no references to the tee_context. When a user closes the device file, the driver should notify the TEE to release any resources it may hold and drop the context references. To achieve this, a close_context() callback is introduced to initiate resource release in the TEE driver when the device file is closed. Relocate teedev_ctx_get, teedev_ctx_put, tee_device_get, and tee_device_get functions to tee_core.h to make them accessible outside the TEE subsystem. 
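A hedged sketch of how a TEE driver might wire up the new callback (all my_* names are hypothetical):

  static const struct tee_driver_ops my_ops = {
          .get_version   = my_get_version,
          .open          = my_open,
          /* Invoked at device-file close: start releasing TEE-held
           * resources even while context references remain. */
          .close_context = my_close_context,
          .release       = my_release,
  };
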
Reviewed-by: Sumit Garg Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index 7b0c1da2ca6c..456a940d4710 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -76,8 +76,9 @@ struct tee_device { /** * struct tee_driver_ops - driver operations vtable * @get_version: returns version of driver - * @open: called when the device file is opened - * @release: release this open file + * @open: called for a context when the device file is opened + * @close_context: called when the device file is closed + * @release: called to release the context * @open_session: open a new session * @close_session: close a session * @system_session: declare session as a system session @@ -87,11 +88,17 @@ struct tee_device { * @supp_send: called for supplicant to send a response * @shm_register: register shared memory buffer in TEE * @shm_unregister: unregister shared memory buffer in TEE + * + * The context given to @open might last longer than the device file if it is + * tied to other resources in the TEE driver. @close_context is called when the + * client closes the device file, even if there are existing references to the + * context. The TEE driver can use @close_context to start cleaning up. */ struct tee_driver_ops { void (*get_version)(struct tee_device *teedev, struct tee_ioctl_version_data *vers); int (*open)(struct tee_context *ctx); + void (*close_context)(struct tee_context *ctx); void (*release)(struct tee_context *ctx); int (*open_session)(struct tee_context *ctx, struct tee_ioctl_open_session_arg *arg, @@ -200,6 +207,24 @@ int tee_device_register_dma_heap(struct tee_device *teedev, struct tee_protmem_pool *pool); void tee_device_put_all_dma_heaps(struct tee_device *teedev); +/** + * tee_device_get() - Increment the user count for a tee_device + * @teedev: Pointer to the tee_device + * + * If tee_device_unregister() has been called and the final user of @teedev + * has already released the device, this function will fail to prevent new users + * from accessing the device during the unregistration process. + * + * Returns: true if @teedev remains valid, otherwise false + */ +bool tee_device_get(struct tee_device *teedev); + +/** + * tee_device_put() - Decrease the user count for a tee_device + * @teedev: pointer to the tee_device + */ +void tee_device_put(struct tee_device *teedev); + /** * tee_device_set_dev_groups() - Set device attribute groups * @teedev: Device to register @@ -374,4 +399,25 @@ struct tee_context *teedev_open(struct tee_device *teedev); */ void teedev_close_context(struct tee_context *ctx); +/** + * teedev_ctx_get() - Increment the reference count of a context + * @ctx: Pointer to the context + * + * This function increases the refcount of the context, which is tied to + * resources shared by the same tee_device. During the unregistration process, + * the context may remain valid even after tee_device_unregister() has returned. + * + * Users should ensure that the context's refcount is properly decreased before + * calling tee_device_put(), typically within the context's release() function. + * Alternatively, users can call tee_device_get() and teedev_ctx_get() together + * and release them simultaneously (see shm_alloc_helper()). 
+ */ +void teedev_ctx_get(struct tee_context *ctx); + +/** + * teedev_ctx_put() - Decrease reference count on a context + * @ctx: pointer to the context + */ +void teedev_ctx_put(struct tee_context *ctx); + #endif /*__TEE_CORE_H*/ -- cgit v1.2.3 From 54a53e95a908a4cc770f0530c49f04c89e7b18dc Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:44 -0700 Subject: tee: add TEE_IOCTL_PARAM_ATTR_TYPE_UBUF For drivers that can transfer data to the TEE without using shared memory from client, it is necessary to receive the user address directly, bypassing any processing by the TEE subsystem. Introduce TEE_IOCTL_PARAM_ATTR_TYPE_UBUF_INPUT/OUTPUT/INOUT to represent userspace buffers. Reviewed-by: Sumit Garg Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 824f1251de60..7915e8869cbd 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -82,6 +82,11 @@ struct tee_param_memref { struct tee_shm *shm; }; +struct tee_param_ubuf { + void __user *uaddr; + size_t size; +}; + struct tee_param_value { u64 a; u64 b; @@ -92,6 +97,7 @@ struct tee_param { u64 attr; union { struct tee_param_memref memref; + struct tee_param_ubuf ubuf; struct tee_param_value value; } u; }; -- cgit v1.2.3 From d5b8b0fa1775d8b59c3fc9e4aa2baa715d08f3ee Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:45 -0700 Subject: tee: add TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF The TEE subsystem allows session-based access to trusted services, requiring a session to be established to receive a service. This is not suitable for an environment that represents services as objects. An object supports various operations that a client can invoke, potentially generating a result or a new object that can be invoked independently of the original object. Add TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF_INPUT/OUTPUT/INOUT to represent an object. Objects may reside in either TEE or userspace. To invoke an object in TEE, introduce a new ioctl. Use the existing SUPPL_RECV and SUPPL_SEND to invoke an object in userspace. 
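A hedged sketch of passing an object reference into an invocation (the handle value is hypothetical):

  struct tee_param param = {
          .attr = TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF_INPUT,
          .u.objref = {
                  .id    = obj_id,  /* object handle from a prior call */
                  .flags = 0,
          },
  };
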
Reviewed-by: Sumit Garg Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 4 ++++ include/linux/tee_drv.h | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index 456a940d4710..1f3e5dad6d0d 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -83,6 +83,7 @@ struct tee_device { * @close_session: close a session * @system_session: declare session as a system session * @invoke_func: invoke a trusted function + * @object_invoke_func: invoke a TEE object * @cancel_req: request cancel of an ongoing invoke or open * @supp_recv: called for supplicant to get a command * @supp_send: called for supplicant to send a response @@ -108,6 +109,9 @@ struct tee_driver_ops { int (*invoke_func)(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, struct tee_param *param); + int (*object_invoke_func)(struct tee_context *ctx, + struct tee_ioctl_object_invoke_arg *arg, + struct tee_param *param); int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, struct tee_param *param); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 7915e8869cbd..88a6f9697c89 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -87,6 +87,11 @@ struct tee_param_ubuf { size_t size; }; +struct tee_param_objref { + u64 id; + u64 flags; +}; + struct tee_param_value { u64 a; u64 b; @@ -97,6 +102,7 @@ struct tee_param { u64 attr; union { struct tee_param_memref memref; + struct tee_param_objref objref; struct tee_param_ubuf ubuf; struct tee_param_value value; } u; -- cgit v1.2.3 From 2c895133950646f45e5cf3900b168c952c8dbee8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 15 Sep 2025 03:26:17 +0000 Subject: bpf: Do not limit bpf_cgroup_from_id to current's namespace The bpf_cgroup_from_id kfunc relies on cgroup_get_from_id to obtain the cgroup corresponding to a given cgroup ID. This helper can be called in a lot of contexts where the current thread can be random. A recent example was its use in sched_ext's ops.tick(), to obtain the root cgroup pointer. Since the current task can be whatever random user space task preempted by the timer tick, this makes the behavior of the helper unreliable. Refactor out __cgroup_get_from_id as the non-namespace aware version of cgroup_get_from_id, and change bpf_cgroup_from_id to make use of it. There is no compatibility breakage here, since changing the namespace against which the lookup is being done to the root cgroup namespace only permits a wider set of lookups to succeed now. The cgroup IDs across namespaces are globally unique, and thus don't need to be retranslated. 
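In kernel code the refactored helper would be used roughly as follows (a sketch with error handling abbreviated):

  struct cgroup *cgrp;

  /* Lookup against the root cgroup namespace, independent of
   * whichever task happens to be current. */
  cgrp = __cgroup_get_from_id(id);
  if (IS_ERR(cgrp))
          return PTR_ERR(cgrp);
  /* ... use cgrp ... */
  cgroup_put(cgrp);
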
Reported-by: Dan Schatzberg Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250915032618.1551762-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..b08c8e62881c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -650,6 +650,7 @@ static inline void cgroup_kthread_ready(void) } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); +struct cgroup *__cgroup_get_from_id(u64 id); struct cgroup *cgroup_get_from_id(u64 id); #else /* !CONFIG_CGROUPS */ -- cgit v1.2.3 From c3426ba2ed6942fe33c75bf17fc7513ba2c6ac64 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Thu, 11 Sep 2025 13:06:31 +0200 Subject: tcp: reorganize tcp_sock_write_txrx group for variables later Use the first 3-byte hole at the beginning of the tcp_sock_write_txrx group for 'noneagle'/'rate_app_limited' to fill in the existing hole in later patches. Therefore, the group size of tcp_sock_write_txrx is reduced from 92 + 4 to 91 + 4. In addition, the group size of tcp_sock_write_rx is changed to 96 to fit in the pahole outcome. Below are the trimmed pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_sock { [...] __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ /* XXX 3 bytes hole, try to pack */ [...] struct tcp_options_received rx_opt; /* 2588 24 */ u8 nonagle:4; /* 2612: 0 1 */ u8 rate_app_limited:1; /* 2612: 4 1 */ /* XXX 3 bits hole, try to pack */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2613 0 */ /* XXX 3 bytes hole, try to pack */ __cacheline_group_begin__tcp_sock_write_rx[0] __attribute__((__aligned__(8))); /* 2616 0 */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2712 0 */ [...] /* size: 3200, cachelines: 50, members: 161 */ } [AFTER THIS PATCH] struct tcp_sock { [...] __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* XXX 2 bytes hole, try to pack */ [...] struct tcp_options_received rx_opt; /* 2588 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2612 0 */ /* XXX 4 bytes hole, try to pack */ __cacheline_group_begin__tcp_sock_write_rx[0] __attribute__((__aligned__(8))); /* 2616 0 */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2712 0 */ [...] /* size: 3200, cachelines: 50, members: 161 */ } Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250911110642.87529-4-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 57e478bfaef2..d103cc0e7a35 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -285,6 +285,8 @@ struct tcp_sock { * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ + u8 nonagle : 4,/* Disable Nagle algorithm? */ + rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ @@ -303,8 +305,6 @@ struct tcp_sock { * Options received (usually on last packet, some only on SYN packets). */ struct tcp_options_received rx_opt; - u8 nonagle : 4,/* Disable Nagle algorithm? 
*/ - rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ -- cgit v1.2.3 From 07c446e35b89bc8774792f8036e595cffdf5b162 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:43 +0900 Subject: firewire: core: maintain phy packet receivers locally in cdev layer The list of receivers for phy packet is used only by cdev layer, while it is maintained as a member of fw_card structure. This commit maintains the list locally in cdev layer. Link: https://lore.kernel.org/r/20250915234747.915922-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index d38c6e538e5c..f3260aacf730 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -115,8 +115,6 @@ struct fw_card { int index; struct list_head link; - struct list_head phy_receiver_list; - struct delayed_work br_work; /* bus reset job */ bool br_short; -- cgit v1.2.3 From 7d138cb269dbd2fa9b0da89a9c10503d1cf269d5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:44 +0900 Subject: firewire: core: use spin lock specific to topology map At present, the operation for read transaction to topology map register is not protected by any kind of lock primitives. This causes a potential problem to result in the mixed content of topology map. This commit adds and uses spin lock specific to topology map. Link: https://lore.kernel.org/r/20250915234747.915922-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index f3260aacf730..aeb71c39e57e 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -129,7 +129,11 @@ struct fw_card { bool broadcast_channel_allocated; u32 broadcast_channel; - __be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; + + struct { + __be32 buffer[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; + spinlock_t lock; + } topology_map; __be32 maint_utility_register; -- cgit v1.2.3 From 420bd7068cbfaea0a857472dd631dc48311e2a8f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:45 +0900 Subject: firewire: core: use spin lock specific to transaction The list of instance for asynchronous transaction to wait for response subaction is maintained as a member of fw_card structure. The card-wide spinlock is used at present for any operation over the list, however it is not necessarily suited for the purpose. This commit adds and uses the spin lock specific to maintain the list. 
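A hedged sketch of the resulting access pattern (field names come from the hunk below; the fw_transaction list-member name 'link' is an assumption):

  unsigned long flags;
  struct fw_transaction *t;

  spin_lock_irqsave(&card->transactions.lock, flags);
  list_for_each_entry(t, &card->transactions.list, link)
          ; /* match responses or time out pending transactions */
  spin_unlock_irqrestore(&card->transactions.lock, flags);
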
Link: https://lore.kernel.org/r/20250915234747.915922-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index aeb71c39e57e..8d6801cf2fca 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -88,11 +88,15 @@ struct fw_card { int node_id; int generation; - int current_tlabel; - u64 tlabel_mask; - struct list_head transaction_list; u64 reset_jiffies; + struct { + int current_tlabel; + u64 tlabel_mask; + struct list_head list; + spinlock_t lock; + } transactions; + u32 split_timeout_hi; u32 split_timeout_lo; unsigned int split_timeout_cycles; -- cgit v1.2.3 From b5725cfa4120a4d234ab112aad151d731531d093 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:46 +0900 Subject: firewire: core: use spin lock specific to timer for split transaction At present the parameters to compute timeout time for split transaction is protected by card-wide spin lock, while it is not necessarily convenient in a point to narrower critical section. This commit adds and uses another spin lock specific for the purpose. Link: https://lore.kernel.org/r/20250915234747.915922-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 8d6801cf2fca..6d208769d456 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -97,18 +97,21 @@ struct fw_card { spinlock_t lock; } transactions; - u32 split_timeout_hi; - u32 split_timeout_lo; - unsigned int split_timeout_cycles; - unsigned int split_timeout_jiffies; + struct { + u32 hi; + u32 lo; + unsigned int cycles; + unsigned int jiffies; + spinlock_t lock; + } split_timeout; unsigned long long guid; unsigned max_receive; int link_speed; int config_rom_generation; - spinlock_t lock; /* Take this lock when handling the lists in - * this struct. */ + spinlock_t lock; + struct fw_node *local_node; struct fw_node *root_node; struct fw_node *irm_node; -- cgit v1.2.3 From f9fadf23c7f1a0df72ef50a873e1bd3bd4631ec1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Feb 2024 21:25:18 -0500 Subject: security_dentry_init_security(): constify qstr argument Nothing outside of fs/dcache.c has any business modifying dentry names; passing &dentry->d_name as an argument should have that argument declared as a const pointer. 
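With the constified prototype, a call site sketched like this passes the dentry's name without any cast (credential variable names assumed):

  err = security_dentry_create_files_as(dentry, mode, &dentry->d_name,
                                        old_cred, new_cred);
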
Acked-by: Casey Schaufler # smack part Acked-by: Paul Moore Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index fd11fffdd3c3..aa4d6ec9c98b 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -85,7 +85,7 @@ LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, - struct qstr *name, const struct cred *old, struct cred *new) + const struct qstr *name, const struct cred *old, struct cred *new) #ifdef CONFIG_SECURITY_PATH LSM_HOOK(int, 0, path_unlink, const struct path *dir, struct dentry *dentry) diff --git a/include/linux/security.h b/include/linux/security.h index 521bcb5b9717..3f694d3ebd70 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -391,7 +391,7 @@ int security_dentry_init_security(struct dentry *dentry, int mode, const char **xattr_name, struct lsm_context *lsmcxt); int security_dentry_create_files_as(struct dentry *dentry, int mode, - struct qstr *name, + const struct qstr *name, const struct cred *old, struct cred *new); int security_path_notify(const struct path *path, u64 mask, @@ -871,7 +871,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, } static inline int security_dentry_create_files_as(struct dentry *dentry, - int mode, struct qstr *name, + int mode, const struct qstr *name, const struct cred *old, struct cred *new) { -- cgit v1.2.3 From ca97d6c60b1d1dff519a7e3dd86708304e657365 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 11 Jul 2025 05:45:01 -0400 Subject: generic_ci_validate_strict_name(): constify name argument Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..6dcfc1c399ca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3719,7 +3719,8 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, * happens when a directory is casefolded and the filesystem is strict * about its encoding. */ -static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +static inline bool generic_ci_validate_strict_name(struct inode *dir, + const struct qstr *name) { if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb)) return true; @@ -3734,7 +3735,8 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qst return !utf8_validate(dir->i_sb->s_encoding, name); } #else -static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +static inline bool generic_ci_validate_strict_name(struct inode *dir, + const struct qstr *name) { return true; } -- cgit v1.2.3 From 180a9cc3fd6a020746fbd7f97b9b62295a325fd2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Feb 2024 14:57:43 -0500 Subject: make it easier to catch those who try to modify ->d_name Turn d_name into an anon union of const struct qstr d_name with struct qstr __d_name. Very few places need to modify it (all in fs/dcache.c); those are switched to use of ->__d_name. 
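The practical effect, sketched:

  /* Anywhere in the tree: reads keep working as before. */
  len = dentry->d_name.len;

  /* Outside fs/dcache.c: an accidental store such as
   *   dentry->d_name.len = 0;
   * now fails to compile because the member is const. */
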
Note that ->d_name can actually change under you unless you have the right locking environment; this const just prohibits accidentally doing stores without being easily spotted. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/dcache.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index cc3e1c1a3454..c83e02b94389 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -95,7 +95,10 @@ struct dentry { seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ - struct qstr d_name; + union { + struct qstr __d_name; /* for use ONLY in fs/dcache.c */ + const struct qstr d_name; + }; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ union shortname_store d_shortname; -- cgit v1.2.3 From dae575e669811b201114702d96f6854d5c8324b5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Sep 2025 23:16:35 -0400 Subject: backing_file_user_path(): constify struct path * Callers never use the resulting pointer to modify the struct path it points to (nor should they). Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..3bcc878817be 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2879,7 +2879,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct path *backing_file_user_path(const struct file *f); +const struct path *backing_file_user_path(const struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file -- cgit v1.2.3 From 63dbfb077cdad21b356e17d4ce76650e67b83159 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jul 2025 21:58:05 -0400 Subject: done_path_create(): constify path argument Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 5d085428e471..75c0b665fbd4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -60,7 +60,7 @@ extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); -extern void done_path_create(struct path *, struct dentry *); +extern void done_path_create(const struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); -- cgit v1.2.3 From 2930afe2c9cb9aec329269e40c851bf56cdcc09c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jul 2025 21:32:41 -0400 Subject: export_operations->open(): constify path argument for the method and its sole instance... 
Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index cfb0dd1ea49c..f43c83e0b8c5 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -270,7 +270,7 @@ struct export_operations { int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); - struct file * (*open)(struct path *path, unsigned int oflags); + struct file * (*open)(const struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ -- cgit v1.2.3 From 1f6df5847454dee8608f78ee0df7352472cb2447 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jul 2025 18:45:02 -0400 Subject: drop_collected_paths(): constify arguments ... and use that to constify the pointers in callers Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 5f9c053b0897..c09032463b36 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -105,7 +105,7 @@ extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); extern struct path *collect_paths(const struct path *, struct path *, unsigned); -extern void drop_collected_paths(struct path *, struct path *); +extern void drop_collected_paths(const struct path *, struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); -- cgit v1.2.3 From b42ffcd5069d5cfb777b8982a1c55c7e2f1d3998 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Aug 2025 19:34:37 -0400 Subject: collect_paths(): constify the return value callers have no business modifying the paths they get Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index c09032463b36..18e4b97f8a98 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -104,8 +104,8 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct path *collect_paths(const struct path *, struct path *, unsigned); -extern void drop_collected_paths(const struct path *, struct path *); +extern const struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(const struct path *, const struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); -- cgit v1.2.3 From 880fcc329e2473ba02ffbc446fcd403972ab1fca Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Tue, 9 Sep 2025 00:03:24 -0700 Subject: drivers/perf: riscv: Export PMU event info function The event mapping function can be used in event info function to find out the corresponding SBI PMU event encoding during the get_event_info function as well. 
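A hedged sketch of a KVM-side caller mapping a perf event to its SBI encoding via the newly exported helper:

  u64 econfig;
  int ret;

  ret = riscv_pmu_get_event_info(PERF_TYPE_HARDWARE,
                                 PERF_COUNT_HW_CPU_CYCLES, &econfig);
  if (ret)
          return ret;  /* no SBI PMU encoding for this event */
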
Refactor and export it so that it can be invoked from kvm and internal driver. Signed-off-by: Atish Patra Reviewed-by: Anup Patel Acked-by: Paul Walmsley Link: https://lore.kernel.org/r/20250909-pmu_event_info-v6-5-d8f80cacb884@rivosinc.com Signed-off-by: Anup Patel --- include/linux/perf/riscv_pmu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 701974639ff2..f82a28040594 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -89,6 +89,7 @@ static inline void riscv_pmu_legacy_skip_init(void) {}; struct riscv_pmu *riscv_pmu_alloc(void); #ifdef CONFIG_RISCV_PMU_SBI int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig); #endif #endif /* CONFIG_RISCV_PMU */ -- cgit v1.2.3 From 94deac977fbd0246c971b4f1d17a6385f5e0b1a4 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Mon, 1 Sep 2025 10:52:04 +0000 Subject: fs: add an enum for number of life time hints Add WRITE_LIFE_HINT_NR into the rw_hint enum to define the number of values write life time hints can be set to. This is useful for e.g. file systems which may want to map these values to allocation groups. Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- include/linux/rw_hint.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h index 309ca72f2dfb..adcc43042c90 100644 --- a/include/linux/rw_hint.h +++ b/include/linux/rw_hint.h @@ -14,6 +14,7 @@ enum rw_hint { WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, + WRITE_LIFE_HINT_NR, } __packed; /* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ -- cgit v1.2.3 From 51dad33ede63618a6b425c650f3042d85e646dac Mon Sep 17 00:00:00 2001 From: Ming Yu Date: Fri, 12 Sep 2025 17:19:46 +0800 Subject: mfd: Add core driver for Nuvoton NCT6694 The Nuvoton NCT6694 provides an USB interface to the host to access its features. Sub-devices can use the USB functions nct6694_read_msg() and nct6694_write_msg() to issue a command. They can also request interrupt that will be called when the USB device receives its interrupt pipe. Signed-off-by: Ming Yu Link: https://lore.kernel.org/r/20250912091952.1169369-2-a0282524688@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/nct6694.h | 102 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 include/linux/mfd/nct6694.h (limited to 'include/linux') diff --git a/include/linux/mfd/nct6694.h b/include/linux/mfd/nct6694.h new file mode 100644 index 000000000000..6eb9be2cd4a0 --- /dev/null +++ b/include/linux/mfd/nct6694.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Nuvoton Technology Corp. + * + * Nuvoton NCT6694 USB transaction and data structure. 
+ */ + +#ifndef __MFD_NCT6694_H +#define __MFD_NCT6694_H + +#define NCT6694_VENDOR_ID 0x0416 +#define NCT6694_PRODUCT_ID 0x200B +#define NCT6694_INT_IN_EP 0x81 +#define NCT6694_BULK_IN_EP 0x02 +#define NCT6694_BULK_OUT_EP 0x03 + +#define NCT6694_HCTRL_SET 0x40 +#define NCT6694_HCTRL_GET 0x80 + +#define NCT6694_URB_TIMEOUT 1000 + +enum nct6694_irq_id { + NCT6694_IRQ_GPIO0 = 0, + NCT6694_IRQ_GPIO1, + NCT6694_IRQ_GPIO2, + NCT6694_IRQ_GPIO3, + NCT6694_IRQ_GPIO4, + NCT6694_IRQ_GPIO5, + NCT6694_IRQ_GPIO6, + NCT6694_IRQ_GPIO7, + NCT6694_IRQ_GPIO8, + NCT6694_IRQ_GPIO9, + NCT6694_IRQ_GPIOA, + NCT6694_IRQ_GPIOB, + NCT6694_IRQ_GPIOC, + NCT6694_IRQ_GPIOD, + NCT6694_IRQ_GPIOE, + NCT6694_IRQ_GPIOF, + NCT6694_IRQ_CAN0, + NCT6694_IRQ_CAN1, + NCT6694_IRQ_RTC, + NCT6694_NR_IRQS, +}; + +enum nct6694_response_err_status { + NCT6694_NO_ERROR = 0, + NCT6694_FORMAT_ERROR, + NCT6694_RESERVED1, + NCT6694_RESERVED2, + NCT6694_NOT_SUPPORT_ERROR, + NCT6694_NO_RESPONSE_ERROR, + NCT6694_TIMEOUT_ERROR, + NCT6694_PENDING, +}; + +struct __packed nct6694_cmd_header { + u8 rsv1; + u8 mod; + union __packed { + __le16 offset; + struct __packed { + u8 cmd; + u8 sel; + }; + }; + u8 hctrl; + u8 rsv2; + __le16 len; +}; + +struct __packed nct6694_response_header { + u8 sequence_id; + u8 sts; + u8 reserved[4]; + __le16 len; +}; + +union __packed nct6694_usb_msg { + struct nct6694_cmd_header cmd_header; + struct nct6694_response_header response_header; +}; + +struct nct6694 { + struct device *dev; + struct ida gpio_ida; + struct ida i2c_ida; + struct ida canfd_ida; + struct ida wdt_ida; + struct irq_domain *domain; + struct mutex access_lock; + spinlock_t irq_lock; + struct urb *int_in_urb; + struct usb_device *udev; + union nct6694_usb_msg *usb_msg; + __le32 *int_buffer; + unsigned int irq_enable; +}; + +int nct6694_read_msg(struct nct6694 *nct6694, const struct nct6694_cmd_header *cmd_hd, void *buf); +int nct6694_write_msg(struct nct6694 *nct6694, const struct nct6694_cmd_header *cmd_hd, void *buf); + +#endif -- cgit v1.2.3 From a22ddeef55c4df847d9ac862b6192da774948fe1 Mon Sep 17 00:00:00 2001 From: Kamel Bouhara Date: Sun, 24 Aug 2025 13:57:21 +0200 Subject: mfd: Add max7360 support Add a core driver to support the MAX7360 I2C chip, a multi-function device with keypad, GPIO, PWM, GPO and rotary encoder submodules.
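Purely as an illustration of how a keypad sub-driver might consume the register definitions added below (the helper is hypothetical and not part of this patch; it assumes <linux/regmap.h> and FIELD_GET() from <linux/bitfield.h>):
| /* Hypothetical sketch: decode one key event from MAX7360_REG_KEYFIFO */
| static int max7360_read_key(struct regmap *map, u8 *row, u8 *col, bool *release)
| {
| 	unsigned int val;
| 	int ret = regmap_read(map, MAX7360_REG_KEYFIFO, &val);
|
| 	if (ret)
| 		return ret;
| 	if (val == MAX7360_FIFO_EMPTY)
| 		return -ENODATA;	/* nothing pending */
| 	*row = FIELD_GET(MAX7360_FIFO_ROW, val);
| 	*col = FIELD_GET(MAX7360_FIFO_COL, val);
| 	*release = val & MAX7360_FIFO_RELEASE;	/* set on key release */
| 	return 0;
| }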
Signed-off-by: Kamel Bouhara Co-developed-by: Mathieu Dubois-Briand Signed-off-by: Mathieu Dubois-Briand Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250824-mdb-max7360-support-v14-2-435cfda2b1ea@bootlin.com Signed-off-by: Lee Jones --- include/linux/mfd/max7360.h | 109 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 include/linux/mfd/max7360.h (limited to 'include/linux') diff --git a/include/linux/mfd/max7360.h b/include/linux/mfd/max7360.h new file mode 100644 index 000000000000..44cf2bf651a2 --- /dev/null +++ b/include/linux/mfd/max7360.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __LINUX_MFD_MAX7360_H +#define __LINUX_MFD_MAX7360_H + +#include + +#define MAX7360_MAX_KEY_ROWS 8 +#define MAX7360_MAX_KEY_COLS 8 +#define MAX7360_MAX_KEY_NUM (MAX7360_MAX_KEY_ROWS * MAX7360_MAX_KEY_COLS) +#define MAX7360_ROW_SHIFT 3 + +#define MAX7360_MAX_GPIO 8 +#define MAX7360_MAX_GPO 6 +#define MAX7360_PORT_PWM_COUNT 8 +#define MAX7360_PORT_RTR_PIN (MAX7360_PORT_PWM_COUNT - 1) + +/* + * MAX7360 registers + */ +#define MAX7360_REG_KEYFIFO 0x00 +#define MAX7360_REG_CONFIG 0x01 +#define MAX7360_REG_DEBOUNCE 0x02 +#define MAX7360_REG_INTERRUPT 0x03 +#define MAX7360_REG_PORTS 0x04 +#define MAX7360_REG_KEYREP 0x05 +#define MAX7360_REG_SLEEP 0x06 + +/* + * MAX7360 GPIO registers + * + * All these registers are reset together when writing bit 3 of + * MAX7360_REG_GPIOCFG. + */ +#define MAX7360_REG_GPIOCFG 0x40 +#define MAX7360_REG_GPIOCTRL 0x41 +#define MAX7360_REG_GPIODEB 0x42 +#define MAX7360_REG_GPIOCURR 0x43 +#define MAX7360_REG_GPIOOUTM 0x44 +#define MAX7360_REG_PWMCOM 0x45 +#define MAX7360_REG_RTRCFG 0x46 +#define MAX7360_REG_I2C_TIMEOUT 0x48 +#define MAX7360_REG_GPIOIN 0x49 +#define MAX7360_REG_RTR_CNT 0x4A +#define MAX7360_REG_PWMBASE 0x50 +#define MAX7360_REG_PWMCFGBASE 0x58 + +#define MAX7360_REG_GPIO_LAST 0x5F + +#define MAX7360_REG_PWM(x) (MAX7360_REG_PWMBASE + (x)) +#define MAX7360_REG_PWMCFG(x) (MAX7360_REG_PWMCFGBASE + (x)) + +/* + * Configuration register bits + */ +#define MAX7360_FIFO_EMPTY 0x3F +#define MAX7360_FIFO_OVERFLOW 0x7F +#define MAX7360_FIFO_RELEASE BIT(6) +#define MAX7360_FIFO_COL GENMASK(5, 3) +#define MAX7360_FIFO_ROW GENMASK(2, 0) + +#define MAX7360_CFG_SLEEP BIT(7) +#define MAX7360_CFG_INTERRUPT BIT(5) +#define MAX7360_CFG_KEY_RELEASE BIT(3) +#define MAX7360_CFG_WAKEUP BIT(1) +#define MAX7360_CFG_TIMEOUT BIT(0) + +#define MAX7360_DEBOUNCE GENMASK(4, 0) +#define MAX7360_DEBOUNCE_MIN 9 +#define MAX7360_DEBOUNCE_MAX 40 +#define MAX7360_PORTS GENMASK(8, 5) + +#define MAX7360_INTERRUPT_TIME_MASK GENMASK(4, 0) +#define MAX7360_INTERRUPT_FIFO_MASK GENMASK(7, 5) + +#define MAX7360_PORT_CFG_INTERRUPT_MASK BIT(7) +#define MAX7360_PORT_CFG_INTERRUPT_EDGES BIT(6) +#define MAX7360_PORT_CFG_COMMON_PWM BIT(5) + +/* + * Autosleep register values + */ +#define MAX7360_AUTOSLEEP_8192MS 0x01 +#define MAX7360_AUTOSLEEP_4096MS 0x02 +#define MAX7360_AUTOSLEEP_2048MS 0x03 +#define MAX7360_AUTOSLEEP_1024MS 0x04 +#define MAX7360_AUTOSLEEP_512MS 0x05 +#define MAX7360_AUTOSLEEP_256MS 0x06 + +#define MAX7360_GPIO_CFG_RTR_EN BIT(7) +#define MAX7360_GPIO_CFG_GPIO_EN BIT(4) +#define MAX7360_GPIO_CFG_GPIO_RST BIT(3) + +#define MAX7360_ROT_DEBOUNCE GENMASK(3, 0) +#define MAX7360_ROT_DEBOUNCE_MIN 0 +#define MAX7360_ROT_DEBOUNCE_MAX 15 +#define MAX7360_ROT_INTCNT GENMASK(6, 4) +#define MAX7360_ROT_INTCNT_DLY BIT(7) + +#define MAX7360_INT_INTI 0 +#define MAX7360_INT_INTK 1 + +#define MAX7360_INT_GPIO 0 +#define 
MAX7360_INT_KEYPAD 1 +#define MAX7360_INT_ROTARY 2 + +#define MAX7360_NR_INTERNAL_IRQS 3 + +#endif -- cgit v1.2.3 From 553b75d4bfe9264f631d459fe9996744e0672b0e Mon Sep 17 00:00:00 2001 From: Mathieu Dubois-Briand Date: Sun, 24 Aug 2025 13:57:24 +0200 Subject: gpio: regmap: Allow to allocate regmap-irq device GPIO controllers often have IRQ support: allow easily allocating both gpio-regmap and regmap-irq in one operation. Reviewed-by: Andy Shevchenko Acked-by: Bartosz Golaszewski Signed-off-by: Mathieu Dubois-Briand Link: https://lore.kernel.org/r/20250824-mdb-max7360-support-v14-5-435cfda2b1ea@bootlin.com Signed-off-by: Lee Jones --- include/linux/gpio/regmap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index c722c67668c6..19b52ac03a5d 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -40,6 +40,11 @@ struct regmap; * @drvdata: (Optional) Pointer to driver specific data which is * not used by gpio-remap but is provided "as is" to the * driver callback(s). + * @regmap_irq_chip: (Optional) Pointer on an regmap_irq_chip structure. If + * set, a regmap-irq device will be created and the IRQ + * domain will be set accordingly. + * @regmap_irq_line (Optional) The IRQ the device uses to signal interrupts. + * @regmap_irq_flags (Optional) The IRQF_ flags to use for the interrupt. * * The ->reg_mask_xlate translates a given base address and GPIO offset to * register and mask pair. The base address is one of the given register @@ -78,6 +83,12 @@ struct gpio_regmap_config { int ngpio_per_reg; struct irq_domain *irq_domain; +#ifdef CONFIG_REGMAP_IRQ + struct regmap_irq_chip *regmap_irq_chip; + int regmap_irq_line; + unsigned long regmap_irq_flags; +#endif + int (*reg_mask_xlate)(struct gpio_regmap *gpio, unsigned int base, unsigned int offset, unsigned int *reg, unsigned int *mask); -- cgit v1.2.3 From 0627b71fa5508ab605b6e9fd74baed40805cfdda Mon Sep 17 00:00:00 2001 From: Mathieu Dubois-Briand Date: Sun, 24 Aug 2025 13:57:25 +0200 Subject: gpio: regmap: Allow to provide init_valid_mask callback Allow populating the gpio_regmap_config structure with an init_valid_mask() callback to be set on the final gpio_chip structure. Reviewed-by: Michael Walle Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Reviewed-by: Bartosz Golaszewski Signed-off-by: Mathieu Dubois-Briand Link: https://lore.kernel.org/r/20250824-mdb-max7360-support-v14-6-435cfda2b1ea@bootlin.com Signed-off-by: Lee Jones --- include/linux/gpio/regmap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index 19b52ac03a5d..622a2939ebe0 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -6,6 +6,7 @@ struct device; struct fwnode_handle; struct gpio_regmap; +struct gpio_chip; struct irq_domain; struct regmap; @@ -40,6 +41,8 @@ struct regmap; * @drvdata: (Optional) Pointer to driver specific data which is * not used by gpio-remap but is provided "as is" to the * driver callback(s). + * @init_valid_mask: (Optional) Routine to initialize @valid_mask, to be used + * if not all GPIOs are valid. * @regmap_irq_chip: (Optional) Pointer on an regmap_irq_chip structure. If * set, a regmap-irq device will be created and the IRQ * domain will be set accordingly.
@@ -93,6 +96,10 @@ struct gpio_regmap_config { unsigned int offset, unsigned int *reg, unsigned int *mask); + int (*init_valid_mask)(struct gpio_chip *gc, + unsigned long *valid_mask, + unsigned int ngpios); + void *drvdata; }; -- cgit v1.2.3 From f8d9e56aeb87ce82ce8636cd176cc59b69aa0e41 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 22 Aug 2025 13:56:27 +0300 Subject: i3c: master: Add helpers for DMA mapping and bounce buffer handling Some I3C controllers such as MIPI I3C HCI may pad the last DWORD (32-bit) with stale data from the RX FIFO in DMA transfers if the receive length is not DWORD aligned and when the device DMA is IOMMU mapped. In such a case, a properly sized bounce buffer is required in order to avoid possible data corruption. In a review discussion, the proposal was to have common helpers in the I3C core for DMA mapping and bounce buffer handling. Drivers may use the helper i3c_master_dma_map_single() to map a buffer for a DMA transfer. It internally allocates a bounce buffer if the buffer is not DMA'able or when the driver requires it for a transfer. The helper i3c_master_dma_unmap_single() does the needed cleanups and data copying from the bounce buffer. Signed-off-by: Jarkko Nikula Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250822105630.2820009-2-jarkko.nikula@linux.intel.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 043f5c7ff398..c52a82dd79a6 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -558,6 +558,26 @@ struct i3c_master_controller { #define i3c_bus_for_each_i3cdev(bus, dev) \ list_for_each_entry(dev, &(bus)->devs.i3c, common.node) +/** + * struct i3c_dma - DMA transfer and mapping descriptor + * @dev: device object of a device doing DMA + * @buf: destination/source buffer for DMA + * @len: length of transfer + * @map_len: length of DMA mapping + * @addr: mapped DMA address for a Host Controller Driver + * @dir: DMA direction + * @bounce_buf: an allocated bounce buffer if transfer needs it or NULL + */ +struct i3c_dma { + struct device *dev; + void *buf; + size_t len; + size_t map_len; + dma_addr_t addr; + enum dma_data_direction dir; + void *bounce_buf; +}; + int i3c_master_do_i2c_xfers(struct i3c_master_controller *master, const struct i2c_msg *xfers, int nxfers); @@ -575,6 +595,12 @@ int i3c_master_get_free_addr(struct i3c_master_controller *master, int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, u8 addr); int i3c_master_do_daa(struct i3c_master_controller *master); +struct i3c_dma *i3c_master_dma_map_single(struct device *dev, void *ptr, + size_t len, bool force_bounce, + enum dma_data_direction dir); +void i3c_master_dma_unmap_single(struct i3c_dma *dma_xfer); +DEFINE_FREE(i3c_master_dma_unmap_single, void *, + if (_T) i3c_master_dma_unmap_single(_T)) int i3c_master_set_info(struct i3c_master_controller *master, const struct i3c_device_info *info); -- cgit v1.2.3 From 3baeae36039afc233d4a42d6ff4aa7019892619f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 29 Aug 2025 16:10:57 +0300 Subject: PCI: Use pci_release_resource() instead of release_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few places in setup-bus.c call release_resource() directly and end up duplicating functionality from pci_release_resource() such as parent check, logging, and clearing the resource. Worse yet, the way the resource is cleared is inconsistent between different sites. Convert release_resource() calls into pci_release_resource() to remove code duplication. This will also make the resource start, end, and flags behavior consistent, i.e., the start address is cleared, and only IORESOURCE_UNSET is asserted for the resource. While at it, eliminate the unnecessary initialization of the idx variable in pci_bridge_release_resources(). Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250829131113.36754-9-ilpo.jarvinen@linux.intel.com --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..275df4058767 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1417,7 +1417,7 @@ void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -void pci_release_resource(struct pci_dev *dev, int resno); +int pci_release_resource(struct pci_dev *dev, int resno); static inline int pci_rebar_bytes_to_size(u64 bytes) { bytes = roundup_pow_of_two(bytes); -- cgit v1.2.3 From 4292a1e45fd464551efac7b2b52fd3606e956c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 29 Aug 2025 16:11:09 +0300 Subject: PCI: Refactor distributing available memory to use loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_bus_distribute_available_resources() and pci_bridge_distribute_available_resources() retain bridge window resources and related data needed for distributing the available window in independent variables for I/O, memory, and prefetchable memory windows. The code is essentially the same for all of them and is therefore repeated three times with different variable names. Refactor pci_bus_distribute_available_resources() to take an array. This is complicated slightly by the function taking advantage of passing the struct by value, which cannot be done for arrays in C. Therefore, copy the data into a local array on the stack in the first loop. Variable names are (hopefully) improved slightly as well.
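The shape of the refactor, sketched with illustrative names (distribute_one_window() is hypothetical; only PCI_P2P_BRIDGE_RESOURCE_NUM below comes from this patch):
| /* Illustrative only: treat the three P2P bridge windows as an array */
| struct resource *win[PCI_P2P_BRIDGE_RESOURCE_NUM];
| resource_size_t avail[PCI_P2P_BRIDGE_RESOURCE_NUM];
| int i;
|
| for (i = 0; i < PCI_P2P_BRIDGE_RESOURCE_NUM; i++) {
| 	win[i] = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
| 	avail[i] = resource_size(win[i]);	/* stack copy, as pass-by-value did */
| }
| for (i = 0; i < PCI_P2P_BRIDGE_RESOURCE_NUM; i++)
| 	distribute_one_window(bus, win[i], avail[i]);	/* hypothetical helper */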
Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250829131113.36754-21-ilpo.jarvinen@linux.intel.com --- include/linux/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 275df4058767..723e9cede69d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -119,7 +119,8 @@ enum { #define PCI_CB_BRIDGE_MEM_1_WINDOW (PCI_BRIDGE_RESOURCES + 3) /* Total number of bridge resources for P2P and CardBus */ -#define PCI_BRIDGE_RESOURCE_NUM 4 +#define PCI_P2P_BRIDGE_RESOURCE_NUM 3 +#define PCI_BRIDGE_RESOURCE_NUM 4 /* Resources assigned to buses behind the bridge */ PCI_BRIDGE_RESOURCES, -- cgit v1.2.3 From dadb3ebcf395ebee3626d88ac7e5e234f15bae2c Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Sun, 14 Sep 2025 15:44:26 +0200 Subject: workqueue: WQ_PERCPU added to alloc_workqueue users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if a user enqueues a work item using schedule_delayed_work(), the wq used is "system_wq" (a per-CPU wq), while queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not specified). The same applies to schedule_work(), which uses system_wq, and queue_work(), which again makes use of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This patch adds a new WQ_PERCPU flag to explicitly request the use of the per-CPU behavior. Both flags coexist for one release cycle to allow callers to transition their calls. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. All existing users have been updated accordingly.
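The conversion for callers is mechanical; a minimal before/after sketch (queue name illustrative):
| struct workqueue_struct *wq;
|
| /* before: per-CPU was the implicit default */
| wq = alloc_workqueue("foo_wq", WQ_MEM_RECLAIM, 0);
| /* after: per-CPU placement is requested explicitly */
| wq = alloc_workqueue("foo_wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0);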
Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b6834b7aee4b..71a9900c03c7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -410,7 +410,7 @@ enum wq_flags { __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ /* BH wq only allows the following flags */ - __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU, }; enum wq_consts { @@ -570,7 +570,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ - alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) + alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name)) #define create_freezable_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ WQ_MEM_RECLAIM, 1, (name)) -- cgit v1.2.3 From 6b4be64fd9fec16418f365c2d8e47a7566e9eba5 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 15 Sep 2025 15:24:32 +0300 Subject: net/mlx5e: Harden uplink netdev access against device unbind The function mlx5_uplink_netdev_get() gets the uplink netdevice pointer from mdev->mlx5e_res.uplink_netdev. However, the netdevice can be removed and its pointer cleared when unbound from the mlx5_core.eth driver. This results in a NULL pointer, causing a kernel panic. BUG: unable to handle page fault for address: 0000000000001300 at RIP: 0010:mlx5e_vport_rep_load+0x22a/0x270 [mlx5_core] Call Trace: mlx5_esw_offloads_rep_load+0x68/0xe0 [mlx5_core] esw_offloads_enable+0x593/0x910 [mlx5_core] mlx5_eswitch_enable_locked+0x341/0x420 [mlx5_core] mlx5_devlink_eswitch_mode_set+0x17e/0x3a0 [mlx5_core] devlink_nl_eswitch_set_doit+0x60/0xd0 genl_family_rcv_msg_doit+0xe0/0x130 genl_rcv_msg+0x183/0x290 netlink_rcv_skb+0x4b/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x255/0x380 netlink_sendmsg+0x1f3/0x420 __sock_sendmsg+0x38/0x60 __sys_sendto+0x119/0x180 do_syscall_64+0x53/0x1d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Ensure the pointer is valid before use by checking it for NULL. If it is valid, immediately call netdev_hold() to take a reference, preventing the netdevice from being freed while it is in use. Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Reviewed-by: Jiri Pirko Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757939074-617281-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8c5fbfb85749..10fe492e1fed 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -663,6 +663,7 @@ struct mlx5e_resources { bool tisn_valid; } hw_objs; struct net_device *uplink_netdev; + netdevice_tracker tracker; struct mutex uplink_netdev_lock; struct mlx5_crypto_dek_priv *dek_priv; }; -- cgit v1.2.3 From ea6bb47fd6a4c5a332f9349c39bf7462e3e7a35b Mon Sep 17 00:00:00 2001 From: Alan Borzeszkowski Date: Wed, 27 Aug 2025 13:56:46 +0200 Subject: thunderbolt: Update thunderbolt.h header file Make the Thunderbolt header file compliant with current kernel-doc standards. No functional changes.
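The recurring pattern in the fixes below: the comment must name the object kind ("struct foo - ..."), members (including enum values) are referenced with '@', and constants with '%'. A minimal illustrative example:
| /**
|  * enum foo_flags - kernel-doc the checker accepts (illustrative names)
|  * @FOO_FLAG_X: enum values are documented with '@', not '%'
|  */
| enum foo_flags {
| 	FOO_FLAG_X = 0x1,
| };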
Signed-off-by: Alan Borzeszkowski Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 75247486616b..0ba112175bb3 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -213,7 +213,7 @@ enum tb_link_width { * queried first * @service_ids: Used to generate IDs for the services * @in_hopids: Input HopIDs for DMA tunneling - * @out_hopids; Output HopIDs for DMA tunneling + * @out_hopids: Output HopIDs for DMA tunneling * @local_property_block: Local block of properties * @local_property_block_gen: Generation of @local_property_block * @local_property_block_len: Length of the @local_property_block in dwords @@ -356,7 +356,7 @@ int tb_xdomain_request(struct tb_xdomain *xd, const void *request, unsigned int timeout_msec); /** - * tb_protocol_handler - Protocol specific handler + * struct tb_protocol_handler - Protocol specific handler * @uuid: XDomain messages with this UUID are dispatched to this handler * @callback: Callback called with the XDomain message. Returning %1 * here tells the XDomain core that the message was handled @@ -437,7 +437,7 @@ static inline struct tb_service *tb_to_service(struct device *dev) } /** - * tb_service_driver - Thunderbolt service driver + * struct tb_service_driver - Thunderbolt service driver * @driver: Driver structure * @probe: Called when the driver is probed * @remove: Called when the driver is removed (optional) @@ -519,6 +519,7 @@ struct tb_nhi { * @head: Head of the ring (write next descriptor here) * @tail: Tail of the ring (complete next descriptor here) * @descriptors: Allocated descriptors for this ring + * @descriptors_dma: DMA address of descriptors for this ring * @queue: Queue holding frames to be transferred over this ring * @in_flight: Queue holding frames that are currently in flight * @work: Interrupt work structure @@ -571,12 +572,12 @@ typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); /** * enum ring_desc_flags - Flags for DMA ring descriptor - * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) - * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) - * %RING_DESC_COMPLETED: Descriptor completed (set by NHI) - * %RING_DESC_POSTED: Always set this - * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun - * %RING_DESC_INTERRUPT: Request an interrupt on completion + * @RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) + * @RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) + * @RING_DESC_COMPLETED: Descriptor completed (set by NHI) + * @RING_DESC_POSTED: Always set this + * @RING_DESC_BUFFER_OVERRUN: RX buffer overrun + * @RING_DESC_INTERRUPT: Request an interrupt on completion */ enum ring_desc_flags { RING_DESC_ISOCH = 0x1, @@ -636,7 +637,7 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); * If ring_stop() is called after the packet has been enqueued * @frame->callback will be called with canceled set to true. * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop() has been called, %0 otherwise. 
*/ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) { @@ -657,7 +658,7 @@ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) * If ring_stop() is called after the packet has been enqueued @frame->callback * will be called with canceled set to true. * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop has been called, %0 otherwise. */ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) { @@ -675,6 +676,8 @@ void tb_ring_poll_complete(struct tb_ring *ring); * * Use this function when you are mapping DMA for buffers that are * passed to the ring for sending/receiving. + * + * Return: Pointer to device used for DMA mapping. */ static inline struct device *tb_ring_dma_device(struct tb_ring *ring) { -- cgit v1.2.3 From de2be98541dbe0de58d2dccf7fa19dfc9d9a8260 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Sep 2025 10:10:17 +0300 Subject: net/mlx5: Remove VLAN insertion fields from WQE Ether segment Now that the driver no longer uses VLAN TX insertion via the WQE Ethernet segment, the related fields and flags can be removed. Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757574619-604874-2-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fc7eeff99a8a..5546c7bd2c83 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -237,13 +237,11 @@ enum { }; enum { - MLX5_ETH_WQE_SVLAN = 1 << 0, MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC = 1 << 26, MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC = 1 << 27, MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC = 3 << 26, MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC = 1 << 28, MLX5_ETH_WQE_INSERT_TRAILER = 1 << 30, - MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; enum { @@ -275,10 +273,6 @@ struct mlx5_wqe_eth_seg { DECLARE_FLEX_ARRAY(u8, data); }; } inline_hdr; - struct { - __be16 type; - __be16 vlan_tci; - } insert; __be32 trailer; }; }; -- cgit v1.2.3 From cce65f32443b61db2370a67d2e92d16b773fe8a4 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Sep 2025 10:10:18 +0300 Subject: net/mlx5: Refactor MACsec WQE metadata shifts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce MLX5_ETH_WQE_FT_META_SHIFT as a shared base offset for features that use the lower 8 bits of the WQE flow_table_metadata field, currently used for timestamping, IPsec, and MACsec. Define MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK so that fs_id occupies bits 2–5, making it clear that fs_id occupies bits in the metadata. Set MLX5_ETH_WQE_FT_META_MACSEC_MASK as the OR of the MACsec flag and MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK, corresponding to the original 0x3E mask. Update the fs_id macro to right-shift the MACsec flag by MLX5_ETH_WQE_FT_META_SHIFT and update the RoCE modify-header action to use it. Introduce the helper macro MLX5_MACSEC_TX_METADATA(fs_id) to compose the full shifted MACsec metadata value. These changes make it explicit exactly which metadata bits carry MACsec information, simplifying future feature exclusions when multiple features share the WQE flowtable metadata. In addition, drop the incorrect “RX flow steering” comment, since this applies to TX flow steering. 
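The MLX5_MACSEC_TX_METADATA() helper itself is not in this hunk; under the bit layout stated above, it plausibly composes the value along these lines (a sketch only, assuming FIELD_PREP() from <linux/bitfield.h>):
| /* Sketch: MACsec flag in bit 1, fs_id in bits 2-5, relative to the base shift */
| #define MLX5_MACSEC_TX_METADATA(fs_id) \
| 	(MLX5_ETH_WQE_FT_META_MACSEC | \
| 	 FIELD_PREP(MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK, (fs_id)))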
Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757574619-604874-3-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 5546c7bd2c83..b21be7630575 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -251,9 +251,14 @@ enum { MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5, }; +/* Base shift for metadata bits used by timestamping, IPsec, and MACsec */ +#define MLX5_ETH_WQE_FT_META_SHIFT 0 + enum { - MLX5_ETH_WQE_FT_META_IPSEC = BIT(0), - MLX5_ETH_WQE_FT_META_MACSEC = BIT(1), + MLX5_ETH_WQE_FT_META_IPSEC = BIT(0) << MLX5_ETH_WQE_FT_META_SHIFT, + MLX5_ETH_WQE_FT_META_MACSEC = BIT(1) << MLX5_ETH_WQE_FT_META_SHIFT, + MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK = + GENMASK(5, 2) << MLX5_ETH_WQE_FT_META_SHIFT, }; struct mlx5_wqe_eth_seg { -- cgit v1.2.3 From 2ac207381c37eebc49559634ce5642119784bc7c Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Sep 2025 10:10:19 +0300 Subject: net/mlx5e: Prevent WQE metadata conflicts between timestamping and offloads Update the WQE metadata assignment to avoid overriding existing metadata when setting the sysport timestamp ID. Since timestamp IDs are limited to 256 values, they use only the lower 8 bits of the metadata field. To avoid conflicts, move IPsec and MACsec metadata ID to bits 8 and 9, and shift the MACsec fs_id accordingly. This ensures safe coexistence of timestamping and offload features that use the same metadata field. Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Reviewed-by: Patrisious Haddad Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757574619-604874-4-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index b21be7630575..d67aedc6ea68 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -251,8 +251,9 @@ enum { MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5, }; -/* Base shift for metadata bits used by timestamping, IPsec, and MACsec */ -#define MLX5_ETH_WQE_FT_META_SHIFT 0 +/* Metadata bits 0-7 are used by timestamping */ +/* Base shift for metadata bits used by IPsec and MACsec */ +#define MLX5_ETH_WQE_FT_META_SHIFT 8 enum { MLX5_ETH_WQE_FT_META_IPSEC = BIT(0) << MLX5_ETH_WQE_FT_META_SHIFT, -- cgit v1.2.3 From 0b1619c38600fc06c73b1f59c64af0b7df08fc2c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 15 Sep 2025 11:10:07 +0200 Subject: gpio: nomadik: fix the debugfs helper stub Commit ddeb66d2cb10 ("gpio: nomadik: don't print out global GPIO numbers in debugfs callbacks") failed to also update the stub of the debugfs helper for !CONFIG_DEBUG_FS. Fix the resulting build failure. 
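The rule this fix restores, sketched with hypothetical names: a !CONFIG stub must keep exactly the prototype of the real declaration, or configurations without the option stop building:
| #ifdef CONFIG_DEBUG_FS
| void foo_dbg_show_one(struct seq_file *s, unsigned int offset);
| #else
| /* must mirror the prototype above, argument for argument */
| static inline void foo_dbg_show_one(struct seq_file *s, unsigned int offset)
| {
| }
| #endif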
Fixes: ddeb66d2cb10 ("gpio: nomadik: don't print out global GPIO numbers in debugfs callbacks") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509132232.12viPUPB-lkp@intel.com/ Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250915091007.28438-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/gpio-nomadik.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h index 7ba53b499e16..592a774a53cd 100644 --- a/include/linux/gpio/gpio-nomadik.h +++ b/include/linux/gpio/gpio-nomadik.h @@ -268,8 +268,7 @@ void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, static inline void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, struct gpio_chip *chip, - unsigned int offset, - unsigned int gpio) + unsigned int offset) { } -- cgit v1.2.3 From d1dd75c6500c74b91c5286fd3277710371d3e3ca Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sat, 13 Sep 2025 16:12:54 +0000 Subject: HID: core: Change hid_driver to use a const char* for name name is never mutated by the core HID stack. Making name a const char* simplifies passing the string from Rust to C. Otherwise, it becomes difficult to pass a 'static lifetime CStr from Rust to a char*, rather than a const char*, due to lack of guarantee that the underlying data of the CStr will not be mutated by the C code. Signed-off-by: Rahul Rameshbabu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..426b22ed42b4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -816,7 +816,7 @@ struct hid_usage_id { * zero from them. */ struct hid_driver { - char *name; + const char *name; const struct hid_device_id *id_table; struct list_head dyn_list; -- cgit v1.2.3 From 648dbccc03a000cd64c2a9d86012d98053545e64 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Tue, 16 Sep 2025 21:29:49 +0000 Subject: crypto: ccp - Add AMD Seamless Firmware Servicing (SFS) driver AMD Seamless Firmware Servicing (SFS) is a secure method to allow non-persistent updates to running firmware and settings without requiring BIOS reflash and/or system reset. SFS does not address anything that runs on the x86 processors and it can be used to update ASP firmware, modules, register settings and update firmware for other microprocessors like TMPM, etc. SFS driver support adds ioctl support to communicate the SFS commands to the ASP/PSP by using the TEE mailbox interface. The Seamless Firmware Servicing (SFS) driver is added as a PSP sub-device. 
For detailed information, please look at the SFS specifications: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/58604.pdf Signed-off-by: Ashish Kalra Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Acked-by: Herbert Xu Link: https://lore.kernel.org/cover.1758057691.git.ashish.kalra@amd.com --- include/linux/psp-platform-access.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index 1504fb012c05..540abf7de048 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -7,6 +7,8 @@ enum psp_platform_access_msg { PSP_CMD_NONE = 0x0, + PSP_SFS_GET_FW_VERSIONS, + PSP_SFS_UPDATE, PSP_CMD_HSTI_QUERY = 0x14, PSP_I2C_REQ_BUS_CMD = 0x64, PSP_DYNAMIC_BOOST_GET_NONCE, -- cgit v1.2.3 From 45fe729be9a6be326a1ca25af82d34de32ba2ce8 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 16 Sep 2025 10:16:20 +0800 Subject: usb: typec: Stub out typec_switch APIs when CONFIG_TYPEC=n Ease driver development by adding stubs for the typec_switch APIs when CONFIG_TYPEC=n. Copy the same method used for the typec_mux APIs to be consistent. Acked-by: Greg Kroah-Hartman Reviewed-by: Heikki Krogerus Signed-off-by: Stephen Boyd Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20250916021620.1303995-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_mux.h | 46 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index 2489a7857d8e..aa9ebb7e2fe0 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -3,6 +3,7 @@ #ifndef __USB_TYPEC_MUX #define __USB_TYPEC_MUX +#include #include #include @@ -24,16 +25,13 @@ struct typec_switch_desc { void *drvdata; }; +#if IS_ENABLED(CONFIG_TYPEC) + struct typec_switch *fwnode_typec_switch_get(struct fwnode_handle *fwnode); void typec_switch_put(struct typec_switch *sw); int typec_switch_set(struct typec_switch *sw, enum typec_orientation orientation); -static inline struct typec_switch *typec_switch_get(struct device *dev) -{ - return fwnode_typec_switch_get(dev_fwnode(dev)); -} - struct typec_switch_dev * typec_switch_register(struct device *parent, const struct typec_switch_desc *desc); @@ -42,6 +40,44 @@ void typec_switch_unregister(struct typec_switch_dev *sw); void typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data); void *typec_switch_get_drvdata(struct typec_switch_dev *sw); +#else + +static inline struct typec_switch * +fwnode_typec_switch_get(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline void typec_switch_put(struct typec_switch *sw) {} + +static inline int typec_switch_set(struct typec_switch *sw, + enum typec_orientation orientation) +{ + return 0; +} + +static inline struct typec_switch_dev * +typec_switch_register(struct device *parent, + const struct typec_switch_desc *desc) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void typec_switch_unregister(struct typec_switch_dev *sw) {} + +static inline void typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data) {} +static inline void *typec_switch_get_drvdata(struct typec_switch_dev *sw) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +#endif /* CONFIG_TYPEC */ + +static inline struct typec_switch *typec_switch_get(struct device *dev) +{ + return fwnode_typec_switch_get(dev_fwnode(dev)); +} 
+ struct typec_mux_state { struct typec_altmode *alt; unsigned long mode; -- cgit v1.2.3 From bfb1d99d969fe3b892db30848aeebfa19d21f57f Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Tue, 16 Sep 2025 16:21:32 +0800 Subject: usb: gadget: Store endpoint pointer in usb_request Gadget function drivers often have goto-based error handling in their bind paths, which can be bug-prone. Refactoring these paths to use __free() scope-based cleanup is desirable, but currently blocked. The blocker is that usb_ep_free_request(ep, req) requires two parameters, while the __free() mechanism can only pass a pointer to the request itself. Store an endpoint pointer in the struct usb_request. The pointer is populated centrally in usb_ep_alloc_request() on every successful allocation, making the request object self-contained. Signed-off-by: Kuen-Han Tsai Link: https://lore.kernel.org/r/20250916-ready-v1-1-4997bf277548@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250916-ready-v1-1-4997bf277548@google.com --- include/linux/usb/gadget.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 0f28c5512fcb..0f2079476088 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -32,6 +32,7 @@ struct usb_ep; /** * struct usb_request - describes one i/o request + * @ep: The associated endpoint set by usb_ep_alloc_request(). * @buf: Buffer used for data. Always provide this; some controllers * only use PIO, or don't use DMA for some endpoints. * @dma: DMA address corresponding to 'buf'. If you don't set this @@ -98,6 +99,7 @@ struct usb_ep; */ struct usb_request { + struct usb_ep *ep; void *buf; unsigned length; dma_addr_t dma; -- cgit v1.2.3 From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Tue, 16 Sep 2025 16:21:33 +0800 Subject: usb: gadget: Introduce free_usb_request helper Introduce the free_usb_request() function that frees both the request's buffer and the request itself. This function serves as the cleanup callback for DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request pointers. Signed-off-by: Kuen-Han Tsai Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com --- include/linux/usb/gadget.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 0f2079476088..3aaf19e77558 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -15,6 +15,7 @@ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H +#include #include #include #include @@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep) /*-------------------------------------------------------------------------*/ +/** + * free_usb_request - frees a usb_request object and its buffer + * @req: the request being freed + * + * This helper function frees both the request's buffer and the request object + * itself by calling usb_ep_free_request(). Its signature is designed to be used + * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request + * pointers. 
+ */ +static inline void free_usb_request(struct usb_request *req) +{ + if (!req) + return; + + kfree(req->buf); + usb_ep_free_request(req->ep, req); +} + +DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T)) + +/*-------------------------------------------------------------------------*/ + struct usb_dcd_config_params { __u8 bU1devExitLat; /* U1 Device exit Latency */ #define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */ -- cgit v1.2.3 From 413187f79062634575098653c40f95d439d3b157 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Sep 2025 15:26:49 +0200 Subject: stddef: Remove token-pasting in TRAILING_OVERLAP() Currently, TRAILING_OVERLAP() token-pastes the FAM parameter into the name of the internal padding member `__offset_to_##FAM`. This forces FAM to be a single identifier, which prevents callers from using a FAM when it's a nested member. For instance, see the following scenario: | struct flex { | size_t count; | int data[]; | }; | struct foo { | int hdr_foo; | struct flex f; | }; | struct composite { | struct foo hdr; | int data[100]; | }; In this case, it'd be useful if TRAILING_OVERLAP() could be used in the following way: | struct composite { | TRAILING_OVERLAP(struct foo, hdr, f.data, | int data[100]; | ); | }; However, this is not currently possible due to the token concatenation in `__offset_to_##FAM`, which fails when FAM contains a dot. So, remove token-pasting and use the fixed internal name `__offset_to_FAM` and, with this, expand the capabilities of TRAILING_OVERLAP(). :) Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/13b3e0a69aad837b4e32ca8269b9d91bf1fbe9ef.1758115257.git.gustavoars@kernel.org Signed-off-by: Kees Cook --- include/linux/stddef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index dab49e2ec8c0..701099c67c24 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -108,7 +108,7 @@ enum { union { \ TYPE NAME; \ struct { \ - unsigned char __offset_to_##FAM[offsetof(TYPE, FAM)]; \ + unsigned char __offset_to_FAM[offsetof(TYPE, FAM)]; \ MEMBERS \ }; \ } -- cgit v1.2.3 From 2bbdcf02c3f3df6c85abd0a2fee0d7f0f4113e96 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Sep 2025 15:28:07 +0200 Subject: stddef: Introduce __TRAILING_OVERLAP() Introduce the underlying __TRAILING_OVERLAP() macro to let callers apply attributes to trailing overlapping members. For instance, the code below: | struct flex { | size_t count; | int data[]; | }; | struct { | struct flex f; | struct foo a; | struct boo b; | } __packed instance; can now be changed to the following, and preserve the __packed attribute: | __TRAILING_OVERLAP(struct flex, f, data, __packed, | struct foo a; | struct boo b; | ) instance; Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/f80c529b239ce11f0a51f714fe00ddf839e05f5e.1758115257.git.gustavoars@kernel.org Signed-off-by: Kees Cook --- include/linux/stddef.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 701099c67c24..80b6bfb944f0 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -94,7 +94,8 @@ enum { __DECLARE_FLEX_ARRAY(TYPE, NAME) /** - * TRAILING_OVERLAP() - Overlap a flexible-array member with trailing members. + * __TRAILING_OVERLAP() - Overlap a flexible-array member with trailing + * members.
* * Creates a union between a flexible-array member (FAM) in a struct and a set * of additional members that would otherwise follow it. @@ -102,15 +103,30 @@ * @TYPE: Flexible structure type name, including "struct" keyword. * @NAME: Name for a variable to define. * @FAM: The flexible-array member within @TYPE + * @ATTRS: Any struct attributes (usually empty) * @MEMBERS: Trailing overlapping members. */ -#define TRAILING_OVERLAP(TYPE, NAME, FAM, MEMBERS) \ +#define __TRAILING_OVERLAP(TYPE, NAME, FAM, ATTRS, MEMBERS) \ union { \ TYPE NAME; \ struct { \ unsigned char __offset_to_FAM[offsetof(TYPE, FAM)]; \ MEMBERS \ - }; \ + } ATTRS; \ } +/** + * TRAILING_OVERLAP() - Overlap a flexible-array member with trailing members. + * + * Creates a union between a flexible-array member (FAM) in a struct and a set + * of additional members that would otherwise follow it. + * + * @TYPE: Flexible structure type name, including "struct" keyword. + * @NAME: Name for a variable to define. + * @FAM: The flexible-array member within @TYPE + * @MEMBERS: Trailing overlapping members. + */ +#define TRAILING_OVERLAP(TYPE, NAME, FAM, MEMBERS) \ + __TRAILING_OVERLAP(TYPE, NAME, FAM, /* no attrs */, MEMBERS) + #endif -- cgit v1.2.3 From 5c5db9efe323dd0b0d7917dbe5b9c0999c95e79e Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 28 Aug 2025 10:59:43 +0000 Subject: irqchip/gic-v5: Drop has_gcie_v3_compat from gic_kvm_info The presence of FEAT_GCIE_LEGACY is now handled as a CPU feature. Therefore, drop the check and flag from the GIC driver and gic_kvm_info as it is no longer required or used by KVM. Signed-off-by: Sascha Bischoff Acked-by: Thomas Gleixner Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-vgic-info.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index ca1713fac6e3..a470a73a805a 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -36,8 +36,6 @@ struct gic_kvm_info { bool has_v4_1; /* Deactivation impared, subpar stuff */ bool no_hw_deactivation; - /* v3 compat support (GICv5 hosts, only) */ - bool has_gcie_v3_compat; }; #ifdef CONFIG_KVM -- cgit v1.2.3 From 652b08afba69d5d26fe91098eb832b1bcc0f91c2 Mon Sep 17 00:00:00 2001 From: Cristian Birsan Date: Thu, 21 Nov 2024 20:16:38 +0200 Subject: ARM: at91: remove default values for PMC_PLL_ACR Remove the default values for the PMC PLL Analog Control Register (ACR), as the values are specific to each SoC and PLL, and load them from the PLL characteristics structure instead. Co-developed-by: Andrei Simion Signed-off-by: Andrei Simion Signed-off-by: Cristian Birsan [nicolas.ferre@microchip.com: fix pll acr write sequence, preserve val] Signed-off-by: Nicolas Ferre --- include/linux/clk/at91_pmc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 7af499bdbecb..d60ce9708ea2 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -47,8 +47,6 @@ #define AT91_PMC_PCSR 0x18 /* Peripheral Clock Status Register */ #define AT91_PMC_PLL_ACR 0x18 /* PLL Analog Control Register [for SAM9X60] */ -#define AT91_PMC_PLL_ACR_DEFAULT_UPLL UL(0x12020010) /* Default PLL ACR value for UPLL */ -#define AT91_PMC_PLL_ACR_DEFAULT_PLLA UL(0x00020010) /* Default PLL ACR value for PLLA */ #define AT91_PMC_PLL_ACR_UTMIVR (1 << 12) /* UPLL Voltage regulator Control */ #define
AT91_PMC_PLL_ACR_UTMIBG (1 << 13) /* UPLL Bandgap Control */ -- cgit v1.2.3 From d4bf06592ad68ac4353a81c73e8e662cf88aa2cc Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Thu, 11 Sep 2025 10:02:53 +0530 Subject: soc: qcom: geni-se: Add support to load QUP SE Firmware via Linux subsystem In Qualcomm SoCs, firmware loading for Serial Engines (SE) within the QUP hardware has traditionally been managed by TrustZone (TZ). This restriction poses a significant challenge for developers, as it limits their ability to enable various protocols on any of the SEs from the Linux side, reducing flexibility. Load the firmware to QUP SE based on the 'firmware-name' property specified in devicetree at bootup time. Co-developed-by: Mukesh Kumar Savaliya Signed-off-by: Mukesh Kumar Savaliya Signed-off-by: Viken Dadhaniya Link: https://lore.kernel.org/r/20250911043256.3523057-4-viken.dadhaniya@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/geni-se.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 2996a3c28ef3..0a984e2579fe 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #ifndef _LINUX_QCOM_GENI_SE @@ -36,6 +37,7 @@ enum geni_se_protocol_type { GENI_SE_I2C, GENI_SE_I3C, GENI_SE_SPI_SLAVE, + GENI_SE_INVALID_PROTO = 255, }; struct geni_wrapper; @@ -531,5 +533,7 @@ void geni_icc_set_tag(struct geni_se *se, u32 tag); int geni_icc_enable(struct geni_se *se); int geni_icc_disable(struct geni_se *se); + +int geni_load_se_firmware(struct geni_se *se, enum geni_se_protocol_type protocol); #endif #endif -- cgit v1.2.3 From 09a1b33c080f6ac700fadc67c8471e67bf75fda4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Aug 2025 12:33:11 -0400 Subject: preparations to taking MNT_WRITE_HOLD out of ->mnt_flags We have an unpleasant wart in accessibility rules for struct mount. There are per-superblock lists of mounts, used by sb_prepare_remount_readonly() to check if any of those is currently claimed for write access and to block further attempts to get write access on those until we are done. As soon as it is attached to a filesystem, mount becomes reachable via that list. Only sb_prepare_remount_readonly() traverses it and it only accesses a few members of struct mount. Unfortunately, ->mnt_flags is one of those and it is modified - MNT_WRITE_HOLD set and then cleared. It is done under mount_lock, so from the locking rules POV everything's fine. However, it has easily overlooked implications - once mount has been attached to a filesystem, it has to be treated as globally visible. In particular, initializing ->mnt_flags *must* be done either prior to that point or under mount_lock. All other members are still private at that point. Life gets simpler if we move that bit (and that's *all* that can get touched by access via this list) out of ->mnt_flags. It's not even hard to do - currently the list is implemented as list_head one, anchored in super_block->s_mounts and linked via mount->mnt_instance. 
As the first step, switch it to an hlist-like open-coded structure - the address of the first mount in the set is stored in ->s_mounts and ->mnt_instance is replaced with ->mnt_next_for_sb and ->mnt_pprev_for_sb - the former either NULL or pointing to the next mount in the set, the latter the address of either ->s_mounts or ->mnt_next_for_sb in the previous element of the set. In the next commit we'll steal the LSB of ->mnt_pprev_for_sb as a replacement for MNT_WRITE_HOLD. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..0e9c7f1460dc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1324,6 +1324,8 @@ struct sb_writers { struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; +struct mount; + struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ @@ -1358,7 +1360,7 @@ struct super_block { __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ - struct list_head s_mounts; /* list of mounts; _not_ for fs use */ + struct mount *s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct file *s_bdev_file; struct backing_dev_info *s_bdi; -- cgit v1.2.3 From 3371fa2f27134fc4ec7d40b2ae7b9e92c3b2527e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Aug 2025 13:37:12 -0400 Subject: struct mount: relocate MNT_WRITE_HOLD bit ... from ->mnt_flags to the LSB of ->mnt_pprev_for_sb. This is safe - we always set and clear it within the same mount_lock scope, so we won't interfere with list operations - traversals are always forward, so they don't even look at ->mnt_pprev_for_sb, and both insertions and removals are in mount_lock scopes of their own, so that bit will be clear in *all* mount instances during those.
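Illustrative of the trick (accessor names are hypothetical, not the ones the series adds): since the pointer is word-aligned, bit 0 is free to carry the flag:
| /* sketch: bit 0 of ->mnt_pprev_for_sb doubles as the write-hold flag */
| static inline bool mnt_write_held(const struct mount *m)
| {
| 	return (unsigned long)m->mnt_pprev_for_sb & 1;
| }
|
| static inline struct mount **mnt_pprev(const struct mount *m)
| {
| 	return (struct mount **)((unsigned long)m->mnt_pprev_for_sb & ~1UL);
| }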
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 18e4b97f8a98..85e97b9340ff 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -33,7 +33,6 @@ enum mount_flags { MNT_NOSYMFOLLOW = 0x80, MNT_SHRINKABLE = 0x100, - MNT_WRITE_HOLD = 0x200, MNT_INTERNAL = 0x4000, @@ -52,7 +51,7 @@ enum mount_flags { | MNT_READONLY | MNT_NOSYMFOLLOW, MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, - MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | + MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_LOCKED }; -- cgit v1.2.3 From a79765248649de77771c24f7be08ff4c96f16f7a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Aug 2025 02:48:13 -0400 Subject: constify {__,}mnt_is_readonly() Signed-off-by: Al Viro --- include/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 85e97b9340ff..acfe7ef86a1b 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -76,7 +76,7 @@ extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); extern void mnt_make_shortterm(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(const struct path *path); -extern bool __mnt_is_readonly(struct vfsmount *mnt); +extern bool __mnt_is_readonly(const struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); -- cgit v1.2.3 From b67a8631a4a8f26a18fac236aaf61aa2412c7a0d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 13 Sep 2025 23:08:17 +0200 Subject: net: phy: remove mdio_board_info support from phylib After having removed mdio_board_info usage from dsa_loop, there's no user left. So let's drop support for it from phylib. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/01542a2e-05f5-4f13-acef-72632b33b5be@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6f3b25cb7f4e..7da9e19471c9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2129,16 +2129,6 @@ int __phy_hwtstamp_set(struct phy_device *phydev, extern const struct bus_type mdio_bus_type; extern const struct class mdio_bus_class; -struct mdio_board_info { - const char *bus_id; - char modalias[MDIO_NAME_SIZE]; - int mdio_addr; - const void *platform_data; -}; - -int mdiobus_register_board_info(const struct mdio_board_info *info, - unsigned int n); - /** * phy_module_driver() - Helper macro for registering PHY drivers * @__phy_drivers: array of PHY drivers to register -- cgit v1.2.3 From d69ae81efbc95c94a2760fc82d27cdab4c26fe76 Mon Sep 17 00:00:00 2001 From: Fenglin Wu Date: Wed, 17 Sep 2025 18:15:14 +0800 Subject: power: supply: core: Add resistance power supply property Some battery drivers provide the ability to export internal resistance as a parameter. Add internal_resistance power supply property for that purpose. 
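A battery driver would report it through the usual get_property callback; a minimal sketch (the read-out helper is hypothetical, and the value is assumed to follow the class's micro-unit convention, i.e. microohms):
| static int foo_bat_get_property(struct power_supply *psy,
| 				enum power_supply_property psp,
| 				union power_supply_propval *val)
| {
| 	switch (psp) {
| 	case POWER_SUPPLY_PROP_INTERNAL_RESISTANCE:
| 		val->intval = foo_bat_read_resistance_uohm(psy);	/* hypothetical */
| 		return 0;
| 	default:
| 		return -EINVAL;
| 	}
| }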
Signed-off-by: Fenglin Wu Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f21f806bfb38..f38da7c039d2 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -176,6 +176,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_MANUFACTURE_YEAR, POWER_SUPPLY_PROP_MANUFACTURE_MONTH, POWER_SUPPLY_PROP_MANUFACTURE_DAY, + POWER_SUPPLY_PROP_INTERNAL_RESISTANCE, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- cgit v1.2.3 From cd93fbdce5981c947f22015ded3ac6bd1939b0ad Mon Sep 17 00:00:00 2001 From: Fenglin Wu Date: Wed, 17 Sep 2025 18:15:15 +0800 Subject: power: supply: core: Add state_of_health power supply property Add state_of_health power supply property to represent battery health percentage. Signed-off-by: Fenglin Wu Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f38da7c039d2..360ffdf272da 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -177,6 +177,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_MANUFACTURE_MONTH, POWER_SUPPLY_PROP_MANUFACTURE_DAY, POWER_SUPPLY_PROP_INTERNAL_RESISTANCE, + POWER_SUPPLY_PROP_STATE_OF_HEALTH, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- cgit v1.2.3 From 05dfe654b5932322e297aba11dd6f3f26eea6ecb Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:36 +0300 Subject: net/mlx5: Remove unused 'offset' field from mlx5_sq_bfreg The 'offset' field was introduced in the original commit [1] and never used until commit [2], which added an unnecessary use. Remove the field and refactor the write-combining test to use a local variable instead. [1] commit a6d51b68611e ("net/mlx5: Introduce blue flame register allocator") [2] commit d98995b4bf98 ("net/mlx5: Reimplement write combining test") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fcfc18bfeba9..5a85b6d91ba3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -434,7 +434,6 @@ struct mlx5_sq_bfreg { struct mlx5_uars_page *up; bool wc; u32 index; - unsigned int offset; }; struct mlx5_core_health { -- cgit v1.2.3 From aa4595d0ada65d5d44fa924a42a87c175d9d88e3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:38 +0300 Subject: net/mlx5: Store the global doorbell in mlx5_priv The global doorbell is used for more than just Ethernet resources, so move it out of mlx5e_hw_objs into a common place (mlx5_priv), to avoid non-Ethernet modules (e.g. HWS, ASO) depending on Ethernet structs. Use this opportunity to consolidate it with the 'uar' pointer already there, which was used as an RX doorbell. Underneath the 'uar' pointer is identical to 'bfreg->up', so store a single resource and use that instead. For CQ doorbells, care is taken to always use bfreg->up->index instead of bfreg->index, which may refer to a subsequent UAR page from the same ALLOC_UAR batch on some NICs. 
This paves the way for cleanly supporting multiple doorbells in the Ethernet driver. Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5a85b6d91ba3..15c434fedff7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -612,7 +612,7 @@ struct mlx5_priv { struct mlx5_ft_pool *ft_pool; struct mlx5_bfreg_data bfregs; - struct mlx5_uars_page *uar; + struct mlx5_sq_bfreg bfreg; #ifdef CONFIG_MLX5_SF struct mlx5_vhca_state_notifier *vhca_state_notifier; struct mlx5_sf_dev_table *sf_dev_table; @@ -658,7 +658,6 @@ struct mlx5e_resources { u32 pdn; struct mlx5_td td; u32 mkey; - struct mlx5_sq_bfreg bfreg; #define MLX5_MAX_NUM_TC 8 u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; bool tisn_valid; -- cgit v1.2.3 From a315b723e87ba4e4573e1e5c759d512f38bdc0b3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:40 +0300 Subject: net/mlx5e: Prepare for using different CQ doorbells Completion queues (CQs) in mlx5 use the same global doorbell, which may become contended when accessed concurrently from many cores. This patch prepares the CQ management code for supporting different doorbells per CQ. This will be used in downstream patches to allow separate doorbells to be used by channels CQs. The main change is moving the 'uar' pointer from struct mlx5_core_cq to struct mlx5e_cq, as the uar page to be used is better off stored directly there. Other users of mlx5_core_cq also store the UAR to be used separately and therefore the pointer being removed is dead weight for them. As evidence, in this patch there are two users which set the mcq.uar pointer but didn't use it, Software Steering and old Innova CQ creation code. Instead, they rang the doorbell directly from another pointer. The 'uar' pointer added to struct mlx5e_cq remains in a hot cacheline (as before), because it may get accessed for each packet. Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/cq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 991526039ccb..7ef2c7c7d803 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -41,7 +41,6 @@ struct mlx5_core_cq { int cqe_sz; __be32 *set_ci_db; __be32 *arm_db; - struct mlx5_uars_page *uar; refcount_t refcount; struct completion free; unsigned vector; -- cgit v1.2.3 From 71fb4832d50b01f0af2d257360c239879ce93a8e Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:41 +0300 Subject: net/mlx5e: Use multiple TX doorbells First, allocate more doorbells in mlx5e_create_mdev_resources: - one doorbell remains 'global' and will be used by all non-channel associated SQs (e.g. ASO, HWS, PTP, ...). - allocate additional 'num_doorbells' doorbells. This defaults to minimum between 8 and max number of channels. mlx5e_channel_pick_doorbell() now spreads out channel SQs across available doorbells. 
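A rough sketch of such a pick (the modulo spread is an assumption; the actual selection policy lives in mlx5e_channel_pick_doorbell()):

	/* hypothetical: spread channel SQs over the allocated doorbells */
	bfreg = &res->bfregs[c->ix % res->num_bfregs];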
Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 15c434fedff7..99b34e4809ae 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -658,6 +658,8 @@ struct mlx5e_resources { u32 pdn; struct mlx5_td td; u32 mkey; + struct mlx5_sq_bfreg *bfregs; + unsigned int num_bfregs; #define MLX5_MAX_NUM_TC 8 u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; bool tisn_valid; @@ -801,6 +803,8 @@ struct mlx5_db { int index; }; +#define MLX5_DEFAULT_NUM_DOORBELLS 8 + enum { MLX5_COMP_EQ_SIZE = 1024, }; -- cgit v1.2.3 From 542a495cbaa6dc57a310da62b501fdf318657cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:25 +0200 Subject: tcp: AccECN core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change implements Accurate ECN without negotiation and AccECN Option (that will be added by later changes). Based on AccECN specifications: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt Accurate ECN allows feeding back the number of CE (congestion experienced) marks accurately to the sender in contrast to RFC3168 ECN that can only signal one marks-seen-yes/no per RTT. Congestion control algorithms can take advantage of the accurate ECN information to fine-tune their congestion response to avoid drastic rate reduction when only mild congestion is encountered. With Accurate ECN, tp->received_ce (r.cep in AccECN spec) keeps track of how many segments have arrived with a CE mark. Accurate ECN uses the ACE field (ECE, CWR, AE) to communicate the value back to the sender, which updates tp->delivered_ce (s.cep) based on the feedback. This signalling channel is lossy when ACE field overflow occurs. A conservative strategy is selected here to deal with ACE overflow; however, some strategies using the AccECN option later in the overall patchset mitigate against falsely detected overflows. The ACE field values on the wire are offset by TCP_ACCECN_CEP_INIT_OFFSET. Delivered_ce/received_ce count the real CE marks rather than forcing all downstream users to adapt to the wire offset. This patch uses the first 1-byte hole and the last 4-byte hole of the tcp_sock_write_txrx group for 'received_ce_pending' and 'received_ce'. Also, the group size of tcp_sock_write_txrx is increased from 91 + 4 to 95 + 4 due to the new u32 received_ce member. Below are the trimmed pahole outcomes before and after this patch. [BEFORE THIS PATCH] struct tcp_sock { [...] __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* XXX 2 bytes hole, try to pack */ [...] u32 delivered_ce; /* 2576 4 */ u32 app_limited; /* 2580 4 */ u32 rcv_wnd; /* 2584 4 */ struct tcp_options_received rx_opt; /* 2588 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2612 0 */ /* XXX 4 bytes hole, try to pack */ [...] /* size: 3200, cachelines: 50, members: 161 */ } [AFTER THIS PATCH] struct tcp_sock { [...]
__cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* Force alignment to the next boundary: */ u8 :0; u8 received_ce_pending:4;/* 2522: 0 1 */ u8 unused2:4; /* 2522: 4 1 */ /* XXX 1 byte hole, try to pack */ [...] u32 delivered_ce; /* 2576 4 */ u32 received_ce; /* 2580 4 */ u32 app_limited; /* 2584 4 */ u32 rcv_wnd; /* 2588 4 */ struct tcp_options_received rx_opt; /* 2592 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2616 0 */ [...] /* size: 3200, cachelines: 50, members: 164 */ } Signed-off-by: Ilpo Järvinen Co-developed-by: Olivier Tilmans Signed-off-by: Olivier Tilmans Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-2-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d103cc0e7a35..90cee6e53527 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -287,6 +287,8 @@ struct tcp_sock { */ u8 nonagle : 4,/* Disable Nagle algorithm? */ rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ + u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ + unused2:4; __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ @@ -299,6 +301,7 @@ struct tcp_sock { u32 snd_up; /* Urgent pointer */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 delivered_ce; /* Like the above but only ECE marked packets */ + u32 received_ce; /* Like the above but for rcvd CE marked pkts */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ /* -- cgit v1.2.3 From 3cae34274c79e0c60ccd1c10516973af1aed2a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:26 +0200 Subject: tcp: accecn: AccECN negotiation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accurate ECN negotiation parts based on the specification: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt Accurate ECN is negotiated using ECE, CWR and AE flags in the TCP header. TCP falls back to using RFC3168 ECN if one of the ends supports only RFC3168-style ECN. The AccECN negotiation includes reflecting IP ECN field value seen in SYN and SYNACK back using the same bits as negotiation to allow responding to SYN CE marks and to detect ECN field mangling. CE marks should not occur currently because SYN=1 segments are sent with Non-ECT in IP ECN field (but a proposal exists to remove this restriction). Reflecting SYN IP ECN field in SYNACK is relatively simple. Reflecting SYNACK IP ECN field in the final/third ACK of the handshake is more challenging. Linux TCP code is not well prepared for using the final/third ACK as a signalling channel, which makes things somewhat complicated here. tcp_ecn sysctl can be used to select the highest ECN variant (Accurate ECN, ECN, No ECN) that is attempted to be negotiated and requested for incoming and outgoing connections: TCP_ECN_IN_NOECN_OUT_NOECN, TCP_ECN_IN_ECN_OUT_ECN, TCP_ECN_IN_ECN_OUT_NOECN, TCP_ECN_IN_ACCECN_OUT_ACCECN, TCP_ECN_IN_ACCECN_OUT_ECN, and TCP_ECN_IN_ACCECN_OUT_NOECN.
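For orientation, the draft's flag encoding can be sketched like this (this reflects the specification, not code from this patch, and the 'ae' bit name in struct tcphdr is assumed from the series):

	/* AccECN SYN sets AE, CWR and ECE together; an RFC3168-only peer
	 * answers with ECE alone, a no-ECN peer clears all three */
	th->ae = 1;
	th->cwr = 1;
	th->ece = 1;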
After this patch, the size of tcp_request_sock remains unchanged and no new holes are added. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_request_sock { [...] u32 rcv_nxt; /* 352 4 */ u8 syn_tos; /* 356 1 */ /* size: 360, cachelines: 6, members: 16 */ } [AFTER THIS PATCH] struct tcp_request_sock { [...] u32 rcv_nxt; /* 352 4 */ u8 syn_tos; /* 356 1 */ bool accecn_ok; /* 357 1 */ u8 syn_ect_snt:2; /* 358: 0 1 */ u8 syn_ect_rcv:2; /* 358: 2 1 */ u8 accecn_fail_mode:4; /* 358: 4 1 */ /* size: 360, cachelines: 6, members: 20 */ } After this patch, the size of tcp_sock remains unchanged and no new holes are added. Also, 4 bits of the existing 2-byte hole are exploited. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_sock { [...] u8 dup_ack_counter:2; /* 2761: 0 1 */ u8 tlp_retrans:1; /* 2761: 2 1 */ u8 unused:5; /* 2761: 3 1 */ u8 thin_lto:1; /* 2762: 0 1 */ u8 fastopen_connect:1; /* 2762: 1 1 */ u8 fastopen_no_cookie:1; /* 2762: 2 1 */ u8 fastopen_client_fail:2; /* 2762: 3 1 */ u8 frto:1; /* 2762: 5 1 */ /* XXX 2 bits hole, try to pack */ [...] u8 keepalive_probes; /* 2765 1 */ /* XXX 2 bytes hole, try to pack */ [...] /* size: 3200, cachelines: 50, members: 164 */ } [AFTER THIS PATCH] struct tcp_sock { [...] u8 dup_ack_counter:2; /* 2761: 0 1 */ u8 tlp_retrans:1; /* 2761: 2 1 */ u8 syn_ect_snt:2; /* 2761: 3 1 */ u8 syn_ect_rcv:2; /* 2761: 5 1 */ u8 thin_lto:1; /* 2761: 7 1 */ u8 fastopen_connect:1; /* 2762: 0 1 */ u8 fastopen_no_cookie:1; /* 2762: 1 1 */ u8 fastopen_client_fail:2; /* 2762: 2 1 */ u8 frto:1; /* 2762: 4 1 */ /* XXX 3 bits hole, try to pack */ [...] u8 keepalive_probes; /* 2765 1 */ u8 accecn_fail_mode:4; /* 2766: 0 1 */ /* XXX 4 bits hole, try to pack */ /* XXX 1 byte hole, try to pack */ [...] /* size: 3200, cachelines: 50, members: 166 */ } Signed-off-by: Ilpo Järvinen Co-developed-by: Olivier Tilmans Signed-off-by: Olivier Tilmans Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-3-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 90cee6e53527..b8432bed546d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -168,6 +168,10 @@ struct tcp_request_sock { * after data-in-SYN. */ u8 syn_tos; + bool accecn_ok; + u8 syn_ect_snt: 2, + syn_ect_rcv: 2, + accecn_fail_mode:4; #ifdef CONFIG_TCP_AO u8 ao_keyid; u8 ao_rcv_next; @@ -375,7 +379,8 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + syn_ect_snt:2, /* AccECN ECT memory, only */ + syn_ect_rcv:2; /* ... 
needed during 3WHS + first seqno */ u8 thin_lto : 1,/* Use linear timeouts for thin streams */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ @@ -391,6 +396,7 @@ struct tcp_sock { syn_fastopen_child:1; /* created TFO passive child socket */ u8 keepalive_probes; /* num of allowed keep alive probes */ + u8 accecn_fail_mode:4; /* AccECN failure handling */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ /* RTT measurement */ -- cgit v1.2.3 From 9a011277445583bab002fbf5043fab0ea03dc5dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:27 +0200 Subject: tcp: accecn: add AccECN rx byte counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These three byte counters track IP ECN field payload byte sums for all arriving (acceptable) packets for ECT0, ECT1, and CE. The AccECN option (added by a later patch in the series) echoes these counters back to sender side; therefore, it is placed within the group of tcp_sock_write_txrx. Below are the pahole outcomes before and after this patch, in which the group size of tcp_sock_write_txrx is increased from 95 + 4 to 107 + 4 and an extra 4-byte hole is created but will be exploited in later patches: [BEFORE THIS PATCH] struct tcp_sock { [...] u32 delivered_ce; /* 2576 4 */ u32 received_ce; /* 2580 4 */ u32 app_limited; /* 2584 4 */ u32 rcv_wnd; /* 2588 4 */ struct tcp_options_received rx_opt; /* 2592 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2616 0 */ [...] /* size: 3200, cachelines: 50, members: 166 */ } [AFTER THIS PATCH] struct tcp_sock { [...] u32 delivered_ce; /* 2576 4 */ u32 received_ce; /* 2580 4 */ u32 received_ecn_bytes[3];/* 2584 12 */ u32 app_limited; /* 2596 4 */ u32 rcv_wnd; /* 2600 4 */ struct tcp_options_received rx_opt; /* 2604 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2628 0 */ /* XXX 4 bytes hole, try to pack */ [...] /* size: 3200, cachelines: 50, members: 167 */ } Signed-off-by: Ilpo Järvinen Signed-off-by: Neal Cardwell Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-4-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b8432bed546d..012d01347b3c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -306,6 +306,10 @@ struct tcp_sock { u32 delivered; /* Total data packets delivered incl. rexmits */ u32 delivered_ce; /* Like the above but only ECE marked packets */ u32 received_ce; /* Like the above but for rcvd CE marked pkts */ + u32 received_ecn_bytes[3]; /* received byte counters for three ECN + * types: INET_ECN_ECT_1, INET_ECN_ECT_0, + * and INET_ECN_CE + */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ /* -- cgit v1.2.3 From b5e74132dfbe60329b3ff0e5c485039f2e31605c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:30 +0200 Subject: tcp: accecn: AccECN option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Accurate ECN allows echoing back the sum of bytes for each IP ECN field value in the received packets using AccECN option. 
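A sketch of the wire format defined in the draft (the kind value is omitted and the field names are taken from the spec, not from this patch):

	struct tcp_accecn_opt {
		u8 kind;		/* AccECN option kind */
		u8 len;			/* 2 + 3 * n_fields: 5, 8 or 11 bytes */
		u8 fields[3][3];	/* up to three 24-bit byte counters,
					 * e.g. EE0B, ECEB and EE1B */
	} __packed;

The odd 5-, 8- and 11-byte lengths are what the remark below about exploiting NOP padding refers to.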
This change implements AccECN option tx & rx side processing without option send control related features that are added by a later change. Based on specification: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt (Some features of the spec will be added in the later changes rather than in this one). A full-length AccECN option is always attempted but if it does not fit, the minimum length is selected based on the counters that have changed since the last update. The AccECN option (with 24-bit fields) often ends in odd sizes so the option write code tries to take advantage of the NOP padding used for the other TCP options. The delivered_ecn_bytes pairs with received_ecn_bytes similar to how delivered_ce pairs with received_ce. In contrast to the ACE field, however, the option is not always available to update delivered_ecn_bytes. For ACK w/o AccECN option, the delivered bytes calculated based on the cumulative ACK+SACK information are assigned to one of the counters using an estimation heuristic to select the most likely ECN byte counter. Any estimation error is corrected when the next AccECN option arrives. It may occur that the heuristic gets too confused when there are enough different byte counter deltas between ACKs with the AccECN option, in which case the heuristic just gives up on updating the counters for a while. tcp_ecn_option sysctl can be used to select option sending mode for AccECN: TCP_ECN_OPTION_DISABLED, TCP_ECN_OPTION_MINIMUM, and TCP_ECN_OPTION_FULL. This patch increases the size of the tcp_info struct, as there are no existing holes for new u32 variables. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_info { [...]
/* size: 3200, cachelines: 50, members: 171 */ } Signed-off-by: Ilpo Järvinen Signed-off-by: Neal Cardwell Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-7-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 012d01347b3c..73557656cb2d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -122,8 +122,9 @@ struct tcp_options_received { smc_ok : 1, /* SMC seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ - u8 saw_unknown:1, /* Received unknown option */ - unused:7; + u8 accecn:6, /* AccECN index in header, 0=no options */ + saw_unknown:1, /* Received unknown option */ + unused:1; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ @@ -293,6 +294,9 @@ struct tcp_sock { rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ unused2:4; + u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ + est_ecnfield:2,/* ECN field for AccECN delivered estimates */ + unused3:4; __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ @@ -337,6 +341,7 @@ struct tcp_sock { u32 rate_delivered; /* saved rate sample: packets delivered */ u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_rtt_last_tsecr; + u32 delivered_ecn_bytes[3]; u64 first_tx_mstamp; /* start of window send phase */ u64 delivered_mstamp; /* time we reached "delivered" */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked -- cgit v1.2.3 From aa55a7dde7ec506bb23448a5005ae3f4f809d022 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 16 Sep 2025 10:24:31 +0200 Subject: tcp: accecn: AccECN option send control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of sending the option in every ACK, limit sending to those ACKs where the option is necessary: - Handshake - "Change-triggered ACK" + the ACK following it. The 2nd ACK is necessary to unambiguously indicate which of the ECN byte counters is increasing. The first ACK has two counters increasing due to the ecnfield edge. - ACKs with CE to allow CEP delta validations to take advantage of the option. - Force the option to be sent at least once per 2^22 bytes. The check is done using the bit edges of the byte counters (avoids the need for extra variables). - AccECN option beacon to send a few times per RTT even if nothing in the ECN state requires that. The default is 3 times per RTT, and its period can be set via sysctl_tcp_ecn_option_beacon. Below are the pahole outcomes before and after this patch, in which the group size of tcp_sock_write_tx is increased from 89 to 97 due to the new u64 accecn_opt_tstamp member: [BEFORE THIS PATCH] struct tcp_sock { [...] u64 tcp_wstamp_ns; /* 2488 8 */ struct list_head tsorted_sent_queue; /* 2496 16 */ [...]
__cacheline_group_end__tcp_sock_write_tx[0]; /* 2521 0 */ __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* Force alignment to the next boundary: */ u8 :0; u8 received_ce_pending:4;/* 2522: 0 1 */ u8 unused2:4; /* 2522: 4 1 */ u8 accecn_minlen:2; /* 2523: 0 1 */ u8 est_ecnfield:2; /* 2523: 2 1 */ u8 unused3:4; /* 2523: 4 1 */ [...] __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2628 0 */ [...] /* size: 3200, cachelines: 50, members: 171 */ } [AFTER THIS PATCH] struct tcp_sock { [...] u64 tcp_wstamp_ns; /* 2488 8 */ u64 accecn_opt_tstamp; /* 2496 8 */ struct list_head tsorted_sent_queue; /* 2504 16 */ [...] __cacheline_group_end__tcp_sock_write_tx[0]; /* 2529 0 */ __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2529 0 */ u8 nonagle:4; /* 2529: 0 1 */ u8 rate_app_limited:1; /* 2529: 4 1 */ /* XXX 3 bits hole, try to pack */ /* Force alignment to the next boundary: */ u8 :0; u8 received_ce_pending:4;/* 2530: 0 1 */ u8 unused2:4; /* 2530: 4 1 */ u8 accecn_minlen:2; /* 2531: 0 1 */ u8 est_ecnfield:2; /* 2531: 2 1 */ u8 accecn_opt_demand:2; /* 2531: 4 1 */ u8 prev_ecnfield:2; /* 2531: 6 1 */ [...] __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2636 0 */ [...] /* size: 3200, cachelines: 50, members: 173 */ } Signed-off-by: Chia-Yu Chang Co-developed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-8-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 73557656cb2d..f637b659b35a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -275,6 +275,7 @@ struct tcp_sock { u32 mdev_us; /* medium deviation */ u32 rtt_seq; /* sequence number to update rttvar */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 accecn_opt_tstamp; /* Last AccECN option sent timestamp */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ struct sk_buff *highest_sack; /* skb just after the highest * skb with SACKed bit set @@ -296,7 +297,8 @@ struct tcp_sock { unused2:4; u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ est_ecnfield:2,/* ECN field for AccECN delivered estimates */ - unused3:4; + accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */ + prev_ecnfield:2; /* ECN bits from the previous segment */ __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ -- cgit v1.2.3 From b40671b5ee588c8a61b2d0eacbad32ffc57e9a8f Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 16 Sep 2025 10:24:32 +0200 Subject: tcp: accecn: AccECN option failure handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AccECN option may fail in various ways, handle these: - Attempt to negotiate the use of AccECN on the 1st retransmitted SYN - From the 2nd retransmitted SYN, stop AccECN negotiation - Remove option from SYN/ACK rexmits to handle blackholes - If no option arrives in SYN/ACK, assume Option is not usable - If an option arrives later, re-enable it - If option is zeroed, disable AccECN option processing This patch uses existing padding bits in tcp_request_sock and holes in tcp_sock without increasing the size.
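The retransmission part of the list above can be sketched as follows (names are hypothetical, the logic mirrors the bullets):

	/* hypothetical sketch of the SYN retransmit fallback ladder */
	switch (icsk->icsk_retransmits) {
	case 0:
	case 1:
		/* initial SYN and 1st rexmit: keep attempting AccECN */
		break;
	default:
		/* from the 2nd rexmit on: stop AccECN negotiation */
		tcp_accecn_disable(tp);	/* hypothetical helper */
	}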
Signed-off-by: Ilpo Järvinen Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-9-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f637b659b35a..3ca5ed02de6d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -173,6 +173,7 @@ struct tcp_request_sock { u8 syn_ect_snt: 2, syn_ect_rcv: 2, accecn_fail_mode:4; + u8 saw_accecn_opt :2; #ifdef CONFIG_TCP_AO u8 ao_keyid; u8 ao_rcv_next; @@ -407,7 +408,8 @@ struct tcp_sock { syn_fastopen_child:1; /* created TFO passive child socket */ u8 keepalive_probes; /* num of allowed keep alive probes */ - u8 accecn_fail_mode:4; /* AccECN failure handling */ + u8 accecn_fail_mode:4, /* AccECN failure handling */ + saw_accecn_opt:2; /* An AccECN option was seen */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ /* RTT measurement */ -- cgit v1.2.3 From 3fbb2a6f3a70c27a6a2be80d131970608c0f84d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:42 +0000 Subject: ipv6: make ipv6_pinfo.saddr_cache a boolean ipv6_pinfo.saddr_cache is either NULL or &np->saddr. We do not need 8 bytes, a boolean is enough. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-2-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f43314517396..55c4d1e4dd7d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -216,10 +216,10 @@ struct inet6_cork { struct ipv6_pinfo { struct in6_addr saddr; struct in6_pktinfo sticky_pktinfo; - const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES - const struct in6_addr *saddr_cache; + bool saddr_cache; #endif + const struct in6_addr *daddr_cache; __be32 flow_label; __u32 frag_size; -- cgit v1.2.3 From 5489f333ef993bfceebce9ae98944f04eaafcc30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:43 +0000 Subject: ipv6: make ipv6_pinfo.daddr_cache a boolean ipv6_pinfo.daddr_cache is either NULL or &sk->sk_v6_daddr. We do not need 8 bytes, a boolean is enough. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-3-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 55c4d1e4dd7d..8e6d9f8b3dc8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -219,7 +219,7 @@ struct ipv6_pinfo { #ifdef CONFIG_IPV6_SUBTREES bool saddr_cache; #endif - const struct in6_addr *daddr_cache; + bool daddr_cache; __be32 flow_label; __u32 frag_size; -- cgit v1.2.3 From b76543b21fbcfbb96332fd80cc0d85bbcd72d8f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:45 +0000 Subject: ipv6: reorganise struct ipv6_pinfo Move fields used in the tx fast path to the beginning of the structure, and seldom used ones to the end. Note that rxopt is also in the first cache line.
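The two boolean conversions above can be pictured like this (a sketch; the real accessors in net/ipv6 may differ):

	/* the flag now records only whether the cached address is valid;
	 * the address itself is implied by the socket */
	daddr = np->daddr_cache ? &sk->sk_v6_daddr : NULL;
#ifdef CONFIG_IPV6_SUBTREES
	saddr = np->saddr_cache ? &np->saddr : NULL;
#endif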
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-5-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8e6d9f8b3dc8..43b7bb828738 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -214,18 +214,21 @@ struct inet6_cork { /* struct ipv6_pinfo - ipv6 private area */ struct ipv6_pinfo { + /* Used in tx path (inet6_csk_route_socket(), ip6_xmit()) */ struct in6_addr saddr; - struct in6_pktinfo sticky_pktinfo; + __be32 flow_label; + u32 dst_cookie; + struct ipv6_txoptions __rcu *opt; + s16 hop_limit; + u8 pmtudisc; + u8 tclass; #ifdef CONFIG_IPV6_SUBTREES bool saddr_cache; #endif bool daddr_cache; - __be32 flow_label; - __u32 frag_size; - - s16 hop_limit; u8 mcast_hops; + u32 frag_size; int ucast_oif; int mcast_oif; @@ -233,7 +236,7 @@ struct ipv6_pinfo { /* pktoption flags */ union { struct { - __u16 srcrt:1, + u16 srcrt:1, osrcrt:1, rxinfo:1, rxoinfo:1, @@ -250,29 +253,25 @@ struct ipv6_pinfo { recvfragsize:1; /* 1 bits hole */ } bits; - __u16 all; + u16 all; } rxopt; /* sockopt flags */ - __u8 srcprefs; /* 001: prefer temporary address + u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - __u8 pmtudisc; - __u8 min_hopcount; - __u8 tclass; + u8 min_hopcount; __be32 rcv_flowinfo; + struct in6_pktinfo sticky_pktinfo; - __u32 dst_cookie; + struct sk_buff *pktoptions; + struct sk_buff *rxpmtu; + struct inet6_cork cork; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - - struct ipv6_txoptions __rcu *opt; - struct sk_buff *pktoptions; - struct sk_buff *rxpmtu; - struct inet6_cork cork; }; /* We currently use available bits from inet_sk(sk)->inet_flags, -- cgit v1.2.3 From 3cd04c8f4afed71a48edef0db5255afc249c2feb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:50 +0000 Subject: udp: make busylock per socket While having all spinlocks packed into an array was a space saver, this also caused NUMA imbalance and hash collisions. UDPv6 socket size becomes 1600 after this patch. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-10-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 6ed008ab1665..e554890c4415 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -109,6 +109,7 @@ struct udp_sock { */ struct hlist_node tunnel_list; struct numa_drop_counters drop_counters; + spinlock_t busylock ____cacheline_aligned_in_smp; }; #define udp_test_bit(nr, sk) \ -- cgit v1.2.3 From 6b88293aae7fb78872e5cc1ec36e2f750ae12e38 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Wed, 10 Sep 2025 14:32:58 -0400 Subject: mtd: nand: move nand_check_erased_ecc_chunk() to nand/core The check function for bitflips in erased blocks will be needed by the Realtek ECC engine driver (which is currently under development). Right now it is located in raw/nand_base.c. 
While this is sufficient for the current usecases, there is no real dependency for an ECC engine on the raw nand library. Move the function over to a more generic place in core library. Suggested-by: Miquel Raynal Signed-off-by: Markus Stockhausen Signed-off-by: Miquel Raynal --- include/linux/mtd/nand.h | 5 +++++ include/linux/mtd/rawnand.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 07486168d104..09c8c93e4dba 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1136,4 +1136,9 @@ static inline bool nanddev_bbt_is_initialized(struct nand_device *nand) int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo); int nanddev_mtd_max_bad_blocks(struct mtd_info *mtd, loff_t offs, size_t len); +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int threshold); + #endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index e84522e31301..d30bdc3fcfd7 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1519,11 +1519,6 @@ int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); void rawnand_sw_bch_cleanup(struct nand_chip *chip); -int nand_check_erased_ecc_chunk(void *data, int datalen, - void *ecc, int ecclen, - void *extraoob, int extraooblen, - int threshold); - int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); -- cgit v1.2.3 From a3d076b0567e729d5f21a95525c4d096b1f59e79 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Wed, 17 Sep 2025 16:27:58 +0300 Subject: net/mlx5: Add uar access and odp page fault counters Add bar_uar_access, odp_local_triggered_page_fault, and odp_remote_triggered_page_fault counters to the query_vnic_env command. Additionally, add corresponding capabilities bits to the HCA CAP. Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1758115678-643464-1-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 097b1b7ada63..0cf187e13def 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1958,7 +1958,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_rqt[0x5]; u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_398[0x3]; + u8 reserved_at_398[0x1]; + u8 vnic_env_cnt_bar_uar_access[0x1]; + u8 vnic_env_cnt_odp_page_fault[0x1]; u8 log_max_tis_per_sq[0x5]; u8 ext_stride_num_range[0x1]; @@ -4019,7 +4021,13 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 handled_pkt_steering_fail[0x40]; - u8 reserved_at_360[0xc80]; + u8 bar_uar_access[0x20]; + + u8 odp_local_triggered_page_fault[0x20]; + + u8 odp_remote_triggered_page_fault[0x20]; + + u8 reserved_at_3c0[0xc20]; }; struct mlx5_ifc_traffic_counter_bits { -- cgit v1.2.3 From 00c94ca2b99e6610e483f92e531b319eeaed94aa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:29 -0700 Subject: psp: base PSP device support Add a netlink family for PSP and allow drivers to register support. The "PSP device" is its own object. 
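In sketch form, readers reach that object through an RCU pointer rather than through fields embedded in net_device (the pointer is in the diff below; the accessor pattern is the usual one and assumed here):

	struct psp_dev *psd = rcu_dereference(dev->psp_dev);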
This allows us to perform more flexible reference counting / lifetime control than if PSP information was part of net_device. In the future we should also be able to "delegate" PSP access to software devices, such as *vlan, veth or netkit more easily. Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-3-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f5a840c07cf1..1c54d44805fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1906,6 +1906,7 @@ enum netdev_reg_state { * device struct * @mpls_ptr: mpls_dev struct pointer * @mctp_ptr: MCTP specific data + * @psp_dev: PSP crypto device registered for this netdev * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@ -2310,6 +2311,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MCTP) struct mctp_dev __rcu *mctp_ptr; #endif +#if IS_ENABLED(CONFIG_INET_PSP) + struct psp_dev __rcu *psp_dev; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) -- cgit v1.2.3 From ed8a507b748336902525aa79e3573552534e8b3e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:30 -0700 Subject: net: modify core data structures for PSP datapath support Add pointers to psp data structures to core networking structs, and an SKB extension to carry the PSP information from the drivers to the socket layer. Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Co-developed-by: Daniel Zahka Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-4-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 62e7addccdf6..78ecfa7d00d0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4901,6 +4901,9 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MCTP_FLOWS) SKB_EXT_MCTP, +#endif +#if IS_ENABLED(CONFIG_INET_PSP) + SKB_EXT_PSP, #endif SKB_EXT_NUM, /* must be last */ }; -- cgit v1.2.3 From 6684b91d04b408843bd65e2120f6db2159e4f40b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 16 Sep 2025 21:08:38 -0700 Subject: bnxt_en: Implement ethtool .get_tunable() for ETHTOOL_PFC_PREVENTION_TOUT Return the current PFC watchdog timeout value if it is supported. 
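A sketch of the ethtool side of such a query (the handler shape follows the .get_tunable convention; bnxt internals are elided and the query helper is hypothetical):

	static int bnxt_get_tunable(struct net_device *dev,
				    const struct ethtool_tunable *tuna,
				    void *data)
	{
		switch (tuna->id) {
		case ETHTOOL_PFC_PREVENTION_TOUT:
			/* hypothetical firmware query returning msec */
			*(u16 *)data = bnxt_query_pfcwd_tmo(dev);
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	}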
Reviewed-by: Andy Gospodarek Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250917040839.1924698-10-michael.chan@broadcom.com Signed-off-by: Paolo Abeni --- include/linux/bnxt/hsi.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 8c5dac3b3ef3..23e7b1290a92 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -6751,6 +6751,46 @@ struct hwrm_queue_dscp2pri_cfg_output { u8 valid; }; +/* hwrm_queue_pfcwd_timeout_qcaps_input (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_queue_pfcwd_timeout_qcaps_output (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 max_pfcwd_timeout; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_queue_pfcwd_timeout_qcfg_input (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_queue_pfcwd_timeout_qcfg_output (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 pfcwd_timeout_value; + u8 unused_0[5]; + u8 valid; +}; + /* hwrm_vnic_alloc_input (size:192b/24B) */ struct hwrm_vnic_alloc_input { __le16 req_type; -- cgit v1.2.3 From fa18932afb29b51530508f28adcc824a8c00b712 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 16 Sep 2025 21:08:39 -0700 Subject: bnxt_en: Implement ethtool .set_tunable() for ETHTOOL_PFC_PREVENTION_TOUT Support the setting of the tunable if it is supported by firmware. The supported range is 0 to the maximum msec value reported by firmware. PFC_STORM_PREVENTION_AUTO is also supported and 0 means it is disabled. Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250917040839.1924698-11-michael.chan@broadcom.com Signed-off-by: Paolo Abeni --- include/linux/bnxt/hsi.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 23e7b1290a92..47c34990cf23 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -6771,6 +6771,27 @@ struct hwrm_queue_pfcwd_timeout_qcaps_output { u8 valid; }; +/* hwrm_queue_pfcwd_timeout_cfg_input (size:192b/24B) */ +struct hwrm_queue_pfcwd_timeout_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 pfcwd_timeout_value; + u8 unused_0[6]; +}; + +/* hwrm_queue_pfcwd_timeout_cfg_output (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + /* hwrm_queue_pfcwd_timeout_qcfg_input (size:128b/16B) */ struct hwrm_queue_pfcwd_timeout_qcfg_input { + __le16 req_type; -- cgit v1.2.3 From f72e2cff13aefe305fc8fc6afe4f43626e4ad88c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Sep 2025 15:48:01 +0200 Subject: compiler_types: Add __assume macro Make the statement attribute "assume" available via a new __assume macro. The assume attribute is used to indicate that a certain condition is assumed to be true.
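For example (a usage sketch; the macro itself is in the diff below):

	/* promise the optimizer that idx is in range; UB if it is not */
	__assume(idx < ARRAY_SIZE(table));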
Compilers may or may not use this indication to generate optimized code. If this condition is violated at runtime, the behavior is undefined. Note that the clang documentation states that optimizers may react differently to this attribute, and this may even have a negative performance impact. Therefore this attribute should be used with care. Signed-off-by: Heiko Carstens Reviewed-by: Nathan Chancellor Signed-off-by: Alexander Gordeev --- include/linux/compiler_types.h | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 16755431fc11..2f3e80bf9f35 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -329,6 +329,29 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* + * The assume attribute is used to indicate that a certain condition is + * assumed to be true. If this condition is violated at runtime, the behavior + * is undefined. Compilers may or may not use this indication to generate + * optimized code. + * + * Note that the clang documentation states that optimizers may react + * differently to this attribute, and this may even have a negative + * performance impact. Therefore this attribute should be used with care. + * + * Optional: only supported since gcc >= 13 + * Optional: only supported since clang >= 19 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#index-assume-statement-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#id13 + * + */ +#ifdef CONFIG_CC_HAS_ASSUME +# define __assume(expr) __attribute__((__assume__(expr))) +#else +# define __assume(expr) +#endif + /* * Optional: only supported since gcc >= 15 * Optional: only supported since clang >= 18 -- cgit v1.2.3 From 84eaf4359c36b0ba888f571a964138d22ba5914f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 17 Sep 2025 02:58:11 -0700 Subject: net: ethtool: add get_rx_ring_count callback to optimize RX ring queries Add a new optional get_rx_ring_count callback in ethtool_ops to allow drivers to provide the number of RX rings directly without going through the full get_rxnfc flow classification interface. Create ethtool_get_rx_ring_count() to use .get_rx_ring_count if available, falling back to get_rxnfc() otherwise. It needs to be non-static, given it will be called by other ethtool functions later, such as those calling get_rxfh(). Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250917-gxrings-v4-4-dae520e2e1cb@debian.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d7d757e72554..c869b7f8bce8 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -968,6 +968,7 @@ struct kernel_ethtool_ts_info { * @reset: Reset (part of) the device, as specified by a bitmask of * flags from &enum ethtool_reset_flags. Returns a negative * error code or zero. + * @get_rx_ring_count: Return the number of RX rings * @get_rxfh_key_size: Get the size of the RX flow hash key. * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
@@ -1162,6 +1163,7 @@ struct ethtool_ops { int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); int (*reset)(struct net_device *, u32 *); + u32 (*get_rx_ring_count)(struct net_device *dev); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); -- cgit v1.2.3 From 8d5b7009aabc27e626e4167fedf1e1c1c3d6b143 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Fri, 5 Sep 2025 21:19:45 +0530 Subject: mei: bus: add mei_cldev_mtu interface Add a new helper function that allows MEI client drivers to query the maximum transmission unit (MTU) for a connected MEI client. This is useful for clients that need to transmit large payloads, such as firmware blobs, allowing them to determine the maximum message size that can be safely sent before starting transmission and the size of the buffer to allocate when receiving data. Reviewed-by: Mika Westerberg Signed-off-by: Alexander Usyskin Signed-off-by: Badal Nilawar Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Rodrigo Vivi Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250905154953.3974335-2-badal.nilawar@intel.com Signed-off-by: Lucas De Marchi --- include/linux/mei_cl_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 725fd7727422..a82755e1fc40 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -113,6 +113,7 @@ int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, mei_cldev_cb_t notif_cb); u8 mei_cldev_ver(const struct mei_cl_device *cldev); +size_t mei_cldev_mtu(const struct mei_cl_device *cldev); void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data); -- cgit v1.2.3 From df8922afc37aa2111ca79a216653a629146763ad Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 18 Sep 2025 13:59:15 -0600 Subject: io_uring/msg_ring: kill alloc_cache for io_kiocb allocations A recent commit: fc582cd26e88 ("io_uring/msg_ring: ensure io_kiocb freeing is deferred for RCU") fixed an issue with not deferring freeing of io_kiocb structs that msg_ring allocates to after the current RCU grace period. But this only covers requests that don't end up in the allocation cache. If a request goes into the alloc cache, it can get reused before it is sane to do so. A recent syzbot report would seem to indicate that there's something there; however, it may very well just be because of the KASAN poisoning that the alloc_cache handles manually. Rather than attempt to make the alloc_cache sane for that use case, just drop the usage of the alloc_cache for msg_ring request payload data.
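The underlying hazard can be sketched in two lines (a generic illustration, not the patch itself; the cache helper and field name are hypothetical):

	kfree_rcu(req, rcu_head);	/* reuse deferred past the grace period */
	io_cache_put(cache, req);	/* cache return: req may be handed out
					 * again before the grace period ends */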
Fixes: 50cf5f3842af ("io_uring/msg_ring: add an alloc cache for io_kiocb entries") Link: https://lore.kernel.org/io-uring/68cc2687.050a0220.139b6.0005.GAE@google.com/ Reported-by: syzbot+baa2e0f4e02df602583e@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 80a178f3d896..12f5ee43850e 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -420,9 +420,6 @@ struct io_ring_ctx { struct list_head defer_list; unsigned nr_drained; - struct io_alloc_cache msg_cache; - spinlock_t msg_lock; - #ifdef CONFIG_NET_RX_BUSY_POLL struct list_head napi_list; /* track busy poll napi_id */ spinlock_t napi_lock; /* napi_list lock */ -- cgit v1.2.3 From a660194dd101e937c319171ad99c3fbe466fd825 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Wed, 17 Sep 2025 12:02:07 -0700 Subject: arm64: Enable permission change on arm64 kernel block mappings This patch paves the path to enable huge mappings in vmalloc space and linear map space by default on arm64. For this we must ensure that we can handle any permission games on the kernel (init_mm) pagetable. Previously, __change_memory_common() used apply_to_page_range() which does not support changing permissions for block mappings. We move away from this by using the pagewalk API, similar to what riscv does right now. It is the responsibility of the caller to ensure that the range over which permissions are being changed falls on leaf mapping boundaries. For systems with BBML2, this will be handled in future patches by dynamically splitting the mappings when required. Unlike apply_to_page_range(), the pagewalk API currently enforces the init_mm.mmap_lock to be held. To avoid the unnecessary bottleneck of the mmap_lock for our usecase, this patch extends this generic API to be used locklessly, so as to retain the existing behaviour for changing permissions. Apart from this reason, it is noted at [1] that KFENCE can manipulate kernel pgtable entries during softirqs. It does this by calling set_memory_valid() -> __change_memory_common(). This being a non-sleepable context, we cannot take the init_mm mmap lock. Add comments to highlight the conditions under which we can use the lockless variant - no underlying VMA, and the user having exclusive control over the range, thus guaranteeing no concurrent access. We require that the start and end of a given range do not partially overlap block mappings, or cont mappings. Return -EINVAL in case a partial block mapping is detected in any of the PGD/P4D/PUD/PMD levels; add a corresponding comment in update_range_prot() to warn that eliminating such a condition is the responsibility of the caller. Note that the pte level callback may change permissions for a whole contpte block, and that will be done one pte at a time, as opposed to an atomic operation for the block mappings. This is fine as any access will decode either the old or the new permission until the TLBI. apply_to_page_range() currently performs all pte level callbacks while in lazy mmu mode. Since arm64 can optimize performance by batching barriers when modifying kernel pgtables in lazy mmu mode, we would like to continue to benefit from this optimisation. Unfortunately walk_kernel_page_table_range() does not use lazy mmu mode.
However, since the pagewalk framework is not allocating any memory, we can safely bracket the whole operation inside lazy mmu mode ourselves. Therefore, wrap the call to walk_kernel_page_table_range() with the lazy MMU helpers. Link: https://lore.kernel.org/linux-arm-kernel/89d0ad18-4772-4d8f-ae8a-7c48d26a927e@arm.com/ [1] Signed-off-by: Dev Jain Signed-off-by: Yang Shi Reviewed-by: Ryan Roberts Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- include/linux/pagewalk.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 682472c15495..88e18615dd72 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -134,6 +134,9 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, int walk_kernel_page_table_range(unsigned long start, unsigned long end, const struct mm_walk_ops *ops, pgd_t *pgd, void *private); +int walk_kernel_page_table_range_lockless(unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + pgd_t *pgd, void *private); int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); -- cgit v1.2.3 From 76cffc3eb1bdee0a7e8cca090adfd46a740f1cb0 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 9 Sep 2025 13:19:44 +0100 Subject: soundwire: bus: add of_sdw_find_device_by_node helper There have been more than 3 instances of this helper in multiple codec drivers; it does not make sense to keep duplicating this code. Let's add an of_sdw_find_device_by_node() helper for codec drivers to use.
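Typical usage in a codec driver might look like this (a sketch; it assumes the helper returns a referenced device that the caller must drop):

	struct device *dev = of_sdw_find_device_by_node(np);

	if (dev) {
		struct sdw_slave *slave = dev_to_sdw_dev(dev);

		/* ... use the slave ... */
		put_device(dev);
	}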
Signed-off-by: Srinivas Kandagatla Reviewed-by: Dmitry Baryshkov Acked-by: Vinod Koul Link: https://patch.msgid.link/20250909121954.225833-4-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 0832776262ac..096213956d31 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -19,6 +19,7 @@ struct dentry; struct fwnode_handle; +struct device_node; struct sdw_bus; struct sdw_slave; @@ -1086,6 +1087,8 @@ int sdw_stream_add_slave(struct sdw_slave *slave, int sdw_stream_remove_slave(struct sdw_slave *slave, struct sdw_stream_runtime *stream); +struct device *of_sdw_find_device_by_node(struct device_node *np); + int sdw_slave_get_scale_index(struct sdw_slave *slave, u8 *base); /* messaging and data APIs */ @@ -1119,6 +1122,12 @@ static inline int sdw_stream_remove_slave(struct sdw_slave *slave, return -EINVAL; } +static inline struct device *of_sdw_find_device_by_node(struct device_node *np) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return NULL; +} + /* messaging and data APIs */ static inline int sdw_read(struct sdw_slave *slave, u32 addr) { -- cgit v1.2.3 From 2e07017b28e8bbace4a4973d11d0646575d36f94 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 9 Sep 2025 13:19:45 +0100 Subject: soundwire: bus: add sdw_slave_get_current_bank helper There have been 2 instances of this helper in codec drivers; it does not make sense to keep duplicating this code. Let's add an sdw_slave_get_current_bank() helper for codec drivers to use.
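The intended call pattern, in sketch form (error handling matches the stub's -EINVAL return):

	int bank = sdw_slave_get_current_bank(sdev);

	if (bank < 0)
		return bank;
	/* program the bank-selected (SCP_*_B0/B1) registers for 'bank' */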
MAX77705_TO_TIME_30M 3 #define MAX77705_SYS_TRACK_ENABLE 0 @@ -110,15 +96,8 @@ /* MAX77705_CHG_REG_CNFG_04 */ #define MAX77705_CHG_MINVSYS_SHIFT 6 #define MAX77705_CHG_MINVSYS_MASK GENMASK(7, 6) -#define MAX77705_CHG_PRM_SHIFT 0 -#define MAX77705_CHG_PRM_MASK GENMASK(5, 0) - -#define MAX77705_CHG_CV_PRM_SHIFT 0 -#define MAX77705_CHG_CV_PRM_MASK GENMASK(5, 0) /* MAX77705_CHG_REG_CNFG_05 */ -#define MAX77705_REG_B2SOVRC_SHIFT 0 -#define MAX77705_REG_B2SOVRC_MASK GENMASK(3, 0) #define MAX77705_B2SOVRC_DISABLE 0 #define MAX77705_B2SOVRC_4_5A 6 #define MAX77705_B2SOVRC_4_8A 8 @@ -128,9 +107,8 @@ #define MAX77705_WDTCLR_SHIFT 0 #define MAX77705_WDTCLR_MASK GENMASK(1, 0) #define MAX77705_WDTCLR 1 -#define MAX77705_CHGPROT_MASK GENMASK(3, 2) -#define MAX77705_CHGPROT_UNLOCKED GENMASK(3, 2) -#define MAX77705_SLOWEST_LX_SLOPE GENMASK(6, 5) +#define MAX77705_CHGPROT_UNLOCKED 3 +#define MAX77705_SLOWEST_LX_SLOPE 3 /* MAX77705_CHG_REG_CNFG_07 */ #define MAX77705_CHG_FMBST 4 @@ -140,36 +118,14 @@ #define MAX77705_REG_FGSRC_MASK BIT(MAX77705_REG_FGSRC_SHIFT) /* MAX77705_CHG_REG_CNFG_08 */ -#define MAX77705_REG_FSW_SHIFT 0 -#define MAX77705_REG_FSW_MASK GENMASK(1, 0) #define MAX77705_CHG_FSW_3MHz 0 #define MAX77705_CHG_FSW_2MHz 1 #define MAX77705_CHG_FSW_1_5MHz 2 /* MAX77705_CHG_REG_CNFG_09 */ -#define MAX77705_CHG_CHGIN_LIM_MASK GENMASK(6, 0) -#define MAX77705_CHG_EN_MASK BIT(7) #define MAX77705_CHG_DISABLE 0 -#define MAX77705_CHARGER_CHG_CHARGING(_reg) \ - (((_reg) & MAX77705_CHG_EN_MASK) > 1) - - -/* MAX77705_CHG_REG_CNFG_10 */ -#define MAX77705_CHG_WCIN_LIM GENMASK(5, 0) - -/* MAX77705_CHG_REG_CNFG_11 */ -#define MAX77705_VBYPSET_SHIFT 0 -#define MAX77705_VBYPSET_MASK GENMASK(6, 0) /* MAX77705_CHG_REG_CNFG_12 */ -#define MAX77705_CHGINSEL_SHIFT 5 -#define MAX77705_CHGINSEL_MASK BIT(MAX77705_CHGINSEL_SHIFT) -#define MAX77705_WCINSEL_SHIFT 6 -#define MAX77705_WCINSEL_MASK BIT(MAX77705_WCINSEL_SHIFT) -#define MAX77705_VCHGIN_REG_MASK GENMASK(4, 3) -#define MAX77705_WCIN_REG_MASK GENMASK(2, 1) -#define MAX77705_REG_DISKIP_SHIFT 0 -#define MAX77705_REG_DISKIP_MASK BIT(MAX77705_REG_DISKIP_SHIFT) /* REG=4.5V, UVLO=4.7V */ #define MAX77705_VCHGIN_4_5 0 /* REG=4.5V, UVLO=4.7V */ @@ -183,9 +139,59 @@ #define MAX77705_CURRENT_CHGIN_MIN 100000 #define MAX77705_CURRENT_CHGIN_MAX 3200000 +enum max77705_field_idx { + MAX77705_CHGPROT, + MAX77705_CHG_EN, + MAX77705_CHG_CC_LIM, + MAX77705_CHG_CHGIN_LIM, + MAX77705_CHG_CV_PRM, + MAX77705_CHG_PQEN, + MAX77705_CHG_RSTRT, + MAX77705_CHG_WCIN, + MAX77705_FCHGTIME, + MAX77705_LX_SLOPE, + MAX77705_MODE, + MAX77705_OTG_ILIM, + MAX77705_REG_B2SOVRC, + MAX77705_REG_DISKIP, + MAX77705_REG_FSW, + MAX77705_SYS_TRACK, + MAX77705_TO, + MAX77705_TO_TIME, + MAX77705_VBYPSET, + MAX77705_VCHGIN, + MAX77705_WCIN, + MAX77705_N_REGMAP_FIELDS, +}; + +static const struct reg_field max77705_reg_field[MAX77705_N_REGMAP_FIELDS] = { + [MAX77705_MODE] = REG_FIELD(MAX77705_CHG_REG_CNFG_00, 0, 3), + [MAX77705_FCHGTIME] = REG_FIELD(MAX77705_CHG_REG_CNFG_01, 0, 2), + [MAX77705_CHG_RSTRT] = REG_FIELD(MAX77705_CHG_REG_CNFG_01, 4, 5), + [MAX77705_CHG_PQEN] = REG_FIELD(MAX77705_CHG_REG_CNFG_01, 7, 7), + [MAX77705_CHG_CC_LIM] = REG_FIELD(MAX77705_CHG_REG_CNFG_02, 0, 5), + [MAX77705_OTG_ILIM] = REG_FIELD(MAX77705_CHG_REG_CNFG_02, 6, 7), + [MAX77705_TO] = REG_FIELD(MAX77705_CHG_REG_CNFG_03, 0, 2), + [MAX77705_TO_TIME] = REG_FIELD(MAX77705_CHG_REG_CNFG_03, 3, 5), + [MAX77705_SYS_TRACK] = REG_FIELD(MAX77705_CHG_REG_CNFG_03, 7, 7), + [MAX77705_CHG_CV_PRM] = REG_FIELD(MAX77705_CHG_REG_CNFG_04, 0, 5), + 
[MAX77705_REG_B2SOVRC] = REG_FIELD(MAX77705_CHG_REG_CNFG_05, 0, 3), + [MAX77705_CHGPROT] = REG_FIELD(MAX77705_CHG_REG_CNFG_06, 2, 3), + [MAX77705_LX_SLOPE] = REG_FIELD(MAX77705_CHG_REG_CNFG_06, 5, 6), + [MAX77705_REG_FSW] = REG_FIELD(MAX77705_CHG_REG_CNFG_08, 0, 1), + [MAX77705_CHG_CHGIN_LIM] = REG_FIELD(MAX77705_CHG_REG_CNFG_09, 0, 6), + [MAX77705_CHG_EN] = REG_FIELD(MAX77705_CHG_REG_CNFG_09, 7, 7), + [MAX77705_CHG_WCIN] = REG_FIELD(MAX77705_CHG_REG_CNFG_10, 0, 5), + [MAX77705_VBYPSET] = REG_FIELD(MAX77705_CHG_REG_CNFG_11, 0, 6), + [MAX77705_REG_DISKIP] = REG_FIELD(MAX77705_CHG_REG_CNFG_12, 0, 0), + [MAX77705_WCIN] = REG_FIELD(MAX77705_CHG_REG_CNFG_12, 1, 2), + [MAX77705_VCHGIN] = REG_FIELD(MAX77705_CHG_REG_CNFG_12, 3, 4), +}; + struct max77705_charger_data { struct device *dev; struct regmap *regmap; + struct regmap_field *rfield[MAX77705_N_REGMAP_FIELDS]; struct power_supply_battery_info *bat_info; struct workqueue_struct *wqueue; struct work_struct chgin_work; -- cgit v1.2.3 From bc7d3a0f92dad811110f5602f58fe756cefce2b8 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Thu, 18 Sep 2025 20:06:52 +0300 Subject: power: supply: max77705_charger: use REGMAP_IRQ_REG_LINE macro Refactoring the regmap_irq declarations with the REGMAP_IRQ_REG_LINE macro saves a few lines of definitions. Signed-off-by: Dzmitry Sankouski Signed-off-by: Sebastian Reichel --- include/linux/power/max77705_charger.h | 42 +++++++++++++--------------------- 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/max77705_charger.h b/include/linux/power/max77705_charger.h index a612795577b6..6653abfdf747 100644 --- a/include/linux/power/max77705_charger.h +++ b/include/linux/power/max77705_charger.h @@ -12,34 +12,24 @@ #include /* MAX77705_CHG_REG_CHG_INT */ -#define MAX77705_BYP_I BIT(0) -#define MAX77705_INP_LIMIT_I BIT(1) -#define MAX77705_BATP_I BIT(2) -#define MAX77705_BAT_I BIT(3) -#define MAX77705_CHG_I BIT(4) -#define MAX77705_WCIN_I BIT(5) -#define MAX77705_CHGIN_I BIT(6) -#define MAX77705_AICL_I BIT(7) - -/* MAX77705_CHG_REG_CHG_INT_MASK */ -#define MAX77705_BYP_IM BIT(0) -#define MAX77705_INP_LIMIT_IM BIT(1) -#define MAX77705_BATP_IM BIT(2) -#define MAX77705_BAT_IM BIT(3) -#define MAX77705_CHG_IM BIT(4) -#define MAX77705_WCIN_IM BIT(5) -#define MAX77705_CHGIN_IM BIT(6) -#define MAX77705_AICL_IM BIT(7) +#define MAX77705_BYP_I (0) +#define MAX77705_INP_LIMIT_I (1) +#define MAX77705_BATP_I (2) +#define MAX77705_BAT_I (3) +#define MAX77705_CHG_I (4) +#define MAX77705_WCIN_I (5) +#define MAX77705_CHGIN_I (6) +#define MAX77705_AICL_I (7) /* MAX77705_CHG_REG_CHG_INT_OK */ -#define MAX77705_BYP_OK BIT(0) -#define MAX77705_DISQBAT_OK BIT(1) -#define MAX77705_BATP_OK BIT(2) -#define MAX77705_BAT_OK BIT(3) -#define MAX77705_CHG_OK BIT(4) -#define MAX77705_WCIN_OK BIT(5) -#define MAX77705_CHGIN_OK BIT(6) -#define MAX77705_AICL_OK BIT(7) +#define MAX77705_BYP_OK BIT(MAX77705_BYP_I) +#define MAX77705_DISQBAT_OK BIT(MAX77705_INP_LIMIT_I) +#define MAX77705_BATP_OK BIT(MAX77705_BATP_I) +#define MAX77705_BAT_OK BIT(MAX77705_BAT_I) +#define MAX77705_CHG_OK BIT(MAX77705_CHG_I) +#define MAX77705_WCIN_OK BIT(MAX77705_WCIN_I) +#define MAX77705_CHGIN_OK BIT(MAX77705_CHGIN_I) +#define MAX77705_AICL_OK BIT(MAX77705_AICL_I) /* MAX77705_CHG_REG_DETAILS_00 */ #define MAX77705_BATP_DTLS BIT(0) -- cgit v1.2.3 From 603b4416232524dafde8e2cf859788dae786dea1 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:30 +0200 Subject: bpf: Update the bpf_prog_calc_tag to use SHA256 Exclusive
maps restrict map access to specific programs using a hash. The current hash used for this is SHA1, which is prone to collisions. This patch uses SHA256, which is more resilient against collisions. This new hash is stored in bpf_prog and used by the verifier to determine if a program can access a given exclusive map. The original 64-bit tags are kept, as they are used by users as a short, possibly colliding program identifier for non-security purposes. Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-2-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 41f776071ff5..d75902074bd1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -31,6 +31,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -1717,7 +1718,10 @@ struct bpf_prog { enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ - u8 tag[BPF_TAG_SIZE]; + union { + u8 digest[SHA256_DIGEST_SIZE]; + u8 tag[BPF_TAG_SIZE]; + }; struct bpf_prog_stats __percpu *stats; int __percpu *active; unsigned int (*bpf_func)(const void *ctx, -- cgit v1.2.3 From baefdbdf6812e120c9fba9cfb101d3656f478026 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:31 +0200 Subject: bpf: Implement exclusive map creation Exclusive maps allow a map to be accessed only by a program with a matching hash, which is specified in the excl_prog_hash attr. For the signing use-case, this allows the trusted loader program to load the map and verify its integrity. Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-3-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d75902074bd1..c6a6ee1b2938 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -329,6 +329,7 @@ struct bpf_map { atomic64_t sleepable_refcnt; s64 __percpu *elem_count; u64 cookie; /* write-once */ + char *excl_prog_sha; }; static inline const char *btf_field_type_name(enum btf_field_type type) -- cgit v1.2.3 From ea2e6467ac36bf3d785defc89e58269b15d182f7 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:35 +0200 Subject: bpf: Return hashes of maps in BPF_OBJ_GET_INFO_BY_FD Currently only array maps are supported, but the implementation can be extended for other maps and objects. The hash is memoized only for exclusive and frozen maps as their content is stable until the exclusive program modifies the map. This is required for BPF signing, enabling a trusted loader program to verify a map's integrity. The loader retrieves the map's runtime hash from the kernel and compares it against an expected hash computed at build time.
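To make the mechanism concrete, here is a hedged sketch of the access check these patches enable; the function name and surrounding logic are illustrative, only the bpf_prog digest and bpf_map excl_prog_sha fields come from the diffs above.

	#include <linux/bpf.h>
	#include <linux/string.h>
	#include <crypto/sha2.h>

	/* Illustrative check: does this program's SHA256 digest match the
	 * hash an exclusive map was created with?
	 */
	static bool prog_may_access_map(const struct bpf_map *map,
					const struct bpf_prog *prog)
	{
		/* Maps created without excl_prog_hash stay accessible to all. */
		if (!map->excl_prog_sha)
			return true;

		return !memcmp(map->excl_prog_sha, prog->digest,
			       SHA256_DIGEST_SIZE);
	}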
Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-7-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c6a6ee1b2938..e0c2c78a5faa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -110,6 +111,7 @@ struct bpf_map_ops { long (*map_pop_elem)(struct bpf_map *map, void *value); long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); + int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -289,6 +291,7 @@ struct bpf_map_owner { }; struct bpf_map { + u8 sha[SHA256_DIGEST_SIZE]; const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY -- cgit v1.2.3 From 8cd189e414bb705312fbfff7f7b5605f6de2459a Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:36 +0200 Subject: bpf: Move the signature kfuncs to helpers.c No functional changes, except for the addition of the headers for the kfuncs so that they can be used for signature verification. Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-8-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e0c2c78a5faa..dfc1a27b56d5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3424,6 +3424,38 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, #endif /* CONFIG_BPF_SYSCALL */ #endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */ +#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) + +struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags); +struct bpf_key *bpf_lookup_system_key(u64 id); +void bpf_key_put(struct bpf_key *bkey); +int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, + struct bpf_key *trusted_keyring); + +#else +static inline struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) +{ + return NULL; +} + +static inline struct bpf_key *bpf_lookup_system_key(u64 id) +{ + return NULL; +} + +static inline void bpf_key_put(struct bpf_key *bkey) +{ +} + +static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, + struct bpf_key *trusted_keyring) +{ + return -EOPNOTSUPP; +} +#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */ + /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; -- cgit v1.2.3 From b9c3d426c8a5823b3a1e5078719750c6abb0d2c1 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 8 Sep 2025 14:12:59 +0300 Subject: wifi: cfg80211: Advertise supported NAN capabilities Allow drivers to specify the supported NAN capabilities and support advertising the NAN capabilities to user space. 
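As a purely hypothetical consumer of the NAN_OP_MODE_* masks added in the diff below (neither the function nor its caller exist in the patch):

	/* Decode a NAN operation-mode byte using the masks from ieee80211.h. */
	static void nan_dump_op_mode(u8 op_mode)
	{
		u8 phy = op_mode & NAN_OP_MODE_PHY_MODE_MASK;	/* bits 0x01 | 0x10 */

		if (phy == NAN_OP_MODE_PHY_MODE_VHT)
			pr_debug("NAN PHY mode: VHT\n");
		else if (phy == NAN_OP_MODE_PHY_MODE_HE)
			pr_debug("NAN PHY mode: HE\n");

		if (op_mode & NAN_OP_MODE_160MHZ)
			pr_debug("NAN: 160 MHz operation supported\n");
		if (op_mode & NAN_OP_MODE_80P80MHZ)
			pr_debug("NAN: 80+80 MHz operation supported\n");
	}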
Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.2976966556f5.Ic6e43b10049573180c909dad806f279cfb31143e@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 17 +++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d350263f23f3..2110345de8ef 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -6065,4 +6065,21 @@ static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap) _data + ieee80211_mle_common_size(_data),\ _len - ieee80211_mle_common_size(_data)) +/* NAN operation mode, as defined in Wi-Fi Aware (TM) specification Table 81 */ +#define NAN_OP_MODE_PHY_MODE_VHT 0x01 +#define NAN_OP_MODE_PHY_MODE_HE 0x10 +#define NAN_OP_MODE_PHY_MODE_MASK 0x11 +#define NAN_OP_MODE_80P80MHZ 0x02 +#define NAN_OP_MODE_160MHZ 0x04 +#define NAN_OP_MODE_PNDL_SUPPRTED 0x08 + +/* NAN Device capabilities, as defined in Wi-Fi Aware (TM) specification + * Table 79 + */ +#define NAN_DEV_CAPA_DFS_OWNER 0x01 +#define NAN_DEV_CAPA_EXT_KEY_ID_SUPPORTED 0x02 +#define NAN_DEV_CAPA_SIM_NDP_RX_SUPPORTED 0x04 +#define NAN_DEV_CAPA_NDPE_SUPPORTED 0x08 +#define NAN_DEV_CAPA_S3_SUPPORTED 0x10 + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 31e7681da78d7e8d2d83185c0e640012a018f229 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 18 Sep 2025 15:19:12 +1000 Subject: wifi: mac80211: correctly initialise S1G chandef for STA When moving to the AP's channel, ensure we correctly initialise the chandef and perform the required validation. Additionally, if the AP is beaconing on a 2MHz primary, derive the 2MHz primary center frequency by extracting the sibling 1MHz primary and averaging the two frequencies. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250918051913.500781-3-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2110345de8ef..ddff9102f633 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1182,6 +1182,18 @@ enum ieee80211_s1g_chanwidth { IEEE80211_S1G_CHANWIDTH_16MHZ = 15, }; +/** + * enum ieee80211_s1g_pri_chanwidth - S1G primary channel widths + * described in IEEE80211-2024 Table 10-39.
+ * + * @IEEE80211_S1G_PRI_CHANWIDTH_2MHZ: 2MHz primary channel + * @IEEE80211_S1G_PRI_CHANWIDTH_1MHZ: 1MHz primary channel + */ +enum ieee80211_s1g_pri_chanwidth { + IEEE80211_S1G_PRI_CHANWIDTH_2MHZ = 0, + IEEE80211_S1G_PRI_CHANWIDTH_1MHZ = 1, +}; + #define WLAN_SA_QUERY_TR_ID_LEN 2 #define WLAN_MEMBERSHIP_LEN 8 #define WLAN_USER_POSITION_LEN 16 @@ -3170,8 +3182,12 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define S1G_CAP9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0) -#define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) +#define S1G_OPER_CH_WIDTH_PRIMARY BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) +#define S1G_OPER_CH_PRIMARY_LOCATION BIT(5) + +#define S1G_2M_PRIMARY_LOCATION_LOWER 0 +#define S1G_2M_PRIMARY_LOCATION_UPPER 1 /* EHT MAC capabilities as defined in P802.11be_D2.0 section 9.4.2.313.2 */ #define IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS 0x01 -- cgit v1.2.3 From e1b849cfa6b61f1c866a908c9e8dd9b5aaab820b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 9 Apr 2025 17:12:59 +0200 Subject: writeback: Avoid contention on wb->list_lock when switching inodes There can be multiple inode switch works that are trying to switch inodes to / from the same wb. This can happen in particular if a cgroup that owns many (thousands of) inodes exits and we need to switch them all. In this case several inode_switch_wbs_work_fn() instances will be just spinning on the same wb->list_lock while only one of them makes forward progress. This wastes CPU cycles and quickly leads to softlockup reports and an unusable system. Instead of running several inode_switch_wbs_work_fn() instances in parallel switching to the same wb and contending on wb->list_lock, run just one work item per wb and manage a queue of isw items switching to this wb. Acked-by: Tejun Heo Signed-off-by: Jan Kara --- include/linux/backing-dev-defs.h | 4 ++++ include/linux/writeback.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 2ad261082bba..c5c9d89c73ed 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -152,6 +152,10 @@ struct bdi_writeback { struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ struct list_head b_attached; /* attached inodes, protected by list_lock */ struct list_head offline_node; /* anchored at offline_cgwbs */ + struct work_struct switch_work; /* work used to perform inode switching + * to this wb */ + struct llist_head switch_wbs_ctxs; /* queued contexts for + * writeback switching */ union { struct work_struct release_work; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a2848d731a46..15a4bc4ab819 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -265,6 +265,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } +void inode_switch_wbs_work_fn(struct work_struct *work); + #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) -- cgit v1.2.3 From e3e1812f8e25ac277f5cc9249802365300c582e3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:28 +0200 Subject: ns: move to_ns_common() to ns_common.h Move the helper to ns_common.h where it belongs.
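A minimal sketch of what the moved helper buys callers; the function below is illustrative and assumes only the _Generic dispatch shown in the diff that follows.

	#include <linux/ns_common.h>
	#include <linux/pid_namespace.h>

	/* Illustrative: to_ns_common() resolves &ns->ns at compile time for
	 * any supported namespace type, so callers stop open-coding it.
	 */
	static unsigned int pidns_inum(struct pid_namespace *pid_ns)
	{
		struct ns_common *nsc = to_ns_common(pid_ns);

		return nsc->inum;	/* equivalent to pid_ns->ns.inum */
	}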
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 20 ++++++++++++++++++++ include/linux/nsproxy.h | 11 ----------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 7d22ea50b098..bc2e0758e1c9 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -6,6 +6,15 @@ struct proc_ns_operations; +struct cgroup_namespace; +struct ipc_namespace; +struct mnt_namespace; +struct net; +struct pid_namespace; +struct time_namespace; +struct user_namespace; +struct uts_namespace; + struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; @@ -13,4 +22,15 @@ struct ns_common { refcount_t count; }; +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns)->ns, \ + struct ipc_namespace *: &(__ns)->ns, \ + struct mnt_namespace *: &(__ns)->ns, \ + struct net *: &(__ns)->ns, \ + struct pid_namespace *: &(__ns)->ns, \ + struct time_namespace *: &(__ns)->ns, \ + struct user_namespace *: &(__ns)->ns, \ + struct uts_namespace *: &(__ns)->ns) + #endif diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index dab6a1734a22..e6bec522b139 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -42,17 +42,6 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; -#define to_ns_common(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: &(__ns->ns), \ - struct ipc_namespace *: &(__ns->ns), \ - struct net *: &(__ns->ns), \ - struct pid_namespace *: &(__ns->ns), \ - struct mnt_namespace *: &(__ns->ns), \ - struct time_namespace *: &(__ns->ns), \ - struct user_namespace *: &(__ns->ns), \ - struct uts_namespace *: &(__ns->ns)) - /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. -- cgit v1.2.3 From 9296f46a9645cf753d2522093485cebe77635aa6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:29 +0200 Subject: nsfs: add nsfs.h header And move the stuff out from proc_ns.h where it really doesn't belong. 
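For orientation, one hedged example of the API that now lives in the new header (the wrapper function is invented; netns_operations is the existing proc_ns_operations instance for network namespaces):

	#include <linux/nsfs.h>
	#include <linux/proc_ns.h>
	#include <linux/path.h>

	/* Resolve a path to @task's network namespace via nsfs. */
	static int demo_net_ns_path(struct task_struct *task, struct path *path)
	{
		return ns_get_path(path, task, &netns_operations);
	}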
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/nsfs.h | 26 ++++++++++++++++++++++++++ include/linux/proc_ns.h | 13 +------------ 2 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 include/linux/nsfs.h (limited to 'include/linux') diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h new file mode 100644 index 000000000000..fb84aa538091 --- /dev/null +++ b/include/linux/nsfs.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Christian Brauner */ + +#ifndef _LINUX_NSFS_H +#define _LINUX_NSFS_H + +#include + +struct path; +struct task_struct; +struct proc_ns_operations; + +int ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +typedef struct ns_common *ns_get_path_helper_t(void *); +int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, + void *private_data); + +bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); + +int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +void nsfs_init(void); + +#endif /* _LINUX_NSFS_H */ + diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4b20375f3783..5e1a4b378b79 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -5,7 +5,7 @@ #ifndef _LINUX_PROC_NS_H #define _LINUX_PROC_NS_H -#include +#include #include struct pid_namespace; @@ -75,16 +75,5 @@ static inline int ns_alloc_inum(struct ns_common *ns) #define ns_free_inum(ns) proc_free_inum((ns)->inum) #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) -extern int ns_get_path(struct path *path, struct task_struct *task, - const struct proc_ns_operations *ns_ops); -typedef struct ns_common *ns_get_path_helper_t(void *); -extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, - void *private_data); - -extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); - -extern int ns_get_name(char *buf, size_t size, struct task_struct *task, - const struct proc_ns_operations *ns_ops); -extern void nsfs_init(void); #endif /* _LINUX_PROC_NS_H */ -- cgit v1.2.3 From 660def10b01b248fd97255afacb7b0e305ac833a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:30 +0200 Subject: ns: uniformly initialize ns_common No point in cargo-culting the same code across all the different types. Use one common initializer. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 5e1a4b378b79..dbb119bda097 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -72,6 +72,22 @@ static inline int ns_alloc_inum(struct ns_common *ns) return proc_alloc_inum(&ns->inum); } +static inline int ns_common_init(struct ns_common *ns, + const struct proc_ns_operations *ops, + bool alloc_inum) +{ + if (alloc_inum) { + int ret; + ret = proc_alloc_inum(&ns->inum); + if (ret) + return ret; + } + refcount_set(&ns->count, 1); + ns->stashed = NULL; + ns->ops = ops; + return 0; +} + #define ns_free_inum(ns) proc_free_inum((ns)->inum) #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) -- cgit v1.2.3 From 86c5aba210b145d7de011a5abaf9b785aa70a183 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:39 +0200 Subject: ns: remove ns_alloc_inum() It's now unused. 
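With ns_alloc_inum() gone, everything funnels through ns_common_init() from the previous patch. A hedged sketch of a caller follows, with invented names; at this point in the series the helper still takes the explicit alloc_inum flag.

	#include <linux/proc_ns.h>

	/* Illustrative namespace setup path using ns_common_init(). */
	static int demo_ns_setup(struct ns_common *ns,
				 const struct proc_ns_operations *ops)
	{
		/* true: allocate a fresh proc inode number for this namespace */
		int err = ns_common_init(ns, ops, true);

		if (err)
			return err;

		/* refcount == 1, ->stashed == NULL, ->ops == ops from here on */
		return 0;
	}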
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index dbb119bda097..e50d312f9fee 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,12 +66,6 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -static inline int ns_alloc_inum(struct ns_common *ns) -{ - WRITE_ONCE(ns->stashed, NULL); - return proc_alloc_inum(&ns->inum); -} - static inline int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, bool alloc_inum) -- cgit v1.2.3 From 885fc8ac0a4dc70f5d87b80b0977292870e35c60 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:40 +0200 Subject: nstree: make iterator generic Move the namespace iteration infrastructure originally introduced for mount namespaces into a generic library usable by all namespace types. Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 9 +++++ include/linux/nstree.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/proc_ns.h | 3 ++ 3 files changed, 103 insertions(+) create mode 100644 include/linux/nstree.h (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index bc2e0758e1c9..7224072cccc5 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -3,6 +3,7 @@ #define _LINUX_NS_COMMON_H #include +#include struct proc_ns_operations; @@ -20,6 +21,14 @@ struct ns_common { const struct proc_ns_operations *ops; unsigned int inum; refcount_t count; + union { + struct { + u64 ns_id; + struct rb_node ns_tree_node; + struct list_head ns_list_node; + }; + struct rcu_head ns_rcu; + }; }; #define to_ns_common(__ns) \ diff --git a/include/linux/nstree.h b/include/linux/nstree.h new file mode 100644 index 000000000000..29ad6402260c --- /dev/null +++ b/include/linux/nstree.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NSTREE_H +#define _LINUX_NSTREE_H + +#include +#include +#include +#include +#include +#include + +/** + * struct ns_tree - Namespace tree + * @ns_tree: Rbtree of namespaces of a particular type + * @ns_list: Sequentially walkable list of all namespaces of this type + * @ns_tree_lock: Seqlock to protect the tree and list + */ +struct ns_tree { + struct rb_root ns_tree; + struct list_head ns_list; + seqlock_t ns_tree_lock; + int type; +}; + +extern struct ns_tree cgroup_ns_tree; +extern struct ns_tree ipc_ns_tree; +extern struct ns_tree mnt_ns_tree; +extern struct ns_tree net_ns_tree; +extern struct ns_tree pid_ns_tree; +extern struct ns_tree time_ns_tree; +extern struct ns_tree user_ns_tree; +extern struct ns_tree uts_ns_tree; + +#define to_ns_tree(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(cgroup_ns_tree), \ + struct ipc_namespace *: &(ipc_ns_tree), \ + struct net *: &(net_ns_tree), \ + struct pid_namespace *: &(pid_ns_tree), \ + struct mnt_namespace *: &(mnt_ns_tree), \ + struct time_namespace *: &(time_ns_tree), \ + struct user_namespace *: &(user_ns_tree), \ + struct uts_namespace *: &(uts_ns_tree)) + +u64 ns_tree_gen_id(struct ns_common *ns); +void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree); +void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree); +struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type); +struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, + struct ns_tree *ns_tree, + bool previous); + +static 
inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) +{ + ns_tree_gen_id(ns); + __ns_tree_add_raw(ns, ns_tree); +} + +/** + * ns_tree_add_raw - Add a namespace to a namespace tree + * @ns: Namespace to add + * + * This function adds a namespace to the appropriate namespace tree + * without assigning an id. + */ +#define ns_tree_add_raw(__ns) __ns_tree_add_raw(to_ns_common(__ns), to_ns_tree(__ns)) + +/** + * ns_tree_add - Add a namespace to a namespace tree + * @ns: Namespace to add + * + * This function assigns a new id to the namespace and adds it to the + * appropriate namespace tree and list. + */ +#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns)) + +/** + * ns_tree_remove - Remove a namespace from a namespace tree + * @ns: Namespace to remove + * + * This function removes a namespace from the appropriate namespace + * tree and list. + */ +#define ns_tree_remove(__ns) __ns_tree_remove(to_ns_common(__ns), to_ns_tree(__ns)) + +#define ns_tree_adjoined_rcu(__ns, __previous) \ + __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous) + +#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node)) + +#endif /* _LINUX_NSTREE_H */ diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index e50d312f9fee..7f89f0829e60 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -79,6 +79,9 @@ static inline int ns_common_init(struct ns_common *ns, refcount_set(&ns->count, 1); ns->stashed = NULL; ns->ops = ops; + ns->ns_id = 0; + RB_CLEAR_NODE(&ns->ns_tree_node); + INIT_LIST_HEAD(&ns->ns_list_node); return 0; } -- cgit v1.2.3 From b36c823b9a4be5b0c8e38c3fd60cade7d41c216c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:46 +0200 Subject: time: support ns lookup Hook into the generic ns lookup infrastructure to support file handles for time namespaces. Reviewed-by: Thomas Gleixner Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index bb2c52f4fc94..7f6af7a9771e 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -33,6 +33,7 @@ struct time_namespace { extern struct time_namespace init_time_ns; #ifdef CONFIG_TIME_NS +void __init time_ns_init(void); extern int vdso_join_timens(struct task_struct *task, struct time_namespace *ns); extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); @@ -108,6 +109,10 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #else +static inline void __init time_ns_init(void) +{ +} + static inline int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { -- cgit v1.2.3 From d7afdf889561058068ab46fd8f306c70ef29216a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:49 +0200 Subject: ns: add to_<type>_ns() to respective headers Every namespace type has a container_of(ns, <type>, ns) static inline function that is currently not exposed in the header. So we have a bunch of places that open-code it via container_of(). Move it to the headers so we can use it directly.
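A small sketch of the before/after this buys (the function name is illustrative):

	/* With to_pid_ns() exposed in pid_namespace.h, callers shrink from an
	 * open-coded container_of() to the named helper.
	 */
	static struct pid_namespace *pidns_of(struct ns_common *ns)
	{
		/* before: return container_of(ns, struct pid_namespace, ns); */
		return to_pid_ns(ns);
	}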
Reviewed-by: Aleksa Sarai Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 5 +++++ include/linux/ipc_namespace.h | 5 +++++ include/linux/pid_namespace.h | 5 +++++ include/linux/time_namespace.h | 4 ++++ include/linux/user_namespace.h | 5 +++++ include/linux/utsname.h | 5 +++++ 6 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..9ca25346f7cb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -794,6 +794,11 @@ extern struct cgroup_namespace init_cgroup_ns; #ifdef CONFIG_CGROUPS +static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) +{ + return container_of(ns, struct cgroup_namespace, ns); +} + void free_cgroup_ns(struct cgroup_namespace *ns); struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e8240cf2611a..924e4754374f 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -129,6 +129,11 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif #if defined(CONFIG_IPC_NS) +static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns) +{ + return container_of(ns, struct ipc_namespace, ns); +} + extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct user_namespace *user_ns, struct ipc_namespace *ns); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..ba0efc8c8596 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -54,6 +54,11 @@ extern struct pid_namespace init_pid_ns; #define PIDNS_ADDING (1U << 31) #ifdef CONFIG_PID_NS +static inline struct pid_namespace *to_pid_ns(struct ns_common *ns) +{ + return container_of(ns, struct pid_namespace, ns); +} + static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 7f6af7a9771e..a47a4ce4183e 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -33,6 +33,10 @@ struct time_namespace { extern struct time_namespace init_time_ns; #ifdef CONFIG_TIME_NS +static inline struct time_namespace *to_time_ns(struct ns_common *ns) +{ + return container_of(ns, struct time_namespace, ns); +} void __init time_ns_init(void); extern int vdso_join_timens(struct task_struct *task, struct time_namespace *ns); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index a0bb6d012137..a09056ad090e 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -168,6 +168,11 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS +static inline struct user_namespace *to_user_ns(struct ns_common *ns) +{ + return container_of(ns, struct user_namespace, ns); +} + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index bf7613ba412b..5d34c4f0f945 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -30,6 +30,11 @@ struct uts_namespace { extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS +static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) +{ + return container_of(ns, struct uts_namespace, ns); +} + static inline void get_uts_ns(struct uts_namespace *ns) { refcount_inc(&ns->ns.count); -- cgit v1.2.3 From d2afdb73f8ad77b49eca9d110d0c54bf30d1df0f Mon 
Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:50 +0200 Subject: nsfs: add current_in_namespace() Add a helper to easily check whether a given namespace is the caller's current namespace. This is currently open-coded in a lot of places. Simply switch on the type and compare the results. Reviewed-by: Aleksa Sarai Signed-off-by: Christian Brauner --- include/linux/nsfs.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h index fb84aa538091..e5a5fa83d36b 100644 --- a/include/linux/nsfs.h +++ b/include/linux/nsfs.h @@ -5,6 +5,8 @@ #define _LINUX_NSFS_H #include +#include +#include struct path; struct task_struct; @@ -22,5 +24,17 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); void nsfs_init(void); -#endif /* _LINUX_NSFS_H */ +#define __current_namespace_from_type(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: current->nsproxy->cgroup_ns, \ + struct ipc_namespace *: current->nsproxy->ipc_ns, \ + struct net *: current->nsproxy->net_ns, \ + struct pid_namespace *: task_active_pid_ns(current), \ + struct mnt_namespace *: current->nsproxy->mnt_ns, \ + struct time_namespace *: current->nsproxy->time_ns, \ + struct user_namespace *: current_user_ns(), \ + struct uts_namespace *: current->nsproxy->uts_ns) + +#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns) +#endif /* _LINUX_NSFS_H */ -- cgit v1.2.3 From 5222470b2fbb3740f931f189db33dd1367b1ae75 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:51 +0200 Subject: nsfs: support file handles A while ago we added support for file handles to pidfs so pidfds can be encoded and decoded as file handles. Userspace has adopted this quickly and it's proven very useful. Implement file handles for namespaces as well. A process is not always able to open /proc/self/ns/. That requires procfs to be mounted and for /proc/self/ or /proc/self/ns/ to not be overmounted. However, userspace can always derive a namespace fd from a pidfd. And that always works for a task's own namespace. There's no need to introduce unnecessary behavioral differences between /proc/self/ns/ fds, pidfd-derived namespace fds, and file-handle-derived namespace fds. So namespace file handles are always decodable if the caller is located in the namespace the file handle refers to. This also allows a task to e.g., store a set of file handles to its namespaces in a file on-disk so it can verify when it gets rexeced that they're still valid and so on. This is akin to the pidfd use-case. Or just plainly for namespace comparison reasons where a file handle to the task's own namespace can be easily compared against others. Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index cfb0dd1ea49c..3aac58a520c7 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -122,6 +122,12 @@ enum fid_type { FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1, FILEID_BCACHEFS_WITH_PARENT = 0xb2, + /* + * + * 64 bit namespace identifier, 32 bit namespace type, 32 bit inode number. 
+ */ + FILEID_NSFS = 0xf1, + /* * 64 bit unique kernfs id */ -- cgit v1.2.3 From 93f67a7ddadf6ed8997c000df9790e5d64617196 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:00 +0200 Subject: uts: split namespace into separate header We have dedicated headers for all namespace types. Add one for the uts namespace as well. Now it's consistent for all namespace types. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/uts_namespace.h | 65 +++++++++++++++++++++++++++++++++++++++++++ include/linux/utsname.h | 58 +------------------------------------- 2 files changed, 66 insertions(+), 57 deletions(-) create mode 100644 include/linux/uts_namespace.h (limited to 'include/linux') diff --git a/include/linux/uts_namespace.h b/include/linux/uts_namespace.h new file mode 100644 index 000000000000..c2b619bb4e57 --- /dev/null +++ b/include/linux/uts_namespace.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UTS_NAMESPACE_H +#define _LINUX_UTS_NAMESPACE_H + +#include +#include + +struct user_namespace; +extern struct user_namespace init_user_ns; + +struct uts_namespace { + struct new_utsname name; + struct user_namespace *user_ns; + struct ucounts *ucounts; + struct ns_common ns; +} __randomize_layout; + +extern struct uts_namespace init_uts_ns; + +#ifdef CONFIG_UTS_NS +static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) +{ + return container_of(ns, struct uts_namespace, ns); +} + +static inline void get_uts_ns(struct uts_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +extern struct uts_namespace *copy_utsname(unsigned long flags, + struct user_namespace *user_ns, struct uts_namespace *old_ns); +extern void free_uts_ns(struct uts_namespace *ns); + +static inline void put_uts_ns(struct uts_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_uts_ns(ns); +} + +void uts_ns_init(void); +#else +static inline void get_uts_ns(struct uts_namespace *ns) +{ +} + +static inline void put_uts_ns(struct uts_namespace *ns) +{ +} + +static inline struct uts_namespace *copy_utsname(unsigned long flags, + struct user_namespace *user_ns, struct uts_namespace *old_ns) +{ + if (flags & CLONE_NEWUTS) + return ERR_PTR(-EINVAL); + + return old_ns; +} + +static inline void uts_ns_init(void) +{ +} +#endif + +#endif /* _LINUX_UTS_NAMESPACE_H */ diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 5d34c4f0f945..547bd4439706 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include enum uts_proc { UTS_PROC_ARCH, @@ -18,62 +18,6 @@ enum uts_proc { UTS_PROC_DOMAINNAME, }; -struct user_namespace; -extern struct user_namespace init_user_ns; - -struct uts_namespace { - struct new_utsname name; - struct user_namespace *user_ns; - struct ucounts *ucounts; - struct ns_common ns; -} __randomize_layout; -extern struct uts_namespace init_uts_ns; - -#ifdef CONFIG_UTS_NS -static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) -{ - return container_of(ns, struct uts_namespace, ns); -} - -static inline void get_uts_ns(struct uts_namespace *ns) -{ - refcount_inc(&ns->ns.count); -} - -extern struct uts_namespace *copy_utsname(unsigned long flags, - struct user_namespace *user_ns, struct uts_namespace *old_ns); -extern void free_uts_ns(struct uts_namespace *ns); - -static inline void put_uts_ns(struct uts_namespace *ns) -{ - if (refcount_dec_and_test(&ns->ns.count)) - free_uts_ns(ns); -} - -void uts_ns_init(void); -#else -static inline 
void get_uts_ns(struct uts_namespace *ns) -{ -} - -static inline void put_uts_ns(struct uts_namespace *ns) -{ -} - -static inline struct uts_namespace *copy_utsname(unsigned long flags, - struct user_namespace *user_ns, struct uts_namespace *old_ns) -{ - if (flags & CLONE_NEWUTS) - return ERR_PTR(-EINVAL); - - return old_ns; -} - -static inline void uts_ns_init(void) -{ -} -#endif - #ifdef CONFIG_PROC_SYSCTL extern void uts_proc_notify(enum uts_proc proc); #else -- cgit v1.2.3 From b2a0b192084acd0a86d66cbbc61e17ba1f5bd583 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:01 +0200 Subject: mnt: expose pointer to init_mnt_ns There's various scenarios where we need to know whether we are in the initial set of namespaces or not to e.g., shortcut permission checking. All namespaces expose that information. Let's do that too. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/mnt_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 70b366b64816..6d1c4c218c14 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -11,6 +11,8 @@ struct fs_struct; struct user_namespace; struct ns_common; +extern struct mnt_namespace init_mnt_ns; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); -- cgit v1.2.3 From f74ca6da113d5d4b21c00bb4da3f3c137162b4fe Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:02 +0200 Subject: nscommon: move to separate file It's really awkward spilling the ns common infrastructure into multiple headers. Move it to a separate file. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 3 +++ include/linux/proc_ns.h | 19 ------------------- 2 files changed, 3 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 7224072cccc5..78b17fe80b62 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -31,6 +31,9 @@ struct ns_common { }; }; +int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, + bool alloc_inum); + #define to_ns_common(__ns) \ _Generic((__ns), \ struct cgroup_namespace *: &(__ns)->ns, \ diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 7f89f0829e60..9f21670b5824 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,25 +66,6 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -static inline int ns_common_init(struct ns_common *ns, - const struct proc_ns_operations *ops, - bool alloc_inum) -{ - if (alloc_inum) { - int ret; - ret = proc_alloc_inum(&ns->inum); - if (ret) - return ret; - } - refcount_set(&ns->count, 1); - ns->stashed = NULL; - ns->ops = ops; - ns->ns_id = 0; - RB_CLEAR_NODE(&ns->ns_tree_node); - INIT_LIST_HEAD(&ns->ns_list_node); - return 0; -} - #define ns_free_inum(ns) proc_free_inum((ns)->inum) #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) -- cgit v1.2.3 From 5fc6bef178f1b644f1439e520c8f83bfc83a1252 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:03 +0200 Subject: cgroup: split namespace into separate header We have dedicated headers for all namespace types. Add one for the cgroup namespace as well. 
Now it's consistent for all namespace types and easy to figure out what to include. Acked-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 51 +----------------------------------- include/linux/cgroup_namespace.h | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 50 deletions(-) create mode 100644 include/linux/cgroup_namespace.h (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9ca25346f7cb..5156fed8cbc3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -27,6 +27,7 @@ #include #include +#include struct kernel_clone_args; @@ -783,56 +784,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ -struct cgroup_namespace { - struct ns_common ns; - struct user_namespace *user_ns; - struct ucounts *ucounts; - struct css_set *root_cset; -}; - -extern struct cgroup_namespace init_cgroup_ns; - -#ifdef CONFIG_CGROUPS - -static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) -{ - return container_of(ns, struct cgroup_namespace, ns); -} - -void free_cgroup_ns(struct cgroup_namespace *ns); - -struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, - struct user_namespace *user_ns, - struct cgroup_namespace *old_ns); - -int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns); - -static inline void get_cgroup_ns(struct cgroup_namespace *ns) -{ - refcount_inc(&ns->ns.count); -} - -static inline void put_cgroup_ns(struct cgroup_namespace *ns) -{ - if (refcount_dec_and_test(&ns->ns.count)) - free_cgroup_ns(ns); -} - -#else /* !CONFIG_CGROUPS */ - -static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } -static inline struct cgroup_namespace * -copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, - struct cgroup_namespace *old_ns) -{ - return old_ns; -} - -static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } -static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } - -#endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h new file mode 100644 index 000000000000..c02bb76c5e32 --- /dev/null +++ b/include/linux/cgroup_namespace.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CGROUP_NAMESPACE_H +#define _LINUX_CGROUP_NAMESPACE_H + +struct cgroup_namespace { + struct ns_common ns; + struct user_namespace *user_ns; + struct ucounts *ucounts; + struct css_set *root_cset; +}; + +extern struct cgroup_namespace init_cgroup_ns; + +#ifdef CONFIG_CGROUPS + +static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) +{ + return container_of(ns, struct cgroup_namespace, ns); +} + +void free_cgroup_ns(struct cgroup_namespace *ns); + +struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, + struct user_namespace *user_ns, + struct cgroup_namespace *old_ns); + +int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns); + +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_cgroup_ns(ns); +} + +#else /* !CONFIG_CGROUPS */ + +static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } +static inline struct cgroup_namespace * +copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, + struct 
cgroup_namespace *old_ns) +{ + return old_ns; +} + +static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } +static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } + +#endif /* !CONFIG_CGROUPS */ + +#endif /* _LINUX_CGROUP_NAMESPACE_H */ -- cgit v1.2.3 From 5612ff3ec588be09f11a9424db6d1186bcdeb3fa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:07 +0200 Subject: nscommon: simplify initialization There's a lot of information that namespace implementers don't need to know about at all. Encapsulate this all in the initialization helper. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 39 +++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 78b17fe80b62..05c7a7dd211b 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -16,6 +16,15 @@ struct time_namespace; struct user_namespace; struct uts_namespace; +extern struct cgroup_namespace init_cgroup_ns; +extern struct ipc_namespace init_ipc_ns; +extern struct mnt_namespace init_mnt_ns; +extern struct net init_net; +extern struct pid_namespace init_pid_ns; +extern struct time_namespace init_time_ns; +extern struct user_namespace init_user_ns; +extern struct uts_namespace init_uts_ns; + struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; @@ -31,8 +40,7 @@ struct ns_common { }; }; -int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, - bool alloc_inum); +int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); #define to_ns_common(__ns) \ _Generic((__ns), \ @@ -45,4 +53,31 @@ int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, struct user_namespace *: &(__ns)->ns, \ struct uts_namespace *: &(__ns)->ns) +#define ns_init_inum(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CGROUP_NS_INIT_INO, \ + struct ipc_namespace *: IPC_NS_INIT_INO, \ + struct mnt_namespace *: MNT_NS_INIT_INO, \ + struct net *: NET_NS_INIT_INO, \ + struct pid_namespace *: PID_NS_INIT_INO, \ + struct time_namespace *: TIME_NS_INIT_INO, \ + struct user_namespace *: USER_NS_INIT_INO, \ + struct uts_namespace *: UTS_NS_INIT_INO) + +#define ns_init_ns(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &init_cgroup_ns, \ + struct ipc_namespace *: &init_ipc_ns, \ + struct mnt_namespace *: &init_mnt_ns, \ + struct net *: &init_net, \ + struct pid_namespace *: &init_pid_ns, \ + struct time_namespace *: &init_time_ns, \ + struct user_namespace *: &init_user_ns, \ + struct uts_namespace *: &init_uts_ns) + +#define ns_common_init(__ns, __ops) \ + __ns_common_init(to_ns_common(__ns), __ops, (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) + +#define ns_common_init_inum(__ns, __ops, __inum) __ns_common_init(to_ns_common(__ns), __ops, __inum) + #endif -- cgit v1.2.3 From be5f21d3985f00827e09b798f7a07ebd6dd7f54a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:08 +0200 Subject: ns: add ns_common_free() And drop ns_free_inum(). Anything common that can be freed centrally should be freed in the new common helper.
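A hedged sketch of how a namespace-creation path might pair the new macro form of ns_common_init() with ns_common_free(); the function and its error handling are invented, timens_operations is the existing ops table for time namespaces.

	#include <linux/err.h>
	#include <linux/slab.h>
	#include <linux/proc_ns.h>
	#include <linux/time_namespace.h>

	/* Illustrative clone path using ns_common_init()/ns_common_free(). */
	static struct time_namespace *demo_clone_time_ns(void)
	{
		struct time_namespace *ns;
		int err;

		ns = kzalloc(sizeof(*ns), GFP_KERNEL);
		if (!ns)
			return ERR_PTR(-ENOMEM);

		/* picks TIME_NS_INIT_INO for &init_time_ns, else allocates */
		err = ns_common_init(ns, &timens_operations);
		if (err) {
			kfree(ns);
			return ERR_PTR(err);
		}

		/* later failure paths undo it centrally: */
		/* ns_common_free(ns); kfree(ns); */
		return ns;
	}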
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 3 +++ include/linux/proc_ns.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 05c7a7dd211b..19833ac547f9 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -41,6 +41,7 @@ struct ns_common { }; int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); +void __ns_common_free(struct ns_common *ns); #define to_ns_common(__ns) \ _Generic((__ns), \ @@ -80,4 +81,6 @@ int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, #define ns_common_init_inum(__ns, __ops, __inum) __ns_common_init(to_ns_common(__ns), __ops, __inum) +#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) + #endif diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 9f21670b5824..08016f6e0e6f 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,8 +66,6 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -#define ns_free_inum(ns) proc_free_inum((ns)->inum) - #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) #endif /* _LINUX_PROC_NS_H */ -- cgit v1.2.3 From 224ef741ce87aa6474b82e0eb76e0e8e1bafe544 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:46 +0200 Subject: ns: add reference count helpers Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 19833ac547f9..65e258e1fdc6 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -43,16 +43,24 @@ struct ns_common { int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); void __ns_common_free(struct ns_common *ns); -#define to_ns_common(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: &(__ns)->ns, \ - struct ipc_namespace *: &(__ns)->ns, \ - struct mnt_namespace *: &(__ns)->ns, \ - struct net *: &(__ns)->ns, \ - struct pid_namespace *: &(__ns)->ns, \ - struct time_namespace *: &(__ns)->ns, \ - struct user_namespace *: &(__ns)->ns, \ - struct uts_namespace *: &(__ns)->ns) +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns)->ns, \ + const struct cgroup_namespace *: &(__ns)->ns, \ + struct ipc_namespace *: &(__ns)->ns, \ + const struct ipc_namespace *: &(__ns)->ns, \ + struct mnt_namespace *: &(__ns)->ns, \ + const struct mnt_namespace *: &(__ns)->ns, \ + struct net *: &(__ns)->ns, \ + const struct net *: &(__ns)->ns, \ + struct pid_namespace *: &(__ns)->ns, \ + const struct pid_namespace *: &(__ns)->ns, \ + struct time_namespace *: &(__ns)->ns, \ + const struct time_namespace *: &(__ns)->ns, \ + struct user_namespace *: &(__ns)->ns, \ + const struct user_namespace *: &(__ns)->ns, \ + struct uts_namespace *: &(__ns)->ns, \ + const struct uts_namespace *: &(__ns)->ns) #define ns_init_inum(__ns) \ _Generic((__ns), \ @@ -83,4 +91,21 @@ void __ns_common_free(struct ns_common *ns); #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) +{ + return refcount_dec_and_test(&ns->count); +} + +static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) +{ + 
return refcount_inc_not_zero(&ns->count); +} + +#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->count) +#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->count) +#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns))) +#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns))) +#define ns_ref_put_and_lock(__ns, __lock) \ + refcount_dec_and_lock(&to_ns_common((__ns))->count, (__lock)) + #endif -- cgit v1.2.3 From 06099e374f3ab818f0501671b21493ba2e1b94b9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:48 +0200 Subject: cgroup: port to ns_ref_*() helpers Stop accessing ns.count directly. Acked-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/cgroup_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h index c02bb76c5e32..b7dbf4d623d2 100644 --- a/include/linux/cgroup_namespace.h +++ b/include/linux/cgroup_namespace.h @@ -29,12 +29,12 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, static inline void get_cgroup_ns(struct cgroup_namespace *ns) { - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { - if (refcount_dec_and_test(&ns->ns.count)) + if (ns_ref_put(ns)) free_cgroup_ns(ns); } -- cgit v1.2.3 From d4825c99d6a738c565d5142ce37369368a4352da Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:49 +0200 Subject: ipc: port to ns_ref_*() helpers Stop accessing ns.count directly. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ipc_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 924e4754374f..21eff63f47da 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -140,14 +140,14 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) { if (ns) { - if (refcount_inc_not_zero(&ns->ns.count)) + if (ns_ref_get(ns)) return ns; } -- cgit v1.2.3 From 07897b38eadf5a370a6001790239f23036d5b970 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:50 +0200 Subject: pid: port to ns_ref_*() helpers Stop accessing ns.count directly. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/pid_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index ba0efc8c8596..5b2f29d369c4 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -62,7 +62,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns) static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } -- cgit v1.2.3 From e0c173f1fa02c0b08720aa8aa0cc91c3063146ae Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:51 +0200 Subject: time: port to ns_ref_*() helpers Stop accessing ns.count directly. 
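The pattern all of these ports follow, shown once as a sketch (it mirrors the get/put helpers in the diffs; the demo_* names are not real):

	/* Take and drop a namespace reference via the new helpers. */
	static struct time_namespace *demo_get_time_ns(struct time_namespace *ns)
	{
		ns_ref_inc(ns);			/* unconditional increment */
		return ns;
	}

	static void demo_put_time_ns(struct time_namespace *ns)
	{
		if (ns_ref_put(ns))		/* true once the last ref drops */
			free_time_ns(ns);
	}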
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index a47a4ce4183e..f3b9567cf1f4 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -44,7 +44,7 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } @@ -57,7 +57,7 @@ struct page *find_timens_vvar_page(struct vm_area_struct *vma); static inline void put_time_ns(struct time_namespace *ns) { - if (refcount_dec_and_test(&ns->ns.count)) + if (ns_ref_put(ns)) free_time_ns(ns); } -- cgit v1.2.3 From 96d997ea5ad1911cc393ffdb5c928b532f2f921a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:52 +0200 Subject: user: port to ns_ref_*() helpers Stop accessing ns.count directly. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/user_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index a09056ad090e..9a9aebbf96b9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -176,7 +176,7 @@ static inline struct user_namespace *to_user_ns(struct ns_common *ns) static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } @@ -186,7 +186,7 @@ extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { - if (ns && refcount_dec_and_test(&ns->ns.count)) + if (ns && ns_ref_put(ns)) __put_user_ns(ns); } -- cgit v1.2.3 From 2438b7d63ad866d6b2bb7b8d3455a6365d9b0fbe Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:56 +0200 Subject: uts: port to ns_ref_*() helpers Stop accessing ns.count directly. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/uts_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uts_namespace.h b/include/linux/uts_namespace.h index c2b619bb4e57..23b4f0e1b338 100644 --- a/include/linux/uts_namespace.h +++ b/include/linux/uts_namespace.h @@ -25,7 +25,7 @@ static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) static inline void get_uts_ns(struct uts_namespace *ns) { - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); } extern struct uts_namespace *copy_utsname(unsigned long flags, @@ -34,7 +34,7 @@ extern void free_uts_ns(struct uts_namespace *ns); static inline void put_uts_ns(struct uts_namespace *ns) { - if (refcount_dec_and_test(&ns->ns.count)) + if (ns_ref_put(ns)) free_uts_ns(ns); } -- cgit v1.2.3 From 024596a4e2802e457a9f92af79f246fa9631f8de Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:59 +0200 Subject: ns: rename to __ns_ref Make it easier to grep and rename to ns_count. 
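To see why the callers keep working unchanged, it helps to trace one expansion; this is a sketch of what the preprocessor produces after the rename, not code from the patch:

	/* ns_ref_read(time_ns) expands roughly to: */
	refcount_read(&to_ns_common(time_ns)->__ns_ref);
	/* and, via the _Generic selection, to: */
	refcount_read(&time_ns->ns.__ns_ref);
	/* Any remaining direct access to the old 'count' member now fails
	 * to build, and '__ns_ref' is trivially greppable. */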
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 65e258e1fdc6..aea8528d799a 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -29,7 +29,7 @@ struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; - refcount_t count; + refcount_t __ns_ref; /* do not use directly */ union { struct { u64 ns_id; @@ -93,19 +93,19 @@ void __ns_common_free(struct ns_common *ns); static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) { - return refcount_dec_and_test(&ns->count); + return refcount_dec_and_test(&ns->__ns_ref); } static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) { - return refcount_inc_not_zero(&ns->count); + return refcount_inc_not_zero(&ns->__ns_ref); } -#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->count) -#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->count) +#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref) +#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref) #define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns))) #define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns))) #define ns_ref_put_and_lock(__ns, __lock) \ - refcount_dec_and_lock(&to_ns_common((__ns))->count, (__lock)) + refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock)) #endif -- cgit v1.2.3 From daf4c2929fb792d24af0cd7bb6ca1f2949190fa4 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:34 -0700 Subject: bpf: bpf_verifier_state->cleaned flag instead of REG_LIVE_DONE Prepare for bpf_reg_state->live field removal by introducing a separate flag to track if clean_verifier_state() had been applied to the state. No functional changes. 
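A hedged sketch of how such a flag is typically consumed (the real logic lives in kernel/bpf/verifier.c and is not part of this hunk):

	static void clean_verifier_state(struct bpf_verifier_env *env,
					 struct bpf_verifier_state *st)
	{
		if (st->cleaned)	/* previously inferred from REG_LIVE_DONE marks */
			return;
		/* ... invalidate registers/stack slots that were never read ... */
		st->cleaned = true;	/* previously: REG_LIVE_DONE set per register */
	}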
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-1-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux')
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 020de62bd09c..ac16da8b49dc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -45,7 +45,6 @@ enum bpf_reg_liveness { REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */ REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64, REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */ - REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ }; #define ITER_PREFIX "bpf_iter_" @@ -445,6 +444,7 @@ struct bpf_verifier_state { bool speculative; bool in_sleepable; + bool cleaned; /* first and last insn idx of this verifier state */ u32 first_insn_idx; -- cgit v1.2.3
From 3b20d3c120bae1e18ee11aa04531b161743db682 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:37 -0700 Subject: bpf: declare a few utility functions as internal api Namely, rename the following functions and add prototypes to bpf_verifier.h: - find_containing_subprog -> bpf_find_containing_subprog - insn_successors -> bpf_insn_successors - calls_callback -> bpf_calls_callback - fmt_stack_mask -> bpf_fmt_stack_mask Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-4-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux')
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ac16da8b49dc..93563564bde5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1065,4 +1065,9 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, u32 frameno); +struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off); +int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); +void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); +bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3
From efcda22aa541bbda827e54302baf9ae4fd44cdf2 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:38 -0700 Subject: bpf: compute instructions postorder per subprogram The next patch requires doing postorder traversal of individual subprograms. Facilitate this by moving env->cfg.insn_postorder computation from check_cfg() to a separate pass, as check_cfg() descends into called subprograms (and it needs to, because of merge_callee_effects() logic). env->cfg.insn_postorder is used only by compute_live_registers(); this function does not track cross-subprogram dependencies, thus the change does not affect its operation.
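With the vector grouped by subprogram, traversing one subprogram becomes a contiguous scan. A sketch, under the assumption that the slices are laid out back to back and the final slice ends at prog->len:

	struct bpf_subprog_info *sub = &env->subprog_info[i];
	u32 pos = sub->postorder_start;
	u32 end = i + 1 < env->subprog_cnt ?
		  env->subprog_info[i + 1].postorder_start : env->prog->len;

	while (pos < end)
		visit(env->cfg.insn_postorder[pos++]);	/* visit() is hypothetical */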
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-5-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 93563564bde5..bd87e80f9423 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -665,6 +665,7 @@ struct bpf_subprog_info { /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ + u32 postorder_start; /* The idx to the env->cfg.insn_postorder */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; /* offsets in range [stack_depth .. fastcall_stack_off) @@ -794,7 +795,10 @@ struct bpf_verifier_env { struct { int *insn_state; int *insn_stack; - /* vector of instruction indexes sorted in post-order */ + /* + * vector of instruction indexes sorted in post-order, grouped by subprogram, + * see bpf_subprog_info->postorder_start. + */ int *insn_postorder; int cur_stack; /* current position in the insn_postorder vector */ -- cgit v1.2.3 From b3698c356ad92bcdb9920655bc9df02a2a8946f9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:39 -0700 Subject: bpf: callchain sensitive stack liveness tracking using CFG This commit adds a flow-sensitive, context-sensitive, path-insensitive data flow analysis for live stack slots: - flow-sensitive: uses program control flow graph to compute data flow values; - context-sensitive: collects data flow values for each possible call chain in a program; - path-insensitive: does not distinguish between separate control flow graph paths reaching the same instruction. Compared to the current path-sensitive analysis, this approach trades some precision for not having to enumerate every path in the program. This gives a theoretical capability to run the analysis before main verification pass. See cover letter for motivation. The basic idea is as follows: - Data flow values indicate stack slots that might be read and stack slots that are definitely written. - Data flow values are collected for each (call chain, instruction number) combination in the program. - Within a subprogram, data flow values are propagated using control flow graph. - Data flow values are transferred from entry instructions of callee subprograms to call sites in caller subprograms. In other words, a tree of all possible call chains is constructed. Each node of this tree represents a subprogram. Read and write marks are collected for each instruction of each node. Live stack slots are first computed for lower level nodes. Then, information about outer stack slots that might be read or are definitely written by a subprogram is propagated one level up, to the corresponding call instructions of the upper nodes. Procedure repeats until root node is processed. In the absence of value range analysis, stack read/write marks are collected during main verification pass, and data flow computation is triggered each time verifier.c:states_equal() needs to query the information. Implementation details are documented in kernel/bpf/liveness.c. Quantitative data about verification performance changes and memory consumption is in the cover letter. 
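The underlying transfer functions are the classic backward liveness equations, sketched here with stack slots represented as u64 bitmasks (matching the masks in the API below):

	/* For each instruction n, iterated to a fixed point over the CFG:
	 *
	 *   live_out(n) = union of live_in(s) for each successor s of n
	 *   live_in(n)  = (live_out(n) & ~must_write(n)) | may_read(n)
	 *
	 * Call instructions additionally merge in the callee entry's marks,
	 * which is what makes the analysis callchain (context) sensitive.
	 */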
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-6-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index bd87e80f9423..2e3bdd50e2ba 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -745,6 +745,8 @@ struct bpf_scc_info { struct bpf_scc_visit visits[]; }; +struct bpf_liveness; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -846,6 +848,7 @@ struct bpf_verifier_env { struct bpf_insn insn_buf[INSN_BUF_SIZE]; struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; struct bpf_scc_callchain callchain_buf; + struct bpf_liveness *liveness; /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; @@ -1074,4 +1077,15 @@ int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); +int bpf_stack_liveness_init(struct bpf_verifier_env *env); +void bpf_stack_liveness_free(struct bpf_verifier_env *env); +int bpf_update_live_stack(struct bpf_verifier_env *env); +int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask); +void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask); +int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx); +int bpf_commit_stack_write_marks(struct bpf_verifier_env *env); +int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st); +bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi); +void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env); + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From ccf25a67c7e29cfa6815d193054789b45ef825ad Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:41 -0700 Subject: bpf: signal error if old liveness is more conservative than new Unlike the new algorithm, register chain based liveness tracking is fully path sensitive, and thus should be strictly more accurate. Validate the new algorithm by signaling an error whenever it considers a stack slot dead while the old algorithm considers it alive. 
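Conceptually the cross-check reduces to something like the following sketch (names follow the prototypes added in this series; the exact reporting code is an assumption):

	/* The old, path-sensitive algorithm says the slot is alive ... */
	if (old_live && !bpf_stack_slot_alive(env, frameno, spi)) {
		/* ... but the new analysis says it is dead: flag a bug */
		verbose(env, "liveness mismatch: frame %u slot %u\n", frameno, spi);
		env->internal_error = true;
	}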
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-8-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2e3bdd50e2ba..dec5da3a2e59 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -852,6 +852,7 @@ struct bpf_verifier_env { /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; + bool internal_error; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) -- cgit v1.2.3 From 107e169799057bc6a379ddb625cbe1e51cfc7d72 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:42 -0700 Subject: bpf: disable and remove registers chain based liveness Remove register chain based liveness tracking: - struct bpf_reg_state->{parent,live} fields are no longer needed; - REG_LIVE_WRITTEN marks are superseded by bpf_mark_stack_write() calls; - mark_reg_read() calls are superseded by bpf_mark_stack_read(); - log.c:print_liveness() is superseded by logging in liveness.c; - propagate_liveness() is superseded by bpf_update_live_stack(); - no need to establish register chains in is_state_visited() anymore; - fix a bunch of tests expecting "_w" suffixes in verifier log messages. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-9-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index dec5da3a2e59..c7515da8500c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -26,27 +26,6 @@ /* Patch buffer size */ #define INSN_BUF_SIZE 32 -/* Liveness marks, used for registers and spilled-regs (in stack slots). - * Read marks propagate upwards until they find a write mark; they record that - * "one of this state's descendants read this reg" (and therefore the reg is - * relevant for states_equal() checks). - * Write marks collect downwards and do not propagate; they record that "the - * straight-line code that reached this state (from its parent) wrote this reg" - * (and therefore that reads propagated from this state or its descendants - * should not propagate to its parent). - * A state with a write mark can receive read marks; it just won't propagate - * them to its parent, since the write mark is a property, not of the state, - * but of the link between it and its parent. See mark_reg_read() and - * mark_stack_slot_read() in kernel/bpf/verifier.c. - */ -enum bpf_reg_liveness { - REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ - REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */ - REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */ - REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64, - REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */ -}; - #define ITER_PREFIX "bpf_iter_" enum bpf_iter_state { @@ -211,8 +190,6 @@ struct bpf_reg_state { * allowed and has the same effect as bpf_sk_release(sk). 
*/ u32 ref_obj_id; - /* parentage chain for liveness checking */ - struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like * R1=fp-8 and R2=fp-8, but one of them points to this function stack * while another to the caller's stack. To differentiate them 'frameno' @@ -225,7 +202,6 @@ struct bpf_reg_state { * patching which only happens after main verification finished. */ s32 subreg_def; - enum bpf_reg_liveness live; /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */ bool precise; }; @@ -852,7 +828,6 @@ struct bpf_verifier_env { /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; - bool internal_error; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) -- cgit v1.2.3
From 79f047c7d968b21ff4b72bd70c4533140553c56c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:43 -0700 Subject: bpf: table based bpf_insn_successors() Converting bpf_insn_successors() to use a lookup table makes it ~1.5 times faster. Also remove unnecessary conditionals: - `idx + 1 < prog->len` is unnecessary because after check_cfg() all jump targets are guaranteed to be within a program; - `i == 0 || succ[0] != dst` is unnecessary because any client of bpf_insn_successors() can handle duplicate edges: - compute_live_registers() - compute_scc() Moving bpf_insn_successors() to liveness.c allows its inlining in liveness.c:__update_stack_liveness(). Such inlining speeds up __update_stack_liveness() by ~40%. bpf_insn_successors() is used in both verifier.c and liveness.c. perf shows such a move does not negatively impact users in verifier.c, as these are executed only once before the main verification pass, unlike __update_stack_liveness(), which can be triggered multiple times. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-10-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux')
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c7515da8500c..4c497e839526 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1049,6 +1049,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st u32 frameno); struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off); +int bpf_jmp_offset(struct bpf_insn *insn); int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); -- cgit v1.2.3
From 32a8d2a197c1d2d36badb401657aa193938f071c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 17 Sep 2025 16:12:01 +0100 Subject: net: stmmac: rework mac_interface and phy_interface documentation Based on new research, it has come to light that the comment that I added in a014c35556b9 ("net: stmmac: clarify difference between "interface" and "phy_interface"") is not fully correct. Update the comment to properly describe the difference between the two. None of the DTS files in the kernel tree mention the "mac-mode" property, which results in mac_interface and phy_interface being the same. Also, none of the platform glue drivers set mac_interface to anything but PHY_INTERFACE_MODE_NA.
This means that for all the platforms known to mainline, mac_interface is either the same as phy_interface, or it is PHY_INTERFACE_MODE_NA. Thus, updating the definition for mac_interface in stmmac.h has no material effect on current uses known to mainline, but the change opens the door to cleaning up all uses. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uytpB-00000006H23-0pRi@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'include/linux')
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e284f04964bf..f14f34ec6d5e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -190,18 +190,32 @@ struct plat_stmmacenet_data { int bus_id; int phy_addr; /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media - * ^ ^ - * mac_interface phy_interface + * ^ ^ + * mac_interface phy_interface * - * mac_interface is the MAC-side interface, which may be the same - * as phy_interface if there is no intervening PCS. If there is a - * PCS, then mac_interface describes the interface mode between the - * MAC and PCS, and phy_interface describes the interface mode - * between the PCS and PHY. + * The Synopsys dwmac core only covers the MAC and an optional + * integrated PCS. Where the integrated PCS is used with a SerDes, + * e.g. for 1000base-X or Cisco SGMII, the connection between the + * PCS and SerDes will be TBI. + * + * Where the Synopsys dwmac core has been instantiated with multiple + * interface modes, these are selected via core-external configuration + * which is sampled when the dwmac core is reset. How this is done is + * platform glue specific, but this defines the interface used from + * the Synopsys dwmac core to the rest of the SoC. + * + * Where PCS other than the optional integrated Synopsys dwmac PCS + * is used, this counts as "the rest of the SoC" in the above + * paragraph. + * + * Thus, mac_interface is of little use inside the stmmac code; + * please do not use unless there is a definite requirement, and + * make sure to gain review feedback first. */ phy_interface_t mac_interface; /* phy_interface is the PHY-side interface - the interface used by - * an attached PHY. + * an attached PHY or SFP etc. This is equivalent to the interface + * that phylink uses. */ phy_interface_t phy_interface; struct stmmac_mdio_bus_data *mdio_bus_data; -- cgit v1.2.3
From 6b0ed6a3a89cd2d04980e15a44c645bebb077418 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 17 Sep 2025 16:12:47 +0100 Subject: net: stmmac: remove mac_interface mac_interface has served little purpose, and has only caused confusion. Now that we have cleaned up all platform glue drivers which should not have been using mac_interface, there are no users remaining. Remove mac_interface. This results in the special dwmac specific "mac-mode" DT property becoming redundant, and in any case, no DTS files in the kernel make use of this property. Add a warning if the property is set, and it is different from the "phy-mode".
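A hedged sketch of the DT handling this implies; the actual warning lives in the stmmac platform code, and np, plat and dev are assumed from the surrounding probe context:

	int err;
	const char *mode;

	err = of_get_phy_mode(np, &plat->phy_interface);
	if (err)
		return err;

	/* "mac-mode" is now only checked for consistency, never honoured */
	if (!of_property_read_string(np, "mac-mode", &mode) &&
	    strcmp(mode, phy_modes(plat->phy_interface)))
		dev_warn(dev, "\"mac-mode\" differs from \"phy-mode\", ignoring it\n");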
Signed-off-by: Russell King (Oracle) Acked-by: Vladimir Zapolskiy Link: https://patch.msgid.link/E1uytpv-00000006H2x-196h@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux')
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index f14f34ec6d5e..fa1318bac06c 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -190,8 +190,8 @@ struct plat_stmmacenet_data { int bus_id; int phy_addr; /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media - * ^ ^ - * mac_interface phy_interface + * ^ + * phy_interface * * The Synopsys dwmac core only covers the MAC and an optional * integrated PCS. Where the integrated PCS is used with a SerDes, @@ -208,12 +208,7 @@ struct plat_stmmacenet_data { * is used, this counts as "the rest of the SoC" in the above * paragraph. * - * Thus, mac_interface is of little use inside the stmmac code; - * please do not use unless there is a definite requirement, and - * make sure to gain review feedback first. - */ - phy_interface_t mac_interface; - /* phy_interface is the PHY-side interface - the interface used by + * phy_interface is the PHY-side interface - the interface used by * an attached PHY or SFP etc. This is equivalent to the interface * that phylink uses. */ phy_interface_t phy_interface; -- cgit v1.2.3
From b34df17d588de926212527a2f2ce72bc4e330260 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 18 Sep 2025 05:25:57 -0700 Subject: net: netpoll: remove unused netpoll pointer from netpoll_info The netpoll_info structure contains a useless pointer back to its associated netpoll. This field is never used, and the assignment in __netpoll_setup() does not contemplate multiple instances, as reported by Jay[1]. Drop both the member and its initialization to simplify the structure. Link: https://lore.kernel.org/all/2930648.1757463506@famine/ [1] Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250918-netpoll_jv-v1-1-67d50eeb2c26@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux')
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b5ea9882eda8..f22eec466040 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -55,7 +55,6 @@ struct netpoll_info { struct delayed_work tx_work; - struct netpoll *netpoll; struct rcu_head rcu; }; -- cgit v1.2.3
From bee8a520eb84950193d0566ea2c2e46406a4b6ce Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Sep 2025 17:50:56 +0800 Subject: rhashtable: Use rcu_dereference_all and rcu_dereference_all_check Add rcu_dereference_all and rcu_dereference_all_check so that library code such as rhashtable can be used with any RCU variant. As it stands rcu_dereference is used within rhashtable, which creates false-positive warnings if the user calls it from another RCU context, such as preempt_disable(). Use the rcu_dereference_all and rcu_dereference_all_check calls in rhashtable to suppress these warnings. Also replace the rcu_dereference_raw calls in the list iterators with rcu_dereference_all to uncover buggy calls. Reported-by: Menglong Dong Signed-off-by: Herbert Xu Reviewed-by: Paul E.
McKenney Signed-off-by: Herbert Xu --- include/linux/rcupdate.h | 26 ++++++++++++++++++++++++++ include/linux/rhashtable.h | 14 +++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..448eb1f0cb48 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -713,6 +713,24 @@ do { \ (c) || rcu_read_lock_sched_held(), \ __rcu) +/** + * rcu_dereference_all_check() - rcu_dereference_all with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is similar to rcu_dereference_check(), but allows protection + * by all forms of vanilla RCU readers, including preemption disabled, + * bh-disabled, and interrupt-disabled regions of code. Note that "vanilla + * RCU" excludes SRCU and the various Tasks RCU flavors. Please note + * that this macro should not be backported to any Linux-kernel version + * preceding v5.0 due to changes in synchronize_rcu() semantics prior + * to that version. + */ +#define rcu_dereference_all_check(p, c) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_any_held(), \ + __rcu) + /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. @@ -767,6 +785,14 @@ do { \ */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) +/** + * rcu_dereference_all() - fetch RCU-all-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_all(p) rcu_dereference_all_check(p, 0) + /** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e740157f3cd7..05a221ce79a6 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -272,13 +272,13 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert( rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ - rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + rcu_dereference_all_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ - rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + rcu_dereference_all_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) @@ -373,7 +373,7 @@ static inline struct rhash_head *__rht_ptr( static inline struct rhash_head *rht_ptr_rcu( struct rhash_lock_head __rcu *const *bkt) { - return __rht_ptr(rcu_dereference(*bkt), bkt); + return __rht_ptr(rcu_dereference_all(*bkt), bkt); } static inline struct rhash_head *rht_ptr( @@ -497,7 +497,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = head; \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain @@ -513,7 +513,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = 
rcu_dereference_all(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head @@ -560,7 +560,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, * list returned by rhltable_lookup. */ #define rhl_for_each_rcu(pos, list) \ - for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + for (pos = list; pos; pos = rcu_dereference_all(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type @@ -574,7 +574,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) -- cgit v1.2.3
From 3d716c51e0e8791f8dd72479a3e6d5e7650ac35e Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 13 Sep 2025 18:57:51 +0800 Subject: crypto: hisilicon/qm - mask axi error before memory init After the device memory is cleared, if the software sends a doorbell operation, the hardware may trigger an AXI error when processing the doorbell. This error is caused by the memory clearing and the hardware accessing address 0. Therefore, the AXI error is masked during this period. Signed-off-by: Weili Qian Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux')
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index f2254ddc327c..c4690e365ade 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -104,6 +104,8 @@ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ #define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce" +#define QM_ECC_MBIT BIT(2) + enum qm_stop_reason { QM_NORMAL, QM_SOFT_RESET, @@ -240,19 +242,22 @@ enum acc_err_result { ACC_ERR_RECOVERED, }; -struct hisi_qm_err_info { - char *acpi_rst; - u32 msi_wr_port; +struct hisi_qm_err_mask { u32 ecc_2bits_mask; - u32 qm_shutdown_mask; - u32 dev_shutdown_mask; - u32 qm_reset_mask; - u32 dev_reset_mask; + u32 shutdown_mask; + u32 reset_mask; u32 ce; u32 nfe; u32 fe; }; +struct hisi_qm_err_info { + char *acpi_rst; + u32 msi_wr_port; + struct hisi_qm_err_mask qm_err; + struct hisi_qm_err_mask dev_err; +}; + struct hisi_qm_err_status { u32 is_qm_ecc_mbit; u32 is_dev_ecc_mbit; @@ -273,6 +278,8 @@ struct hisi_qm_err_ini { enum acc_err_result (*get_err_result)(struct hisi_qm *qm); bool (*dev_is_abnormal)(struct hisi_qm *qm); int (*set_priv_status)(struct hisi_qm *qm); + void (*disable_axi_error)(struct hisi_qm *qm); + void (*enable_axi_error)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { -- cgit v1.2.3
From 79525b51acc1c8e331ab47eb131a99f5370a76c2 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 19 Sep 2025 12:38:58 -0700 Subject: io_uring: fix nvme's 32b cqes on mixed cq The nvme uring_cmd only uses 32b CQEs. If the ring uses a mixed CQ, then we need to make sure we flag the completion as a 32b CQE. On the other hand, if nvme uring_cmd was using a dedicated 32b CQE, the posting was missing the extra memcpy because it only applied to big CQEs on a mixed CQ.
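For callers, the fix boils down to picking the matching completion helper; a sketch of the driver-side usage (the surrounding nvme code is elided, and status/result64 are assumed names):

	/* 32-byte CQE command: the extra 16 bytes must reach the CQ even
	 * when the ring was created with IORING_SETUP_CQE_MIXED */
	io_uring_cmd_done32(ioucmd, status, result64, issue_flags);

	/* ordinary 16-byte completion path stays as before */
	io_uring_cmd_done(ioucmd, ret, 0, issue_flags);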
Fixes: e26dca67fde1943 ("io_uring: add support for IORING_SETUP_CQE_MIXED") Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index c8185f54fde9..02d50f08f668 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -56,8 +56,8 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, * Note: the caller should never hard code @issue_flags and is only allowed * to pass the mask provided by the core io_uring code. */ -void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2, - unsigned issue_flags); +void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2, + unsigned issue_flags, bool is_cqe32); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, io_uring_cmd_tw_t task_work_cb, @@ -104,8 +104,8 @@ static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, { return -EOPNOTSUPP; } -static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, - u64 ret2, unsigned issue_flags) +static inline void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, + u64 ret2, unsigned issue_flags, bool is_cqe32) { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, @@ -159,6 +159,18 @@ static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) return cmd_to_io_kiocb(cmd)->ctx; } +static inline void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, + u64 res2, unsigned issue_flags) +{ + return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, false); +} + +static inline void io_uring_cmd_done32(struct io_uring_cmd *ioucmd, s32 ret, + u64 res2, unsigned issue_flags) +{ + return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, true); +} + int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, void (*release)(void *), unsigned int index, unsigned int issue_flags); -- cgit v1.2.3 From 1e338f4d99e6814ede16bad1db1cc463aad8032c Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Sun, 10 Aug 2025 17:57:45 +0500 Subject: kasan: introduce ARCH_DEFER_KASAN and unify static key across modes Patch series "kasan: unify kasan_enabled() and remove arch-specific implementations", v6. This patch series addresses the fragmentation in KASAN initialization across architectures by introducing a unified approach that eliminates duplicate static keys and arch-specific kasan_arch_is_ready() implementations. The core issue is that different architectures have inconsistent approaches to KASAN readiness tracking: - PowerPC, LoongArch, and UML arch, each implement own kasan_arch_is_ready() - Only HW_TAGS mode had a unified static key (kasan_flag_enabled) - Generic and SW_TAGS modes relied on arch-specific solutions or always-on behavior This patch (of 2): Introduce CONFIG_ARCH_DEFER_KASAN to identify architectures [1] that need to defer KASAN initialization until shadow memory is properly set up, and unify the static key infrastructure across all KASAN modes. [1] PowerPC, UML, LoongArch selects ARCH_DEFER_KASAN. 
The core issue is that different architectures have inconsistent approaches to KASAN readiness tracking: - PowerPC, LoongArch, and UML each implement their own kasan_arch_is_ready() - Only HW_TAGS mode had a unified static key (kasan_flag_enabled) - Generic and SW_TAGS modes relied on arch-specific solutions or always-on behavior This patch addresses the fragmentation in KASAN initialization across architectures by introducing a unified approach that eliminates duplicate static keys and arch-specific kasan_arch_is_ready() implementations. Let's replace kasan_arch_is_ready() with the existing kasan_enabled() check, which examines the static key when the architecture selects ARCH_DEFER_KASAN or supports HW_TAGS mode. For other architectures, kasan_enabled() checks enablement at compile time. Now KASAN users can use a single kasan_enabled() check everywhere. Link: https://lkml.kernel.org/r/20250810125746.1105476-1-snovitoll@gmail.com Link: https://lkml.kernel.org/r/20250810125746.1105476-2-snovitoll@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217049 Signed-off-by: Sabyrzhan Tasbolatov Reviewed-by: Christophe Leroy Reviewed-by: Ritesh Harjani (IBM) #powerpc Cc: Alexander Gordeev Cc: Alexander Potapenko Cc: Alexandre Ghiti Cc: Alexandre Ghiti Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Baoquan He Cc: David Gow Cc: Dmitriy Vyukov Cc: Heiko Carstens Cc: Huacai Chen Cc: Marco Elver Cc: Qing Zhang Cc: Sabyrzhan Tasbolatov Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- include/linux/kasan-enabled.h | 32 +++++++++++++++++++++++--------- include/linux/kasan.h | 6 ++++++ 2 files changed, 29 insertions(+), 9 deletions(-) (limited to 'include/linux')
diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h index 6f612d69ea0c..9eca967d8526 100644 --- a/include/linux/kasan-enabled.h +++ b/include/linux/kasan-enabled.h @@ -4,32 +4,46 @@ #include -#ifdef CONFIG_KASAN_HW_TAGS - +#if defined(CONFIG_ARCH_DEFER_KASAN) || defined(CONFIG_KASAN_HW_TAGS) +/* + * Global runtime flag for KASAN modes that need runtime control. + * Used by ARCH_DEFER_KASAN architectures and HW_TAGS mode. + */ DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); +/* + * Runtime control for shadow memory initialization or HW_TAGS mode. + * Uses static key for architectures that need deferred KASAN or HW_TAGS. + */ static __always_inline bool kasan_enabled(void) { return static_branch_likely(&kasan_flag_enabled); } -static inline bool kasan_hw_tags_enabled(void) +static inline void kasan_enable(void) { - return kasan_enabled(); + static_branch_enable(&kasan_flag_enabled); } - -#else /* CONFIG_KASAN_HW_TAGS */ - -static inline bool kasan_enabled(void) +#else +/* For architectures that can enable KASAN early, use compile-time check.
*/ +static __always_inline bool kasan_enabled(void) { return IS_ENABLED(CONFIG_KASAN); } +static inline void kasan_enable(void) {} +#endif /* CONFIG_ARCH_DEFER_KASAN || CONFIG_KASAN_HW_TAGS */ + +#ifdef CONFIG_KASAN_HW_TAGS +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} +#else static inline bool kasan_hw_tags_enabled(void) { return false; } - #endif /* CONFIG_KASAN_HW_TAGS */ #endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fe5ce9215821..b509a8d36949 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -543,6 +543,12 @@ void kasan_report_async(void); #endif /* CONFIG_KASAN_HW_TAGS */ +#ifdef CONFIG_KASAN_GENERIC +void __init kasan_init_generic(void); +#else +static inline void kasan_init_generic(void) { } +#endif + #ifdef CONFIG_KASAN_SW_TAGS void __init kasan_init_sw_tags(void); #else -- cgit v1.2.3 From 2ccd9fecd9163f168761d4398564c81554f636ef Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 29 Aug 2025 17:15:27 +0100 Subject: mm: remove unused zpool layer With zswap using zsmalloc directly, there are no more in-tree users of this code. Remove it. With zpool gone, zsmalloc is now always a simple dependency and no longer something the user needs to configure. Hide CONFIG_ZSMALLOC from the user and have zswap and zram pull it in as needed. Link: https://lkml.kernel.org/r/20250829162212.208258-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: SeongJae Park Acked-by: Yosry Ahmed Cc: Chengming Zhou Cc: Nhat Pham Cc: Vitaly Wool Signed-off-by: Andrew Morton --- include/linux/zpool.h | 86 --------------------------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 include/linux/zpool.h (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h deleted file mode 100644 index 369ef068fad8..000000000000 --- a/include/linux/zpool.h +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * zpool memory storage api - * - * Copyright (C) 2014 Dan Streetman - * - * This is a common frontend for the zswap compressed memory storage - * implementations. - */ - -#ifndef _ZPOOL_H_ -#define _ZPOOL_H_ - -struct zpool; - -bool zpool_has_pool(char *type); - -struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp); - -const char *zpool_get_type(struct zpool *pool); - -void zpool_destroy_pool(struct zpool *pool); - -int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, - unsigned long *handle, const int nid); - -void zpool_free(struct zpool *pool, unsigned long handle); - -void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, - void *local_copy); - -void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, - void *handle_mem); - -void zpool_obj_write(struct zpool *zpool, unsigned long handle, - void *handle_mem, size_t mem_len); - -u64 zpool_get_total_pages(struct zpool *pool); - - -/** - * struct zpool_driver - driver implementation for zpool - * @type: name of the driver. - * @list: entry in the list of zpool drivers. - * @create: create a new pool. - * @destroy: destroy a pool. - * @malloc: allocate mem from a pool. - * @free: free mem from a pool. - * @sleep_mapped: whether zpool driver can sleep during map. - * @map: map a handle. - * @unmap: unmap a handle. - * @total_size: get total size of a pool. - * - * This is created by a zpool implementation and registered - * with zpool. 
- */ -struct zpool_driver { - char *type; - struct module *owner; - atomic_t refcount; - struct list_head list; - - void *(*create)(const char *name, gfp_t gfp); - void (*destroy)(void *pool); - - int (*malloc)(void *pool, size_t size, gfp_t gfp, - unsigned long *handle, const int nid); - void (*free)(void *pool, unsigned long handle); - - void *(*obj_read_begin)(void *pool, unsigned long handle, - void *local_copy); - void (*obj_read_end)(void *pool, unsigned long handle, - void *handle_mem); - void (*obj_write)(void *pool, unsigned long handle, - void *handle_mem, size_t mem_len); - - u64 (*total_pages)(void *pool); -}; - -void zpool_register_driver(struct zpool_driver *driver); - -int zpool_unregister_driver(struct zpool_driver *driver); - -bool zpool_can_sleep_mapped(struct zpool *pool); - -#endif -- cgit v1.2.3
From 0bf2edf041dcb0b304a8dbda8c699771d5a245d2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:27 +0200 Subject: mm/page_alloc: reject unreasonable folio/compound page sizes in alloc_contig_range_noprof() Let's reject them early, which in turn makes folio_alloc_gigantic() reject them properly. To avoid converting from order to nr_pages, let's just add MAX_FOLIO_ORDER and calculate MAX_FOLIO_NR_PAGES based on that. While at it, let's just make the order a "const unsigned order". Link: https://lkml.kernel.org/r/20250901150359.867252-7-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Acked-by: SeongJae Park Reviewed-by: Wei Yang Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux')
diff --git a/include/linux/mm.h b/include/linux/mm.h index 00c8a54127d3..77737cbf2216 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2055,11 +2055,13 @@ static inline long folio_nr_pages(const struct folio *folio) /* Only hugetlbfs can allocate folios larger than MAX_ORDER */ #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -#define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER) +#define MAX_FOLIO_ORDER PUD_ORDER #else -#define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES +#define MAX_FOLIO_ORDER MAX_PAGE_ORDER #endif +#define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) + /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to -- cgit v1.2.3
From 4751c39eee0c3fcc742aa7d7242ce2b78faa3606 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:32 +0200 Subject: mm: limit folio/compound page sizes in problematic kernel configs Let's limit the maximum folio size in problematic kernel configs where the memmap is allocated per memory section (SPARSEMEM without SPARSEMEM_VMEMMAP) to a single memory section. Currently, only a single architecture supports ARCH_HAS_GIGANTIC_PAGE but not SPARSEMEM_VMEMMAP: sh. Fortunately, the biggest hugetlb size sh supports is 64 MiB (HUGETLB_PAGE_SIZE_64MB) and the section size is at least 64 MiB (SECTION_SIZE_BITS == 26), so their use case is not degraded. As folios and memory sections are naturally aligned to their power-of-two size in memory, a single folio can consequently no longer span multiple memory sections on these problematic kernel configs. nth_page() is no longer required when operating within a single compound page / folio.
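The sh numbers work out as follows (a worked check of the claim above, assuming 4 KiB pages):

	SECTION_SIZE_BITS == 26          -> 64 MiB sections
	PFN_SECTION_SHIFT == 26 - PAGE_SHIFT == 26 - 12 == 14
	MAX_FOLIO_ORDER   == 14          -> 2^14 pages * 4 KiB == 64 MiB

i.e. the new cap equals HUGETLB_PAGE_SIZE_64MB, so no supported configuration loses anything.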
Link: https://lkml.kernel.org/r/20250901150359.867252-12-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Wei Yang Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 77737cbf2216..2dee79fa2efc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2053,11 +2053,25 @@ static inline long folio_nr_pages(const struct folio *folio) return folio_large_nr_pages(folio); } -/* Only hugetlbfs can allocate folios larger than MAX_ORDER */ -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -#define MAX_FOLIO_ORDER PUD_ORDER -#else +#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) +/* + * We don't expect any folios that exceed buddy sizes (and consequently + * memory sections). + */ #define MAX_FOLIO_ORDER MAX_PAGE_ORDER +#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +/* + * Only pages within a single memory section are guaranteed to be + * contiguous. By limiting folios to a single memory section, all folio + * pages are guaranteed to be contiguous. + */ +#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT +#else +/* + * There is no real limit on the folio size. We limit them to the maximum we + * currently expect (e.g., hugetlb, dax). + */ +#define MAX_FOLIO_ORDER PUD_ORDER #endif #define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) -- cgit v1.2.3 From 73b3294b1152e94c1971a735b8db8c7503fd97a1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:33 +0200 Subject: mm: simplify folio_page() and folio_page_idx() Now that a single folio/compound page can no longer span memory sections in problematic kernel configurations, we can stop using nth_page() in folio_page() and folio_page_idx(). While at it, turn both macros into static inline functions and add kernel doc for folio_page_idx(). Link: https://lkml.kernel.org/r/20250901150359.867252-13-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Wei Yang Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mm.h | 16 ++++++++++++++-- include/linux/page-flags.h | 5 ++++- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2dee79fa2efc..f6880e3225c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -210,10 +210,8 @@ extern unsigned long sysctl_admin_reserve_kbytes; #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) #else #define nth_page(page,n) ((page) + (n)) -#define folio_page_idx(folio, p) ((p) - &(folio)->page) #endif /* to align the pointer to the (next) page boundary */ @@ -225,6 +223,20 @@ extern unsigned long sysctl_admin_reserve_kbytes; /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) +/** + * folio_page_idx - Return the number of a page in a folio. + * @folio: The folio. + * @page: The folio page. + * + * This function expects that the page is actually part of the folio. + * The returned number is relative to the start of the folio. 
+ */ +static inline unsigned long folio_page_idx(const struct folio *folio, + const struct page *page) +{ + return page - &folio->page; +} + static inline struct folio *lru_to_folio(struct list_head *head) { return list_entry((head)->prev, struct folio, lru);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d53a86e68c89..a88b61eec3f8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -316,7 +316,10 @@ static __always_inline unsigned long _compound_head(const struct page *page) * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ -#define folio_page(folio, n) nth_page(&(folio)->page, n) +static inline struct page *folio_page(struct folio *folio, unsigned long n) +{ + return &folio->page + n; +} static __always_inline int PageTail(const struct page *page) { -- cgit v1.2.3
From 6972706f95926838f9bd3ec2b2393c034bdb85ba Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:43 +0200 Subject: mm/cma: refuse handing out non-contiguous page ranges Let's disallow handing out PFN ranges with non-contiguous pages, so we can remove the nth-page usage in __cma_alloc(), and so any callers don't have to worry about that either when wanting to blindly iterate pages. This is really only a problem in configs with SPARSEMEM but without SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some cases. Will this cause harm? Probably not, because it's mostly 32bit that does not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could look into allocating the memmap for the memory sections spanned by a single CMA region in one go from memblock. [david@redhat.com: we can have NOMMU configs with SPARSEMEM enabled] Link: https://lkml.kernel.org/r/6ec933b1-b3f7-41c0-95d8-e518bb87375e@redhat.com Link: https://lkml.kernel.org/r/20250901150359.867252-23-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Alexandru Elisei Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux')
diff --git a/include/linux/mm.h b/include/linux/mm.h index f6880e3225c5..2ca1eb2db63e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +bool page_range_contiguous(const struct page *page, unsigned long nr_pages); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #else #define nth_page(page,n) ((page) + (n)) +static inline bool page_range_contiguous(const struct page *page, + unsigned long nr_pages) +{ + return true; +} #endif /* to align the pointer to the (next) page boundary */ -- cgit v1.2.3
From 80e7bb74d4ff24725f0ddb1c72d8de45a3d975f6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:45 +0200 Subject: scatterlist: disallow non-contiguous page ranges in a single SG entry The expectation is that there is currently no user that would pass in non-contiguous page ranges: no allocator, not even VMA, will hand these out. The only problematic part would be if someone would provide a range obtained directly from memblock, or manually merge problematic ranges. If we find such cases, we should fix them to create separate SG entries. Let's check in sg_set_page() that this is really the case.
No need to check in sg_set_folio(), as pages in a folio are guaranteed to be contiguous. As sg_set_page() gets inlined into modules, we have to export the page_range_contiguous() helper -- use EXPORT_SYMBOL, there is nothing special about this helper such that we would want to enforce GPL-only modules. We can now drop the nth_page() usage in sg_page_iter_page(). Link: https://lkml.kernel.org/r/20250901150359.867252-25-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Marek Szyprowski Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 6f8a4965f9b9..29f6ceb98d74 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -158,6 +158,7 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { + VM_WARN_ON_ONCE(!page_range_contiguous(page, ALIGN(len + offset, PAGE_SIZE) / PAGE_SIZE)); sg_assign_page(sg, page); sg->offset = offset; sg->length = len; @@ -600,7 +601,7 @@ void __sg_page_iter_start(struct sg_page_iter *piter, */ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter) { - return nth_page(sg_page(piter->sg), piter->sg_pgoffset); + return sg_page(piter->sg) + piter->sg_pgoffset; } /** -- cgit v1.2.3 From d5170ce4d71b3843613ee1840bca50ad71c3671e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:57 +0200 Subject: block: update comment of "struct bio_vec" regarding nth_page() Ever since commit 858c708d9efb ("block: move the bi_size update out of __bio_try_merge_page"), page_is_mergeable() no longer exists, and the logic in bvec_try_merge_page() is now a simple page pointer comparison. Link: https://lkml.kernel.org/r/20250901150359.867252-37-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/bvec.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 0a80e1f9aa20..3fc0efa0825b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -22,11 +22,8 @@ struct page; * @bv_len: Number of bytes in the address range. * @bv_offset: Start of the address range relative to the start of @bv_page. * - * The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len: - * - * nth_page(@bv_page, n) == @bv_page + n - * - * This holds because page_is_mergeable() checks the above property. + * All pages within a bio_vec starting from @bv_page are contiguous and + * can simply be iterated (see bvec_advance()). */ struct bio_vec { struct page *bv_page; -- cgit v1.2.3 From 84efbefa26df36a845a9210ee962aa6866f99bb7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:58 +0200 Subject: mm: remove nth_page() Now that all users are gone, let's remove it. 
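The conversion pattern that made this possible is uniform across the series; a sketch:

	/* before: needed because pages could cross a memory section */
	page = nth_page(head, i);

	/* after: plain pointer arithmetic is now always valid within a
	 * folio/compound page; cross-folio ranges are instead validated
	 * up front with page_range_contiguous() */
	page = head + i;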
Link: https://lkml.kernel.org/r/20250901150359.867252-38-david@redhat.com
Signed-off-by: David Hildenbrand
Reviewed-by: Lorenzo Stoakes
Signed-off-by: Andrew Morton
---
 include/linux/mm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2ca1eb2db63e..b26ca8b2162d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -210,9 +210,7 @@ extern unsigned long sysctl_admin_reserve_kbytes;
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 bool page_range_contiguous(const struct page *page, unsigned long nr_pages);
-#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 #else
-#define nth_page(page,n) ((page) + (n))
 static inline bool page_range_contiguous(const struct page *page,
 		unsigned long nr_pages)
 {
--
cgit v1.2.3

From 4a25f995bd59843a898b531bb3e472d710ef9439 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Wed, 10 Sep 2025 21:39:56 +0800
Subject: mm: hugetlb: directly pass order when allocating a hugetlb folio

Pass the order instead of a struct hstate to remove the huge_page_order()
calls from all hugetlb folio allocation paths; order_is_gigantic() is
added to check whether an order is gigantic.

Link: https://lkml.kernel.org/r/20250910133958.301467-4-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Acked-by: Oscar Salvador
Reviewed-by: Sidhartha Kumar
Reviewed-by: Jane Chu
Reviewed-by: Zi Yan
Cc: Brendan Jackman
Cc: David Hildenbrand
Cc: Johannes Weiner
Cc: Muchun Song
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 include/linux/hugetlb.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 526d27e88b3b..8e63e46b8e1f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -788,9 +788,14 @@ static inline unsigned huge_page_shift(struct hstate *h)
 	return h->order + PAGE_SHIFT;
 }
 
+static inline bool order_is_gigantic(unsigned int order)
+{
+	return order > MAX_PAGE_ORDER;
+}
+
 static inline bool hstate_is_gigantic(struct hstate *h)
 {
-	return huge_page_order(h) > MAX_PAGE_ORDER;
+	return order_is_gigantic(huge_page_order(h));
 }
 
 static inline unsigned int pages_per_huge_page(const struct hstate *h)
--
cgit v1.2.3

From 8eccb066f28747e966bda716cb90dbca13b78032 Mon Sep 17 00:00:00 2001
From: Max Kellermann
Date: Mon, 1 Sep 2025 22:50:10 +0200
Subject: mm: constify shmem related test functions for improved const-correctness

Patch series "mm: establish const-correctness for pointer parameters", v6.

This series improves const-correctness in the low-level
memory-management subsystem, which provides a basis for further
constification further up the call stack (e.g. filesystems). I started
this work when I tried to constify the Ceph filesystem code, but found
that to be impossible because many "mm" functions accept non-const
pointers, even though they modify nothing.

This patch (of 12):

We select certain test functions which either invoke each other,
functions that are already const-ified, or no further functions. It is
therefore relatively trivial to const-ify them, which provides a basis
for further const-ification further up the call stack.
Link: https://lkml.kernel.org/r/20250901205021.3573313-1-max.kellermann@ionos.com Link: https://lkml.kernel.org/r/20250901205021.3573313-2-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- include/linux/shmem_fs.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b26ca8b2162d..45a47b555499 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -995,11 +995,11 @@ static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) * The vma_is_shmem is not inline because it is used only by slow * paths in userfault. */ -bool vma_is_shmem(struct vm_area_struct *vma); -bool vma_is_anon_shmem(struct vm_area_struct *vma); +bool vma_is_shmem(const struct vm_area_struct *vma); +bool vma_is_anon_shmem(const struct vm_area_struct *vma); #else -static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } -static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; } +static inline bool vma_is_shmem(const struct vm_area_struct *vma) { return false; } +static inline bool vma_is_anon_shmem(const struct vm_area_struct *vma) { return false; } #endif int vma_is_stack_for_current(struct vm_area_struct *vma); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 6d0f9c599ff7..0e47465ef0fd 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -99,9 +99,9 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); #ifdef CONFIG_SHMEM -bool shmem_mapping(struct address_space *mapping); +bool shmem_mapping(const struct address_space *mapping); #else -static inline bool shmem_mapping(struct address_space *mapping) +static inline bool shmem_mapping(const struct address_space *mapping) { return false; } -- cgit v1.2.3 From 7c3e97ac0d75306d9d03de575c9878f8fd9efe3b Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:11 +0200 Subject: mm: constify pagemap related test/getter functions For improved const-correctness. We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. 
Link: https://lkml.kernel.org/r/20250901205021.3573313-3-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 55 +++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f0dfdfb13cd9..0d66a252b06f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -140,7 +140,7 @@ static inline int inode_drain_writes(struct inode *inode) return filemap_write_and_wait(inode->i_mapping); } -static inline bool mapping_empty(struct address_space *mapping) +static inline bool mapping_empty(const struct address_space *mapping) { return xa_empty(&mapping->i_pages); } @@ -166,7 +166,7 @@ static inline bool mapping_empty(struct address_space *mapping) * refcount and the referenced bit, which will be elevated or set in * the process of adding new cache pages to an inode. 
*/ -static inline bool mapping_shrinkable(struct address_space *mapping) +static inline bool mapping_shrinkable(const struct address_space *mapping) { void *head; @@ -267,7 +267,7 @@ static inline void mapping_clear_unevictable(struct address_space *mapping) clear_bit(AS_UNEVICTABLE, &mapping->flags); } -static inline bool mapping_unevictable(struct address_space *mapping) +static inline bool mapping_unevictable(const struct address_space *mapping) { return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); } @@ -277,7 +277,7 @@ static inline void mapping_set_exiting(struct address_space *mapping) set_bit(AS_EXITING, &mapping->flags); } -static inline int mapping_exiting(struct address_space *mapping) +static inline int mapping_exiting(const struct address_space *mapping) { return test_bit(AS_EXITING, &mapping->flags); } @@ -287,7 +287,7 @@ static inline void mapping_set_no_writeback_tags(struct address_space *mapping) set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } -static inline int mapping_use_writeback_tags(struct address_space *mapping) +static inline int mapping_use_writeback_tags(const struct address_space *mapping) { return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } @@ -333,7 +333,7 @@ static inline void mapping_set_inaccessible(struct address_space *mapping) set_bit(AS_INACCESSIBLE, &mapping->flags); } -static inline bool mapping_inaccessible(struct address_space *mapping) +static inline bool mapping_inaccessible(const struct address_space *mapping) { return test_bit(AS_INACCESSIBLE, &mapping->flags); } @@ -343,18 +343,18 @@ static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_ set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_space *mapping) +static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct address_space *mapping) { return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline gfp_t mapping_gfp_mask(struct address_space * mapping) +static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) { return mapping->gfp_mask; } /* Restricts the given gfp_mask to what the mapping allows. */ -static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, +static inline gfp_t mapping_gfp_constraint(const struct address_space *mapping, gfp_t gfp_mask) { return mapping_gfp_mask(mapping) & gfp_mask; @@ -477,7 +477,7 @@ mapping_min_folio_order(const struct address_space *mapping) } static inline unsigned long -mapping_min_folio_nrpages(struct address_space *mapping) +mapping_min_folio_nrpages(const struct address_space *mapping) { return 1UL << mapping_min_folio_order(mapping); } @@ -491,7 +491,7 @@ mapping_min_folio_nrpages(struct address_space *mapping) * new folio to the page cache and need to know what index to give it, * call this function. */ -static inline pgoff_t mapping_align_index(struct address_space *mapping, +static inline pgoff_t mapping_align_index(const struct address_space *mapping, pgoff_t index) { return round_down(index, mapping_min_folio_nrpages(mapping)); @@ -501,7 +501,7 @@ static inline pgoff_t mapping_align_index(struct address_space *mapping, * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. 
*/ -static inline bool mapping_large_folio_support(struct address_space *mapping) +static inline bool mapping_large_folio_support(const struct address_space *mapping) { /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON, @@ -516,7 +516,7 @@ static inline size_t mapping_max_folio_size(const struct address_space *mapping) return PAGE_SIZE << mapping_max_folio_order(mapping); } -static inline int filemap_nr_thps(struct address_space *mapping) +static inline int filemap_nr_thps(const struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS return atomic_read(&mapping->nr_thps); @@ -930,7 +930,7 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, * * Return: The index of the folio which follows this folio in the file. */ -static inline pgoff_t folio_next_index(struct folio *folio) +static inline pgoff_t folio_next_index(const struct folio *folio) { return folio->index + folio_nr_pages(folio); } @@ -959,7 +959,7 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) * e.g., shmem did not move this folio to the swap cache. * Return: true or false. */ -static inline bool folio_contains(struct folio *folio, pgoff_t index) +static inline bool folio_contains(const struct folio *folio, pgoff_t index) { VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio); return index - folio->index < folio_nr_pages(folio); @@ -1036,13 +1036,13 @@ static inline loff_t page_offset(struct page *page) /* * Get the offset in PAGE_SIZE (even for hugetlb folios). */ -static inline pgoff_t folio_pgoff(struct folio *folio) +static inline pgoff_t folio_pgoff(const struct folio *folio) { return folio->index; } -static inline pgoff_t linear_page_index(struct vm_area_struct *vma, - unsigned long address) +static inline pgoff_t linear_page_index(const struct vm_area_struct *vma, + const unsigned long address) { pgoff_t pgoff; pgoff = (address - vma->vm_start) >> PAGE_SHIFT; @@ -1462,7 +1462,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, * readahead_pos - The byte offset into the file of this readahead request. * @rac: The readahead request. */ -static inline loff_t readahead_pos(struct readahead_control *rac) +static inline loff_t readahead_pos(const struct readahead_control *rac) { return (loff_t)rac->_index * PAGE_SIZE; } @@ -1471,7 +1471,7 @@ static inline loff_t readahead_pos(struct readahead_control *rac) * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ -static inline size_t readahead_length(struct readahead_control *rac) +static inline size_t readahead_length(const struct readahead_control *rac) { return rac->_nr_pages * PAGE_SIZE; } @@ -1480,7 +1480,7 @@ static inline size_t readahead_length(struct readahead_control *rac) * readahead_index - The index of the first page in this readahead request. * @rac: The readahead request. */ -static inline pgoff_t readahead_index(struct readahead_control *rac) +static inline pgoff_t readahead_index(const struct readahead_control *rac) { return rac->_index; } @@ -1489,7 +1489,7 @@ static inline pgoff_t readahead_index(struct readahead_control *rac) * readahead_count - The number of pages in this readahead request. * @rac: The readahead request. 
*/ -static inline unsigned int readahead_count(struct readahead_control *rac) +static inline unsigned int readahead_count(const struct readahead_control *rac) { return rac->_nr_pages; } @@ -1498,12 +1498,12 @@ static inline unsigned int readahead_count(struct readahead_control *rac) * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. */ -static inline size_t readahead_batch_length(struct readahead_control *rac) +static inline size_t readahead_batch_length(const struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } -static inline unsigned long dir_pages(struct inode *inode) +static inline unsigned long dir_pages(const struct inode *inode) { return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -1517,8 +1517,8 @@ static inline unsigned long dir_pages(struct inode *inode) * Return: the number of bytes in the folio up to EOF, * or -EFAULT if the folio was truncated. */ -static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, - struct inode *inode) +static inline ssize_t folio_mkwrite_check_truncate(const struct folio *folio, + const struct inode *inode) { loff_t size = i_size_read(inode); pgoff_t index = size >> PAGE_SHIFT; @@ -1549,7 +1549,8 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, * Return: The number of filesystem blocks covered by this folio. */ static inline -unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) +unsigned int i_blocks_per_folio(const struct inode *inode, + const struct folio *folio) { return folio_size(folio) >> inode->i_blkbits; } -- cgit v1.2.3 From 959b0886256b6896b44633e0e07c5464169087c1 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:12 +0200 Subject: mm: constify zone related test/getter functions For improved const-correctness. We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. Link: https://lkml.kernel.org/r/20250901205021.3573313-4-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f3272ef5131b..6c4eae96160d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1104,7 +1104,7 @@ static inline unsigned long promo_wmark_pages(const struct zone *z) return wmark_pages(z, WMARK_PROMO); } -static inline unsigned long zone_managed_pages(struct zone *zone) +static inline unsigned long zone_managed_pages(const struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); } @@ -1128,12 +1128,12 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); } -static inline bool zone_is_initialized(struct zone *zone) +static inline bool zone_is_initialized(const struct zone *zone) { return zone->initialized; } -static inline bool zone_is_empty(struct zone *zone) +static inline bool zone_is_empty(const struct zone *zone) { return zone->spanned_pages == 0; } @@ -1273,7 +1273,7 @@ static inline bool folio_is_zone_movable(const struct folio *folio) * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone */ -static inline bool zone_intersects(struct zone *zone, +static inline bool zone_intersects(const struct zone *zone, unsigned long start_pfn, unsigned long nr_pages) { if (zone_is_empty(zone)) @@ -1581,12 +1581,12 @@ static inline int local_memory_node(int node_id) { return node_id; }; #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) #ifdef CONFIG_ZONE_DEVICE -static inline bool zone_is_zone_device(struct zone *zone) +static inline bool zone_is_zone_device(const struct zone *zone) { return zone_idx(zone) == ZONE_DEVICE; } #else -static inline bool zone_is_zone_device(struct zone *zone) +static inline bool zone_is_zone_device(const struct zone *zone) { return false; } @@ -1598,19 +1598,19 @@ static inline bool zone_is_zone_device(struct zone *zone) * populated_zone(). If the whole zone is reserved then we can easily * end up with populated_zone() && !managed_zone(). 
*/ -static inline bool managed_zone(struct zone *zone) +static inline bool managed_zone(const struct zone *zone) { return zone_managed_pages(zone); } /* Returns true if a zone has memory */ -static inline bool populated_zone(struct zone *zone) +static inline bool populated_zone(const struct zone *zone) { return zone->present_pages; } #ifdef CONFIG_NUMA -static inline int zone_to_nid(struct zone *zone) +static inline int zone_to_nid(const struct zone *zone) { return zone->node; } @@ -1620,7 +1620,7 @@ static inline void zone_set_nid(struct zone *zone, int nid) zone->node = nid; } #else -static inline int zone_to_nid(struct zone *zone) +static inline int zone_to_nid(const struct zone *zone) { return 0; } @@ -1647,7 +1647,7 @@ static inline int is_highmem_idx(enum zone_type idx) * @zone: pointer to struct zone variable * Return: 1 for a highmem zone, 0 otherwise */ -static inline int is_highmem(struct zone *zone) +static inline int is_highmem(const struct zone *zone) { return is_highmem_idx(zone_idx(zone)); } @@ -1713,12 +1713,12 @@ static inline struct zone *zonelist_zone(struct zoneref *zoneref) return zoneref->zone; } -static inline int zonelist_zone_idx(struct zoneref *zoneref) +static inline int zonelist_zone_idx(const struct zoneref *zoneref) { return zoneref->zone_idx; } -static inline int zonelist_node_idx(struct zoneref *zoneref) +static inline int zonelist_node_idx(const struct zoneref *zoneref) { return zone_to_nid(zoneref->zone); } @@ -2021,7 +2021,7 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section) return (struct page *)map; } -static inline int present_section(struct mem_section *section) +static inline int present_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } @@ -2031,12 +2031,12 @@ static inline int present_section_nr(unsigned long nr) return present_section(__nr_to_section(nr)); } -static inline int valid_section(struct mem_section *section) +static inline int valid_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } -static inline int early_section(struct mem_section *section) +static inline int early_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_EARLY)); } @@ -2046,27 +2046,27 @@ static inline int valid_section_nr(unsigned long nr) return valid_section(__nr_to_section(nr)); } -static inline int online_section(struct mem_section *section) +static inline int online_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_ONLINE)); } #ifdef CONFIG_ZONE_DEVICE -static inline int online_device_section(struct mem_section *section) +static inline int online_device_section(const struct mem_section *section) { unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE; return section && ((section->section_mem_map & flags) == flags); } #else -static inline int online_device_section(struct mem_section *section) +static inline int online_device_section(const struct mem_section *section) { return 0; } #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT -static inline int preinited_vmemmap_section(struct mem_section *section) +static inline int preinited_vmemmap_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT)); @@ -2076,7 +2076,7 @@ void sparse_vmemmap_init_nid_early(int nid); void sparse_vmemmap_init_nid_late(int nid); #else -static inline int 
preinited_vmemmap_section(struct mem_section *section) +static inline int preinited_vmemmap_section(const struct mem_section *section) { return 0; } -- cgit v1.2.3 From b119fb0927738f150cbd179d23d08057dccd75c1 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:13 +0200 Subject: fs: constify mapping related test functions for improved const-correctness We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. Link: https://lkml.kernel.org/r/20250901205021.3573313-5-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..0783c5d05d3f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -537,7 +537,7 @@ struct address_space { /* * Returns true if any of the pages in the mapping are marked with the tag. */ -static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) +static inline bool mapping_tagged(const struct address_space *mapping, xa_mark_t tag) { return xa_marked(&mapping->i_pages, tag); } @@ -585,7 +585,7 @@ static inline void i_mmap_assert_write_locked(struct address_space *mapping) /* * Might pages of this file be mapped into userspace? */ -static inline int mapping_mapped(struct address_space *mapping) +static inline int mapping_mapped(const struct address_space *mapping) { return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } @@ -599,7 +599,7 @@ static inline int mapping_mapped(struct address_space *mapping) * If i_mmap_writable is negative, no new writable mappings are allowed. You * can only deny writable mappings, if none exists right now. */ -static inline int mapping_writably_mapped(struct address_space *mapping) +static inline int mapping_writably_mapped(const struct address_space *mapping) { return atomic_read(&mapping->i_mmap_writable) > 0; } -- cgit v1.2.3 From 4680092f8ccb4406e771a6b1a2c0243ebd40bab7 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:14 +0200 Subject: mm: constify process_shares_mm() for improved const-correctness This function only reads from the pointer arguments. Local (loop) variables are also annotated with `const` to clarify that these will not be written to. 
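A hypothetical user of the constified prototype (a sketch assuming <linux/sched/signal.h> for for_each_process() and that an RCU read lock is sufficient for this walk):

	/*
	 * Hypothetical: count tasks sharing @mm, passing it around as a
	 * const pointer now that process_shares_mm() permits that.
	 */
	static int count_mm_sharers(const struct mm_struct *mm)
	{
		struct task_struct *p;
		int n = 0;

		rcu_read_lock();
		for_each_process(p)
			if (process_shares_mm(p, mm))
				n++;
		rcu_read_unlock();
		return n;
	}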
Link: https://lkml.kernel.org/r/20250901205021.3573313-6-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Vishal Moola (Oracle) Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 45a47b555499..b3b63058e1a3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3872,7 +3872,7 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) } #endif /* __HAVE_ARCH_GATE_AREA */ -extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); +bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm); void drop_slab(void); -- cgit v1.2.3 From 0bf25cfc9e795ab302ee23550fdeebd2aeedf800 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:15 +0200 Subject: mm, s390: constify mapping related test/getter functions For improved const-correctness. We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. (Even though seemingly unrelated, this also constifies the pointer parameter of mmap_is_legacy() in arch/s390/mm/mmap.c because a copy of the function exists in mm/util.c.) Link: https://lkml.kernel.org/r/20250901205021.3573313-7-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- include/linux/pagemap.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b3b63058e1a3..221e98bb7689 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1002,7 +1002,7 @@ static inline bool vma_is_shmem(const struct vm_area_struct *vma) { return false static inline bool vma_is_anon_shmem(const struct vm_area_struct *vma) { return false; } #endif -int vma_is_stack_for_current(struct vm_area_struct *vma); +int vma_is_stack_for_current(const struct vm_area_struct *vma); /* flush_tlb_range() takes a vma, not a mm, and can care about flags */ #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } @@ -2617,7 +2617,7 @@ void folio_add_pin(struct folio *folio); int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, - struct task_struct *task, bool bypass_rlim); + const struct task_struct *task, bool bypass_rlim); struct kvec; struct page *get_dump_page(unsigned long addr, int *locked); @@ -3380,7 +3380,7 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) /* mmap.c */ -extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); +extern int __vm_enough_memory(const struct mm_struct *mm, long pages, int cap_sys_admin); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void exit_mmap(struct mm_struct *); bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0d66a252b06f..aec4a11565bc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -545,7 +545,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) #endif } -struct address_space *folio_mapping(struct folio *); +struct address_space *folio_mapping(const struct folio *folio); /** * folio_flush_mapping - Find the file mapping this folio belongs to. -- cgit v1.2.3 From a955cca37288fe37cc1cde8d291e02717c8a7409 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:17 +0200 Subject: mm: constify arch_pick_mmap_layout() for improved const-correctness This function only reads from the rlimit pointer (but writes to the mm_struct pointer which is kept without `const`). All callees are already const-ified or (internal functions) are being constified by this patch. Link: https://lkml.kernel.org/r/20250901205021.3573313-9-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. 
Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/sched/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2201da0afecc..0232d983b715 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -178,7 +178,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm) #endif extern void arch_pick_mmap_layout(struct mm_struct *mm, - struct rlimit *rlim_stack); + const struct rlimit *rlim_stack); unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, @@ -211,7 +211,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, - struct rlimit *rlim_stack) {} + const struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) -- cgit v1.2.3 From 89bf840b84bb53393436426cd4acd80604bd26fd Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:18 +0200 Subject: mm: constify ptdesc_pmd_pts_count() and folio_get_private() These functions from mm_types.h are trivial getters that should never write to the given pointers. Link: https://lkml.kernel.org/r/20250901205021.3573313-10-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d934a3a5b443..275e8060d918 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -632,7 +632,7 @@ static inline void ptdesc_pmd_pts_dec(struct ptdesc *ptdesc) atomic_dec(&ptdesc->pt_share_count); } -static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc) +static inline int ptdesc_pmd_pts_count(const struct ptdesc *ptdesc) { return atomic_read(&ptdesc->pt_share_count); } @@ -660,7 +660,7 @@ static inline void set_page_private(struct page *page, unsigned long private) page->private = private; } -static inline void *folio_get_private(struct folio *folio) +static inline void *folio_get_private(const struct folio *folio) { return folio->private; } -- cgit v1.2.3 From f346a9473a2fbbab785d1733d475160f1fc54e5a Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:19 +0200 Subject: mm: constify various inline functions for improved const-correctness We select certain test functions plus folio_migrate_refs() from mm_inline.h which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. One exception is the function folio_migrate_refs() which does write to the "new" folio pointer; there, only the "old" folio pointer is being constified; only its "flags" field is read, but nothing written. Link: https://lkml.kernel.org/r/20250901205021.3573313-11-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 150302b4a905..d6c1011b38f2 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -25,7 +25,7 @@ * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise * ram or swap backed folio. 
*/ -static inline int folio_is_file_lru(struct folio *folio) +static inline int folio_is_file_lru(const struct folio *folio) { return !folio_test_swapbacked(folio); } @@ -84,7 +84,7 @@ static __always_inline void __folio_clear_lru_flags(struct folio *folio) * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. */ -static __always_inline enum lru_list folio_lru_list(struct folio *folio) +static __always_inline enum lru_list folio_lru_list(const struct folio *folio) { enum lru_list lru; @@ -141,7 +141,7 @@ static inline int lru_tier_from_refs(int refs, bool workingset) return workingset ? MAX_NR_TIERS - 1 : order_base_2(refs); } -static inline int folio_lru_refs(struct folio *folio) +static inline int folio_lru_refs(const struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags.f); @@ -154,14 +154,14 @@ static inline int folio_lru_refs(struct folio *folio) return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1; } -static inline int folio_lru_gen(struct folio *folio) +static inline int folio_lru_gen(const struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags.f); return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; } -static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +static inline bool lru_gen_is_active(const struct lruvec *lruvec, int gen) { unsigned long max_seq = lruvec->lrugen.max_seq; @@ -217,12 +217,13 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); } -static inline unsigned long lru_gen_folio_seq(struct lruvec *lruvec, struct folio *folio, +static inline unsigned long lru_gen_folio_seq(const struct lruvec *lruvec, + const struct folio *folio, bool reclaiming) { int gen; int type = folio_is_file_lru(folio); - struct lru_gen_folio *lrugen = &lruvec->lrugen; + const struct lru_gen_folio *lrugen = &lruvec->lrugen; /* * +-----------------------------------+-----------------------------------+ @@ -302,7 +303,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return true; } -static inline void folio_migrate_refs(struct folio *new, struct folio *old) +static inline void folio_migrate_refs(struct folio *new, const struct folio *old) { unsigned long refs = READ_ONCE(old->flags.f) & LRU_REFS_MASK; @@ -330,7 +331,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return false; } -static inline void folio_migrate_refs(struct folio *new, struct folio *old) +static inline void folio_migrate_refs(struct folio *new, const struct folio *old) { } @@ -508,7 +509,7 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm) atomic_dec(&mm->tlb_flush_pending); } -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +static inline bool mm_tlb_flush_pending(const struct mm_struct *mm) { /* * Must be called after having acquired the PTL; orders against that @@ -521,7 +522,7 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending); } -static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +static inline bool mm_tlb_flush_nested(const struct mm_struct *mm) { /* * Similar to mm_tlb_flush_pending(), we must have acquired the PTL @@ -605,7 +606,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, return false; } -static inline bool vma_has_recency(struct vm_area_struct *vma) +static inline bool vma_has_recency(const struct vm_area_struct *vma) 
{ if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) return false; -- cgit v1.2.3 From da0045587d59d4ffd7710fa45cea51e5a48453a4 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:20 +0200 Subject: mm: constify assert/test functions in mm.h For improved const-correctness. We select certain assert and test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. Link: https://lkml.kernel.org/r/20250901205021.3573313-12-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Vishal Moola (Oracle) Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 221e98bb7689..a6bfa46937a8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -719,7 +719,7 @@ static inline void release_fault_lock(struct vm_fault *vmf) mmap_read_unlock(vmf->vma->vm_mm); } -static inline void assert_fault_locked(struct vm_fault *vmf) +static inline void assert_fault_locked(const struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) vma_assert_locked(vmf->vma); @@ -732,7 +732,7 @@ static inline void release_fault_lock(struct vm_fault *vmf) mmap_read_unlock(vmf->vma->vm_mm); } -static inline void assert_fault_locked(struct vm_fault *vmf) +static inline void assert_fault_locked(const struct vm_fault *vmf) { mmap_assert_locked(vmf->vma->vm_mm); } @@ -875,7 +875,7 @@ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) vma->vm_end >= vma->vm_mm->start_stack; } -static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) +static inline bool vma_is_temporary_stack(const struct vm_area_struct *vma) { int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); @@ -889,7 +889,7 @@ static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) return false; } -static inline bool vma_is_foreign(struct vm_area_struct *vma) +static inline bool vma_is_foreign(const struct vm_area_struct *vma) { if (!current->mm) return true; @@ -900,7 +900,7 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) return false; } -static inline bool vma_is_accessible(struct vm_area_struct *vma) +static inline bool vma_is_accessible(const struct vm_area_struct *vma) { return vma->vm_flags & VM_ACCESS_FLAGS; } @@ -911,7 +911,7 @@ static inline bool is_shared_maywrite(vm_flags_t vm_flags) (VM_SHARED | VM_MAYWRITE); } -static inline bool vma_is_shared_maywrite(struct 
vm_area_struct *vma) +static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma) { return is_shared_maywrite(vma->vm_flags); } @@ -1855,7 +1855,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) } #ifdef CONFIG_MMU -static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +static inline pte_t mk_pte(const struct page *page, pgprot_t pgprot) { return pfn_pte(page_to_pfn(page), pgprot); } @@ -1870,7 +1870,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) +static inline pte_t folio_mk_pte(const struct folio *folio, pgprot_t pgprot) { return pfn_pte(folio_pfn(folio), pgprot); } @@ -1886,7 +1886,7 @@ static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) +static inline pmd_t folio_mk_pmd(const struct folio *folio, pgprot_t pgprot) { return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot)); } @@ -1902,7 +1902,7 @@ static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pud_t folio_mk_pud(struct folio *folio, pgprot_t pgprot) +static inline pud_t folio_mk_pud(const struct folio *folio, pgprot_t pgprot) { return pud_mkhuge(pfn_pud(folio_pfn(folio), pgprot)); } @@ -3520,7 +3520,7 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) return mtree_load(&mm->mm_mt, addr); } -static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) +static inline unsigned long stack_guard_start_gap(const struct vm_area_struct *vma) { if (vma->vm_flags & VM_GROWSDOWN) return stack_guard_gap; @@ -3532,7 +3532,7 @@ static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) return 0; } -static inline unsigned long vm_start_gap(struct vm_area_struct *vma) +static inline unsigned long vm_start_gap(const struct vm_area_struct *vma) { unsigned long gap = stack_guard_start_gap(vma); unsigned long vm_start = vma->vm_start; @@ -3543,7 +3543,7 @@ static inline unsigned long vm_start_gap(struct vm_area_struct *vma) return vm_start; } -static inline unsigned long vm_end_gap(struct vm_area_struct *vma) +static inline unsigned long vm_end_gap(const struct vm_area_struct *vma) { unsigned long vm_end = vma->vm_end; @@ -3555,7 +3555,7 @@ static inline unsigned long vm_end_gap(struct vm_area_struct *vma) return vm_end; } -static inline unsigned long vma_pages(struct vm_area_struct *vma) +static inline unsigned long vma_pages(const struct vm_area_struct *vma) { return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } @@ -3572,7 +3572,7 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, return vma; } -static inline bool range_in_vma(struct vm_area_struct *vma, +static inline bool range_in_vma(const struct vm_area_struct *vma, unsigned long start, unsigned long end) { return (vma && vma->vm_start <= start && end <= vma->vm_end); @@ -3688,7 +3688,7 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * Indicates whether GUP can follow a PROT_NONE mapped page, or whether * a (NUMA hinting) fault is required. 
*/ -static inline bool gup_can_follow_protnone(struct vm_area_struct *vma, +static inline bool gup_can_follow_protnone(const struct vm_area_struct *vma, unsigned int flags) { /* @@ -3818,7 +3818,7 @@ static inline bool debug_guardpage_enabled(void) return static_branch_unlikely(&_debug_guardpage_enabled); } -static inline bool page_is_guard(struct page *page) +static inline bool page_is_guard(const struct page *page) { if (!debug_guardpage_enabled()) return false; @@ -3849,7 +3849,7 @@ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool debug_guardpage_enabled(void) { return false; } -static inline bool page_is_guard(struct page *page) { return false; } +static inline bool page_is_guard(const struct page *page) { return false; } static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, @@ -3931,7 +3931,7 @@ void vmemmap_free(unsigned long start, unsigned long end, #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) { /* number of pfns from base where pfn_to_page() is valid */ if (altmap) @@ -3945,7 +3945,7 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap, altmap->alloc -= nr_pfns; } #else -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) { return 0; } -- cgit v1.2.3 From a847b17009ec271514b269c90320a3893cd9b667 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:21 +0200 Subject: mm: constify highmem related functions for improved const-correctness Lots of functions in mm/highmem.c do not write to the given pointers and do not call functions that take non-const pointers and can therefore be constified. This includes functions like kunmap() which might be implemented in a way that writes to the pointer (e.g. to update reference counters or mapping fields), but currently are not. kmap() on the other hand cannot be made const because it calls set_page_address() which is non-const in some architectures/configurations. [akpm@linux-foundation.org: "fix" folio_page() build failure] Link: https://lkml.kernel.org/r/20250901205021.3573313-13-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Vishal Moola (Oracle) Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/highmem-internal.h | 36 ++++++++++++++++++------------------ include/linux/highmem.h | 8 ++++---- include/linux/page-flags.h | 4 ++-- 3 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 36053c3d6d64..0574c21ca45d 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -7,7 +7,7 @@ */ #ifdef CONFIG_KMAP_LOCAL void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void *__kmap_local_page_prot(const struct page *page, pgprot_t prot); void kunmap_local_indexed(const void *vaddr); void kmap_local_fork(struct task_struct *tsk); void __kmap_local_sched_out(void); @@ -33,7 +33,7 @@ static inline void kmap_flush_tlb(unsigned long addr) { } #endif void *kmap_high(struct page *page); -void kunmap_high(struct page *page); +void kunmap_high(const struct page *page); void __kmap_flush_unused(void); struct page *__kmap_to_page(void *addr); @@ -50,7 +50,7 @@ static inline void *kmap(struct page *page) return addr; } -static inline void kunmap(struct page *page) +static inline void kunmap(const struct page *page) { might_sleep(); if (!PageHighMem(page)) @@ -68,12 +68,12 @@ static inline void kmap_flush_unused(void) __kmap_flush_unused(); } -static inline void *kmap_local_page(struct page *page) +static inline void *kmap_local_page(const struct page *page) { return __kmap_local_page_prot(page, kmap_prot); } -static inline void *kmap_local_page_try_from_panic(struct page *page) +static inline void *kmap_local_page_try_from_panic(const struct page *page) { if (!PageHighMem(page)) return page_address(page); @@ -81,13 +81,13 @@ static inline void *kmap_local_page_try_from_panic(struct page *page) return NULL; } -static inline void *kmap_local_folio(struct folio *folio, size_t offset) +static inline void *kmap_local_folio(const struct folio *folio, size_t offset) { - struct page *page = folio_page(folio, offset / PAGE_SIZE); + const struct page *page = folio_page(folio, offset / PAGE_SIZE); return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; } -static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +static inline void *kmap_local_page_prot(const struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); } @@ -102,7 +102,7 @@ static inline void __kunmap_local(const void *vaddr) kunmap_local_indexed(vaddr); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic_prot(const struct page *page, pgprot_t prot) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); @@ -113,7 +113,7 @@ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) return __kmap_local_page_prot(page, prot); } -static inline void *kmap_atomic(struct page *page) +static inline void *kmap_atomic(const struct page *page) { return kmap_atomic_prot(page, kmap_prot); } @@ -173,32 +173,32 @@ static inline void *kmap(struct page *page) 
return page_address(page); } -static inline void kunmap_high(struct page *page) { } +static inline void kunmap_high(const struct page *page) { } static inline void kmap_flush_unused(void) { } -static inline void kunmap(struct page *page) +static inline void kunmap(const struct page *page) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(page_address(page)); #endif } -static inline void *kmap_local_page(struct page *page) +static inline void *kmap_local_page(const struct page *page) { return page_address(page); } -static inline void *kmap_local_page_try_from_panic(struct page *page) +static inline void *kmap_local_page_try_from_panic(const struct page *page) { return page_address(page); } -static inline void *kmap_local_folio(struct folio *folio, size_t offset) +static inline void *kmap_local_folio(const struct folio *folio, size_t offset) { return folio_address(folio) + offset; } -static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +static inline void *kmap_local_page_prot(const struct page *page, pgprot_t prot) { return kmap_local_page(page); } @@ -215,7 +215,7 @@ static inline void __kunmap_local(const void *addr) #endif } -static inline void *kmap_atomic(struct page *page) +static inline void *kmap_atomic(const struct page *page) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); @@ -225,7 +225,7 @@ static inline void *kmap_atomic(struct page *page) return page_address(page); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic_prot(const struct page *page, pgprot_t prot) { return kmap_atomic(page); } diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 6234f316468c..105cc4c00cc3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -43,7 +43,7 @@ static inline void *kmap(struct page *page); * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of * pages in the low memory area. */ -static inline void kunmap(struct page *page); +static inline void kunmap(const struct page *page); /** * kmap_to_page - Get the page for a kmap'ed address @@ -93,7 +93,7 @@ static inline void kmap_flush_unused(void); * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_page() can rely on this side effect. */ -static inline void *kmap_local_page(struct page *page); +static inline void *kmap_local_page(const struct page *page); /** * kmap_local_folio - Map a page in this folio for temporary usage @@ -129,7 +129,7 @@ static inline void *kmap_local_page(struct page *page); * Context: Can be invoked from any context. * Return: The virtual address of @offset. */ -static inline void *kmap_local_folio(struct folio *folio, size_t offset); +static inline void *kmap_local_folio(const struct folio *folio, size_t offset); /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! 
@@ -176,7 +176,7 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset); * kunmap_atomic(vaddr2); * kunmap_atomic(vaddr1); */ -static inline void *kmap_atomic(struct page *page); +static inline void *kmap_atomic(const struct page *page); /* Highmem related interfaces for management code */ static inline unsigned long nr_free_highpages(void); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a88b61eec3f8..568011930e35 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -316,9 +316,9 @@ static __always_inline unsigned long _compound_head(const struct page *page) * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ -static inline struct page *folio_page(struct folio *folio, unsigned long n) +static inline struct page *folio_page(const struct folio *folio, unsigned long n) { - return &folio->page + n; + return (struct page *)(&folio->page + n); } static __always_inline int PageTail(const struct page *page) -- cgit v1.2.3 From 9fd53c8122271d9fe8b687f50a9bdf5588d41d0b Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Fri, 11 Jul 2025 13:55:09 +0800 Subject: mm/filemap: align last_index to folio size On XFS systems with pagesize=4K, blocksize=16K, and CONFIG_TRANSPARENT_HUGEPAGE enabled, We observed the following readahead behaviors: # echo 3 > /proc/sys/vm/drop_caches # dd if=test of=/dev/null bs=64k count=1 # ./tools/mm/page-types -r -L -f /mnt/xfs/test foffset offset flags 0 136d4c __RU_l_________H______t_________________F_1 1 136d4d __RU_l__________T_____t_________________F_1 2 136d4e __RU_l__________T_____t_________________F_1 3 136d4f __RU_l__________T_____t_________________F_1 ... c 136bb8 __RU_l_________H______t_________________F_1 d 136bb9 __RU_l__________T_____t_________________F_1 e 136bba __RU_l__________T_____t_________________F_1 f 136bbb __RU_l__________T_____t_________________F_1 <-- first read 10 13c2cc ___U_l_________H______t______________I__F_1 <-- readahead flag 11 13c2cd ___U_l__________T_____t______________I__F_1 12 13c2ce ___U_l__________T_____t______________I__F_1 13 13c2cf ___U_l__________T_____t______________I__F_1 ... 1c 1405d4 ___U_l_________H______t_________________F_1 1d 1405d5 ___U_l__________T_____t_________________F_1 1e 1405d6 ___U_l__________T_____t_________________F_1 1f 1405d7 ___U_l__________T_____t_________________F_1 [ra_size = 32, req_count = 16, async_size = 16] # echo 3 > /proc/sys/vm/drop_caches # dd if=test of=/dev/null bs=60k count=1 # ./page-types -r -L -f /mnt/xfs/test foffset offset flags 0 136048 __RU_l_________H______t_________________F_1 ... c 110a40 __RU_l_________H______t_________________F_1 d 110a41 __RU_l__________T_____t_________________F_1 e 110a42 __RU_l__________T_____t_________________F_1 <-- first read f 110a43 __RU_l__________T_____t_________________F_1 <-- first readahead flag 10 13e7a8 ___U_l_________H______t_________________F_1 ... 20 137a00 ___U_l_________H______t_______P______I__F_1 <-- second readahead flag (20 - 2f) 21 137a01 ___U_l__________T_____t_______P______I__F_1 ... 3f 10d4af ___U_l__________T_____t_______P_________F_1 [first readahead: ra_size = 32, req_count = 15, async_size = 17] When reading 64k data (same for 61-63k range, where last_index is page-aligned in filemap_get_pages()), 128k readahead is triggered via page_cache_sync_ra() and the PG_readahead flag is set on the next folio (the one containing 0x10 page). 
When reading 60k data, 128k readahead is also triggered via page_cache_sync_ra(). However, in this case the readahead flag is set on the 0xf page. Although the requested read size (req_count) is 60k, the actual read will be aligned to folio size (64k), which triggers the readahead flag and initiates asynchronous readahead via page_cache_async_ra(). This results in two readahead operations totaling 256k. The root cause is that when the requested size is smaller than the actual read size (due to folio alignment), it triggers asynchronous readahead. By changing last_index alignment from page size to folio size, we ensure the requested size matches the actual read size, preventing the case where a single read operation triggers two readahead operations. After applying the patch: # echo 3 > /proc/sys/vm/drop_caches # dd if=test of=/dev/null bs=60k count=1 # ./page-types -r -L -f /mnt/xfs/test foffset offset flags 0 136d4c __RU_l_________H______t_________________F_1 1 136d4d __RU_l__________T_____t_________________F_1 2 136d4e __RU_l__________T_____t_________________F_1 3 136d4f __RU_l__________T_____t_________________F_1 ... c 136bb8 __RU_l_________H______t_________________F_1 d 136bb9 __RU_l__________T_____t_________________F_1 e 136bba __RU_l__________T_____t_________________F_1 <-- first read f 136bbb __RU_l__________T_____t_________________F_1 10 13c2cc ___U_l_________H______t______________I__F_1 <-- readahead flag 11 13c2cd ___U_l__________T_____t______________I__F_1 12 13c2ce ___U_l__________T_____t______________I__F_1 13 13c2cf ___U_l__________T_____t______________I__F_1 ... 1c 1405d4 ___U_l_________H______t_________________F_1 1d 1405d5 ___U_l__________T_____t_________________F_1 1e 1405d6 ___U_l__________T_____t_________________F_1 1f 1405d7 ___U_l__________T_____t_________________F_1 [ra_size = 32, req_count = 16, async_size = 16] The same phenomenon will occur when reading from 49k to 64k. Set the readahead flag to the next folio. Because the minimum order of folio in address_space equals the block size (at least in xfs and bcachefs that already support bs > ps), having request_count aligned to block size will not cause overread. [klarasmodin@gmail.com: fix overflow on 32-bit] Link: https://lkml.kernel.org/r/yru7qf5gvyzccq5ohhpylvxug5lr5tf54omspbjh4sm6pcdb2r@fpjgj2pxw7va [akpm@linux-foundation.org: update it for Max's constification efforts] Link: https://lkml.kernel.org/r/20250711055509.91587-1-youling.tang@linux.dev Co-developed-by: Chi Zhiling Signed-off-by: Chi Zhiling Signed-off-by: Youling Tang Signed-off-by: Klara Modin Reviewed-by: Ryan Roberts Reviewed-by: Jan Kara Cc: Matthew Wilcox (Oracle) Cc: Youling Tang Cc: David Hildenbrand Cc: Klara Modin Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index aec4a11565bc..185644e288ea 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -482,6 +482,12 @@ mapping_min_folio_nrpages(const struct address_space *mapping) return 1UL << mapping_min_folio_order(mapping); } +static inline unsigned long +mapping_min_folio_nrbytes(const struct address_space *mapping) +{ + return mapping_min_folio_nrpages(mapping) << PAGE_SHIFT; +} + /** * mapping_align_index() - Align index for this mapping. * @mapping: The address_space. 
-- cgit v1.2.3 From 94326d3130b5e78a35265bbf7822148372b39231 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Sep 2025 20:10:39 +0100 Subject: mm: remove mlock_count from struct page All users now use folio->mlock_count so we can remove this element of struct page. Move the useful comments over to struct folio. Link: https://lkml.kernel.org/r/20250903191041.1630338-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 275e8060d918..ff2b4e13215f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -94,14 +94,6 @@ struct page { union { struct list_head lru; - /* Or, for the Unevictable "LRU list" slot */ - struct { - /* Always even, to negate PageTail */ - void *__filler; - /* Count page's or folio's mlocks */ - unsigned int mlock_count; - }; - /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; @@ -391,7 +383,9 @@ struct folio { union { struct list_head lru; /* private: avoid cluttering the output */ + /* For the Unevictable "LRU list" slot */ struct { + /* Avoid compound_head */ void *__filler; /* public: */ unsigned int mlock_count; -- cgit v1.2.3 From da939ef4c494246bc2102ecb628bbcc71d650410 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 2 Sep 2025 08:35:11 +0000 Subject: rust: maple_tree: add MapleTree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Add Rust abstraction for Maple Trees", v3. This will be used in the Tyr driver [1] to allocate from the GPU's VA space that is not owned by userspace, but by the kernel, for kernel GPU mappings. Danilo tells me that in nouveau, the maple tree is used for keeping track of "VM regions" on top of GPUVM, and that he will most likely end up doing the same in the Rust Nova driver as well. These abstractions intentionally do not expose any way to make use of external locking. You are required to use the internal spinlock. For now, we do not support loads that only utilize rcu for protection. This contains some parts taken from Andrew Ballance's RFC [2] from April. However, it has also been reworked significantly compared to that RFC taking the use-cases in Tyr into account. This patch (of 3): The maple tree will be used in the Tyr driver to allocate and keep track of GPU allocations created internally (i.e. not by userspace). It will likely also be used in the Nova driver eventually. This adds the simplest methods for additional and removal that do not require any special care with respect to concurrency. This implementation is based on the RFC by Andrew but with significant changes to simplify the implementation. 
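For orientation, a minimal sketch of the existing C maple tree API that such an abstraction sits on top of (the names va_regions, track_region, find_region and drop_region are invented for the example; the exact set of operations wrapped by the Rust type may differ):

	#include <linux/maple_tree.h>

	static DEFINE_MTREE(va_regions);	/* tree using its internal spinlock */

	static int track_region(unsigned long first, unsigned long last, void *region)
	{
		/* Store @region for the whole index range [first, last]. */
		return mtree_insert_range(&va_regions, first, last, region, GFP_KERNEL);
	}

	static void *find_region(unsigned long index)
	{
		return mtree_load(&va_regions, index);
	}

	static void drop_region(unsigned long index)
	{
		mtree_erase(&va_regions, index);
	}
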
[ojeda@kernel.org: fix intra-doc links] Link: https://lkml.kernel.org/r/20250910140212.997771-1-ojeda@kernel.org Link: https://lkml.kernel.org/r/20250902-maple-tree-v3-0-fb5c8958fb1e@google.com Link: https://lkml.kernel.org/r/20250902-maple-tree-v3-1-fb5c8958fb1e@google.com Link: https://lore.kernel.org/r/20250627-tyr-v1-1-cb5f4c6ced46@collabora.com [1] Link: https://lore.kernel.org/r/20250405060154.1550858-1-andrewjballance@gmail.com [2] Co-developed-by: Andrew Ballance Signed-off-by: Andrew Ballance Signed-off-by: Alice Ryhl Reviewed-by: Danilo Krummrich Cc: Andreas Hindborg Cc: Björn Roy Baron Cc: Boqun Feng Cc: Daniel Almeida Cc: Gary Guo Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Miguel Ojeda Cc: Trevor Gross Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 41e633264e51..05730171d201 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -481,6 +481,9 @@ struct ma_wr_state { #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) +/* + * When changing MA_STATE, remember to also change rust/kernel/maple_tree.rs + */ #define MA_STATE(name, mt, first, end) \ struct ma_state name = { \ .tree = mt, \ -- cgit v1.2.3 From a488ba3124c82d704963fcd760fe653df1987b13 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Fri, 5 Sep 2025 17:00:12 +0200 Subject: huge_memory: return -EINVAL in folio split functions when THP is disabled split_huge_page_to_list_[to_order](), split_huge_page() and try_folio_split() return 0 on success and error codes on failure. When THP is disabled, these functions return 0 indicating success even though an error code should be returned as it is not possible to split a folio when THP is disabled. Make all these functions return -EINVAL to indicate failure instead of 0. As large folios depend on CONFIG_THP, issue warning as this function should not be called without a large folio. 
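An illustrative caller (not taken from this patch) shows why stubs returning 0 were misleading:

	int err = split_huge_page(page);

	if (err)		/* 0 means the split succeeded */
		goto keep_large;
	/*
	 * Before this change, CONFIG_TRANSPARENT_HUGEPAGE=n builds fell
	 * through here as if the folio had actually been split.
	 */

With the stubs returning -EINVAL, such callers now take the error path, and the new warnings flag the unexpected large folio.
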
Link: https://lkml.kernel.org/r/20250905150012.93714-1-kernel@pankajraghav.com Signed-off-by: Pankaj Raghav Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509051753.riCeG7LC-lkp@intel.com/ Acked-by: David Hildenbrand Acked-by: Zi Yan Acked-by: Kiryl Shutsemau Reviewed-by: Lorenzo Stoakes Reviewed-by: Barry Song Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 29ef70022da1..f327d62fc985 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -588,22 +588,26 @@ static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order) { - return 0; + VM_WARN_ON_ONCE_PAGE(1, page); + return -EINVAL; } static inline int split_huge_page(struct page *page) { - return 0; + VM_WARN_ON_ONCE_PAGE(1, page); + return -EINVAL; } static inline int split_folio_to_list(struct folio *folio, struct list_head *list) { - return 0; + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; } static inline int try_folio_split(struct folio *folio, struct page *page, struct list_head *list) { - return 0; + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; } static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} -- cgit v1.2.3 From 614d850efda98e4455e4f2b55e64864f68a4e370 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 4 Sep 2025 08:59:26 +1000 Subject: mm/memremap: remove unused get_dev_pagemap() parameter GUP no longer uses get_dev_pagemap(). As it was the only user of the get_dev_pagemap() pgmap caching feature it can be removed. Link: https://lkml.kernel.org/r/20250903225926.34702-2-apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Cc: John Hubbard Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/memremap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index aa1b6aa877a0..e5951ba12a28 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -211,8 +211,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); -struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap); +struct dev_pagemap *get_dev_pagemap(unsigned long pfn); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); @@ -234,8 +233,7 @@ static inline void devm_memunmap_pages(struct device *dev, { } -static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap) +static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn) { return NULL; } -- cgit v1.2.3 From 4522aed4fffbbd18ab3581d733d0572d45780d07 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 17 Sep 2025 00:00:51 +0800 Subject: mm, swap: rename and move some swap cluster definition and helpers No feature change, move cluster related definitions and helpers to mm/swap.h, also tidy up and add a "swap_" prefix for cluster lock/unlock helpers, so they can be used outside of swap files. And while at it, add kerneldoc. 
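A sketch of the resulting naming convention, assuming the pre-rename helpers were lock_cluster()/unlock_cluster() (exact signatures may differ from the patch):

	struct swap_cluster_info *ci;

	ci = swap_cluster_lock(si, offset);	/* previously lock_cluster() */
	/* ... update the swap_map entries covered by this cluster ... */
	swap_cluster_unlock(ci);		/* previously unlock_cluster() */

The swap_ prefix makes the subsystem explicit now that the helpers are visible outside the swap files.
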
Link: https://lkml.kernel.org/r/20250916160100.31545-7-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Reviewed-by: Barry Song Acked-by: Chris Li Acked-by: David Hildenbrand Suggested-by: Chris Li Acked-by: Nhat Pham Cc: Baoquan He Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kemeng Shi Cc: kernel test robot Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Yosry Ahmed Cc: Zi Yan Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a2bb20841616..78cc48a65512 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -235,40 +235,6 @@ enum { /* Special value in each swap_map continuation */ #define SWAP_CONT_MAX 0x7f /* Max count */ -/* - * We use this to track usage of a cluster. A cluster is a block of swap disk - * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All - * free clusters are organized into a list. We fetch an entry from the list to - * get a free cluster. - * - * The flags field determines if a cluster is free. This is - * protected by cluster lock. - */ -struct swap_cluster_info { - spinlock_t lock; /* - * Protect swap_cluster_info fields - * other than list, and swap_info_struct->swap_map - * elements corresponding to the swap cluster. - */ - u16 count; - u8 flags; - u8 order; - struct list_head list; -}; - -/* All on-list cluster must have a non-zero flag. */ -enum swap_cluster_flags { - CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ - CLUSTER_FLAG_FREE, - CLUSTER_FLAG_NONFULL, - CLUSTER_FLAG_FRAG, - /* Clusters with flags above are allocatable */ - CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, - CLUSTER_FLAG_FULL, - CLUSTER_FLAG_DISCARD, - CLUSTER_FLAG_MAX, -}; - /* * The first page in the swap file is the swap header, which is always marked * bad to prevent it from being allocated as an entry. This also prevents the -- cgit v1.2.3 From 0fcf8ef4fdab8e5c91d1bce39c7fe6565974ffad Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 17 Sep 2025 00:00:52 +0800 Subject: mm, swap: tidy up swap device and cluster info helpers swp_swap_info is the most commonly used helper for retrieving swap info. It has an internal check that may lead to a NULL return value, but almost none of its caller checks the return value, making the internal check pointless. In fact, most of these callers already ensured the entry is valid and never expect a NULL value. Tidy this up and improve the function names. If the caller can make sure the swap entry/type is valid and the device is pinned, use the new introduced __swap_entry_to_info/__swap_type_to_info instead. They have more debug sanity checks and lower overhead as they are inlined. Callers that may expect a NULL value should use swap_entry_to_info/swap_type_to_info instead. No feature change. The rearranged codes should have had no effect, or they should have been hitting NULL de-ref bugs already. Only some new sanity checks are added so potential issues may show up in debug build. The new helpers will be frequently used with swap table later when working with swap cache folios. A locked swap cache folio ensures the entries are valid and stable so these helpers are very helpful. 
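A sketch of how the two flavors described above divide the work (the helper names are from the description; the surrounding code is illustrative):

	/*
	 * Caller has verified the entry and pinned the device: inlined,
	 * cheaper, with extra sanity checks in debug builds.
	 */
	si = __swap_entry_to_info(entry);

	/* Caller cannot rule out an invalid entry: may return NULL. */
	si = swap_entry_to_info(entry);
	if (!si)
		return;
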
Link: https://lkml.kernel.org/r/20250916160100.31545-8-ryncsn@gmail.com Signed-off-by: Kairui Song Acked-by: Chris Li Reviewed-by: Barry Song Acked-by: David Hildenbrand Suggested-by: Chris Li Cc: Baolin Wang Cc: Baoquan He Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kemeng Shi Cc: kernel test robot Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Cc: Zi Yan Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 78cc48a65512..762f8db0e811 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -479,7 +479,6 @@ extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); -struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); @@ -492,11 +491,6 @@ static inline void put_swap_device(struct swap_info_struct *si) } #else /* CONFIG_SWAP */ -static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) -{ - return NULL; -} - static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) { return NULL; -- cgit v1.2.3 From 8578e0c00dcf0c58fbc32d4904ecaf8e802a6590 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 17 Sep 2025 00:00:56 +0800 Subject: mm, swap: use the swap table for the swap cache and switch API Introduce basic swap table infrastructures, which are now just a fixed-sized flat array inside each swap cluster, with access wrappers. Each cluster contains a swap table of 512 entries. Each table entry is an opaque atomic long. It could be in 3 types: a shadow type (XA_VALUE), a folio type (pointer), or NULL. In this first step, it only supports storing a folio or shadow, and it is a drop-in replacement for the current swap cache. Convert all swap cache users to use the new sets of APIs. Chris Li has been suggesting using a new infrastructure for swap cache for better performance, and that idea combined well with the swap table as the new backing structure. Now the lock contention range is reduced to 2M clusters, which is much smaller than the 64M address_space. And we can also drop the multiple address_space design. All the internal works are done with swap_cache_get_* helpers. Swap cache lookup is still lock-less like before, and the helper's contexts are same with original swap cache helpers. They still require a pin on the swap device to prevent the backing data from being freed. Swap cache updates are now protected by the swap cluster lock instead of the XArray lock. This is mostly handled internally, but new __swap_cache_* helpers require the caller to lock the cluster. So, a few new cluster access and locking helpers are also introduced. A fully cluster-based unified swap table can be implemented on top of this to take care of all count tracking and synchronization work, with dynamic allocation. It should reduce the memory usage while making the performance even better. 
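A sketch of the three states a swap table slot can hold (the array name "table" and the raw atomic_long_read() access are illustrative; the real accessors are the swap table helpers):

	void *entry = (void *)atomic_long_read(&table[offset]);

	if (!entry)
		/* slot is empty */;
	else if (xa_is_value(entry))
		/* shadow entry left behind by reclaim */;
	else
		/* folio pointer: this offset is in the swap cache */;
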
Link: https://lkml.kernel.org/r/20250916160100.31545-12-ryncsn@gmail.com Co-developed-by: Chris Li Signed-off-by: Chris Li Signed-off-by: Kairui Song Acked-by: Chris Li Suggested-by: Chris Li Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kemeng Shi Cc: kernel test robot Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Cc: Zi Yan Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 762f8db0e811..e818fbade1e2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -480,8 +480,6 @@ extern int __swap_count(swp_entry_t entry); extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); struct backing_dev_info; -extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); -extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); sector_t swap_folio_sector(struct folio *folio); -- cgit v1.2.3 From 6106864b878e1ce5ecab4b8ffffff85e9ec69b78 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 2 Sep 2025 08:36:11 +0000 Subject: maple_tree: remove lockdep_map_p typedef Having the ma_external_lock field exist when CONFIG_LOCKDEP=n isn't used anywhere, so just get rid of it. This also avoids generating a typedef called lockdep_map_p that could overlap with typedefs in other header files. Link: https://lkml.kernel.org/r/20250902-maple-lockdep-p-v1-1-3ae5a398a379@google.com Signed-off-by: Alice Ryhl Reviewed-by: Danilo Krummrich Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 05730171d201..47f9002ae92d 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -194,7 +194,6 @@ enum store_type { #define MAPLE_RESERVED_RANGE 4096 #ifdef CONFIG_LOCKDEP -typedef struct lockdep_map *lockdep_map_p; #define mt_lock_is_held(mt) \ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) @@ -207,7 +206,6 @@ typedef struct lockdep_map *lockdep_map_p; #define mt_on_stack(mt) (mt).ma_external_lock = NULL #else -typedef struct { /* nothing */ } lockdep_map_p; #define mt_lock_is_held(mt) 1 #define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) @@ -230,8 +228,10 @@ typedef struct { /* nothing */ } lockdep_map_p; */ struct maple_tree { union { - spinlock_t ma_lock; - lockdep_map_p ma_external_lock; + spinlock_t ma_lock; +#ifdef CONFIG_LOCKDEP + struct lockdep_map *ma_external_lock; +#endif }; unsigned int ma_flags; void __rcu *ma_root; -- cgit v1.2.3 From 522abd92279a8ea55bcc687f77697d4c0aaba6c0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Sep 2025 18:11:00 +0100 Subject: ptdesc: convert __page_flags to pt_flags Patch series "Some ptdesc cleanups". The first two patches here are preparation for splitting struct ptdesc from struct page and struct folio. I think their only dependency is on the memdesc_flags_t patches from August which is in mm-new. The third patch is just something I noticed while working on the code. 
This patch (of 3): Use the new memdesc_flags_t type to show that these are the same bits as page/folio/slab and thesefore have the zone/node/section information in them. Remove a use of ptdesc_folio() by converting pagetable_is_reserved() to use test_bit() directly. Link: https://lkml.kernel.org/r/20250908171104.2409217-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250908171104.2409217-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ++++++- include/linux/mm_types.h | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a6bfa46937a8..8dd71392eba7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2934,6 +2934,11 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU */ +enum pt_flags { + PT_reserved = PG_reserved, + /* High bits are used for zone/node/section */ +}; + static inline struct ptdesc *virt_to_ptdesc(const void *x) { return page_ptdesc(virt_to_page(x)); @@ -2951,7 +2956,7 @@ static inline void *ptdesc_address(const struct ptdesc *pt) static inline bool pagetable_is_reserved(struct ptdesc *pt) { - return folio_test_reserved(ptdesc_folio(pt)); + return test_bit(PT_reserved, &pt->pt_flags.f); } /** diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ff2b4e13215f..f048dc80646e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -524,7 +524,7 @@ FOLIO_MATCH(compound_head, _head_3); /** * struct ptdesc - Memory descriptor for page tables. - * @__page_flags: Same as page flags. Powerpc only. + * @pt_flags: enum pt_flags plus zone/node/section. * @pt_rcu_head: For freeing page table pages. * @pt_list: List of used page tables. Used for s390 gmap shadow pages * (which are not linked into the user page tables) and x86 @@ -546,7 +546,7 @@ FOLIO_MATCH(compound_head, _head_3); * understanding of the issues. */ struct ptdesc { - unsigned long __page_flags; + memdesc_flags_t pt_flags; union { struct rcu_head pt_rcu_head; @@ -584,7 +584,7 @@ struct ptdesc { #define TABLE_MATCH(pg, pt) \ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) -TABLE_MATCH(flags, __page_flags); +TABLE_MATCH(flags, pt_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); -- cgit v1.2.3 From f0c92726e89f5c6c092526787465617a68af154f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Sep 2025 18:11:01 +0100 Subject: ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor() In preparation for splitting struct ptdesc from struct page and struct folio, remove mentions of struct folio from these functions. Introduce ptdesc_nr_pages() to avoid using lruvec_stat_add/sub_folio() Link: https://lkml.kernel.org/r/20250908171104.2409217-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8dd71392eba7..25f56e209ec8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2097,9 +2097,9 @@ static inline long folio_nr_pages(const struct folio *folio) * page. 
compound_nr() can be called on a tail page, and is defined to * return 1 in that case. */ -static inline long compound_nr(struct page *page) +static inline long compound_nr(const struct page *page) { - struct folio *folio = (struct folio *)page; + const struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags.f)) return 1; @@ -3066,21 +3066,26 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ +static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc) +{ + return compound_nr(ptdesc_page(ptdesc)); +} + static inline void __pagetable_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); - __folio_set_pgtable(folio); - lruvec_stat_add_folio(folio, NR_PAGETABLE); + __SetPageTable(ptdesc_page(ptdesc)); + mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc)); } static inline void pagetable_dtor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); ptlock_free(ptdesc); - __folio_clear_pgtable(folio); - lruvec_stat_sub_folio(folio, NR_PAGETABLE); + __ClearPageTable(ptdesc_page(ptdesc)); + mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc)); } static inline void pagetable_dtor_free(struct ptdesc *ptdesc) -- cgit v1.2.3 From 90ec2df9dd31653ceac4a35d2440b108bdf27550 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Sep 2025 18:11:02 +0100 Subject: ptdesc: remove ptdesc_to_virt() This has the same effect as ptdesc_address() so convert the callers to use that and delete the function. Add kernel-doc for ptdesc_address(). Link: https://lkml.kernel.org/r/20250908171104.2409217-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 25f56e209ec8..da6e0abad2cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2944,11 +2944,12 @@ static inline struct ptdesc *virt_to_ptdesc(const void *x) return page_ptdesc(virt_to_page(x)); } -static inline void *ptdesc_to_virt(const struct ptdesc *pt) -{ - return page_to_virt(ptdesc_page(pt)); -} - +/** + * ptdesc_address - Virtual address of page table. + * @pt: Page table descriptor. + * + * Return: The first byte of the page table described by @pt. + */ static inline void *ptdesc_address(const struct ptdesc *pt) { return folio_address(ptdesc_folio(pt)); -- cgit v1.2.3 From e7a5f249e6db3b41a4618763e0f840639f3578f4 Mon Sep 17 00:00:00 2001 From: Chanwon Park Date: Mon, 8 Sep 2025 19:04:10 +0900 Subject: mm: re-enable kswapd when memory pressure subsides or demotion is toggled If kswapd fails to reclaim pages from a node MAX_RECLAIM_RETRIES in a row, kswapd on that node gets disabled. That is, the system won't wakeup kswapd for that node until page reclamation is observed at least once. That reclamation is mostly done by direct reclaim, which in turn enables kswapd back. However, on systems with CXL memory nodes, workloads with high anon page usage can disable kswapd indefinitely, without triggering direct reclaim. 
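In code terms, the stuck state is roughly the following gate, shown here in its patched atomic_t form as a sketch (the exact mm/vmscan.c logic differs):

	/* wakeups are ignored once kswapd has given up on the node */
	if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES)
		return;

and only an observed reclaim resets the counter, which in the scenario below never happens.
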
This can be reproduced with following steps: numa node 0 (32GB memory, 48 CPUs) numa node 2~5 (512GB CXL memory, 128GB each) (numa node 1 is disabled) swap space 8GB 1) Set /sys/kernel/mm/demotion_enabled to 0. 2) Set /proc/sys/kernel/numa_balancing to 0. 3) Run a process that allocates and random accesses 500GB of anon pages. 4) Let the process exit normally. During 3), free memory on node 0 gets lower than low watermark, and kswapd runs and depletes swap space. Then, kswapd fails consecutively and gets disabled. Allocation afterwards happens on CXL memory, so node 0 never gains more memory pressure to trigger direct reclaim. After 4), kswapd on node 0 remains disabled, and tasks running on that node are unable to swap. If you turn on NUMA_BALANCING_MEMORY_TIERING and demotion now, it won't work properly since kswapd is disabled. To mitigate this problem, reset kswapd_failures to 0 on following conditions: a) ZONE_BELOW_HIGH bit of a zone in hopeless node with a fallback memory node gets cleared. b) demotion_enabled is changed from false to true. Rationale for a): ZONE_BELOW_HIGH bit being cleared might be a sign that the node may be reclaimable afterwards. This won't help much if the memory-hungry process keeps running without freeing anything, but at least the node will go back to reclaimable state when the process exits. Rationale for b): When demotion_enabled is false, kswapd can only reclaim anon pages by swapping them out to swap space. If demotion_enabled is turned on, kswapd can demote anon pages to another node for reclaiming. So, the original failure count for determining reclaimability is no longer valid. Since kswapd_failures resets may be missed by ++ operation, it is changed from int to atomic_t. [akpm@linux-foundation.org: tweak whitespace] Link: https://lkml.kernel.org/r/aL6qGi69jWXfPc4D@pcw-MS-7D22 Signed-off-by: Chanwon Park Cc: Brendan Jackman Cc: David Hildenbrand Cc: Johannes Weiner Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Qi Zheng Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c4eae96160d..7fb7331c5725 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1440,7 +1440,7 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_highest_zoneidx; - int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + atomic_t kswapd_failures; /* Number of 'reclaimed == 0' runs */ #ifdef CONFIG_COMPACTION int kcompactd_max_order; -- cgit v1.2.3 From 032c31127f27acb1b8152b512830ecef04ed2ebc Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 9 Sep 2025 13:13:57 -0700 Subject: mm: vm_event_item: explicit #include for THREAD_SIZE This header uses THREAD_SIZE, which is provided by the thread_info.h header but is not included in this header. Depending on the #include ordering in other files, this can produce preprocessor errors. 
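A minimal illustration of the ordering hazard (the file name is invented for the example):

	/* example.c: no prior include has defined THREAD_SIZE */
	#include <linux/vm_event_item.h>	/* consumes THREAD_SIZE */
	#include <linux/thread_info.h>		/* too late: defines THREAD_SIZE */

Having vm_event_item.h pull in its own dependency removes the requirement that every user get the include order right.
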
Link: https://lkml.kernel.org/r/20250909201419.827638-1-briannorris@chromium.org Signed-off-by: Brian Norris Reviewed-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9e15a088ba38..92f80b4d69a6 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -2,6 +2,8 @@ #ifndef VM_EVENT_ITEM_H_INCLUDED #define VM_EVENT_ITEM_H_INCLUDED +#include <linux/thread_info.h> + #ifdef CONFIG_ZONE_DMA #define DMA_ZONE(xx) xx##_DMA, #else -- cgit v1.2.3 From fa17bcd5f65ed702df001579cca8c885fa6bf3e7 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Tue, 26 Aug 2025 11:37:21 -0400 Subject: mm: make folio page count functions return unsigned As raised by Andrew [1], a folio/compound page never spans a negative number of pages. Consequently, let's use "unsigned long" instead of "long" consistently for folio_nr_pages(), folio_large_nr_pages() and compound_nr(). Using "unsigned long" as return value is fine, because even "(long)-folio_nr_pages()" will keep on working as expected. Using "unsigned int" instead would actually break these use cases. This patch takes the first step changing these to return unsigned long (and making drm_gem_get_pages() use the new types instead of replacing min()). In the future, we might want to make more callers of these functions consistently use "unsigned long". Link: https://lore.kernel.org/linux-mm/20250503182858.5a02729fcffd6d4723afcfc2@linux-foundation.org/ Link: https://lkml.kernel.org/r/20250826153721.GA23292@cathedrallabs.org Link: https://lore.kernel.org/linux-mm/20250503182858.5a02729fcffd6d4723afcfc2@linux-foundation.org/ [1] Signed-off-by: Aristeu Rozanski Suggested-by: Andrew Morton Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Simona Vetter Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index da6e0abad2cb..8f5b4df9b166 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1018,12 +1018,12 @@ static inline unsigned int folio_large_order(const struct folio *folio) } #ifdef NR_PAGES_IN_LARGE_FOLIO -static inline long folio_large_nr_pages(const struct folio *folio) +static inline unsigned long folio_large_nr_pages(const struct folio *folio) { return folio->_nr_pages; } #else -static inline long folio_large_nr_pages(const struct folio *folio) +static inline unsigned long folio_large_nr_pages(const struct folio *folio) { return 1L << folio_large_order(folio); } @@ -2062,7 +2062,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, * * Return: A positive power of two. */ -static inline long folio_nr_pages(const struct folio *folio) +static inline unsigned long folio_nr_pages(const struct folio *folio) { if (!folio_test_large(folio)) return 1; @@ -2097,7 +2097,7 @@ static inline long folio_nr_pages(const struct folio *folio) * page. compound_nr() can be called on a tail page, and is defined to return 1 in that case.
*/ -static inline long compound_nr(const struct page *page) +static inline unsigned long compound_nr(const struct page *page) { const struct folio *folio = (struct folio *)page; -- cgit v1.2.3 From 3a37469e5ac004905e4125bf60b43cc5216e83dc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Sep 2025 15:29:17 +0100 Subject: mm: constify compound_order() and page_size() Patch series "Small cleanups". These small cleanups can be applied now to reduce conflicts during the next merge window. They're all from various efforts to split struct page from other memdescs. Thanks to Vlastimil for the suggestion. This patch (of 3): These functions do not modify their arguments. Telling the compiler this may improve code generation, and allows us to pass const arguments from other functions. Link: https://lkml.kernel.org/r/20250910142923.2465470-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250910142923.2465470-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f5b4df9b166..fcb1e72eea40 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1036,9 +1036,9 @@ static inline unsigned long folio_large_nr_pages(const struct folio *folio) * set before the order is initialised, or this may be a tail page. * See compaction.c for some good examples. */ -static inline unsigned int compound_order(struct page *page) +static inline unsigned int compound_order(const struct page *page) { - struct folio *folio = (struct folio *)page; + const struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags.f)) return 0; @@ -1256,7 +1256,7 @@ int folio_mc_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); /* Returns the number of bytes in this potentially compound page. */ -static inline unsigned long page_size(struct page *page) +static inline unsigned long page_size(const struct page *page) { return PAGE_SIZE << compound_order(page); } -- cgit v1.2.3 From 9d003dec972563efb8ce14c9962af3652d0e201d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Sep 2025 15:29:19 +0100 Subject: mm: remove page->order We already use page->private for storing the order of a page while it's in the buddy allocator system; extend that to also storing the order while it's in the pcp_llist. Link: https://lkml.kernel.org/r/20250910142923.2465470-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Alexei Starovoitov Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f048dc80646e..6920c816f6c6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -97,10 +97,7 @@ struct page { /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; - struct { - struct llist_node pcp_llist; - unsigned int order; - }; + struct llist_node pcp_llist; }; struct address_space *mapping; union { @@ -111,7 +108,8 @@ struct page { * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if swapcache flag set. - * Indicates order in the buddy system if PageBuddy. 
+ * Indicates order in the buddy system if PageBuddy + * or on pcp_llist. */ unsigned long private; }; -- cgit v1.2.3 From d02ac836e4d6bdfd7d44927d01a4cd048ad4aba8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 13 Sep 2025 17:03:39 -0700 Subject: include/linux/pgtable.h: convert arch_enter_lazy_mmu_mode() and friends to static inlines For all the usual reasons, plus a new one. Calling (void)arch_enter_lazy_mmu_mode(); deservedly blows up. Cc: Balbir Singh Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 94249e671a7e..32e8457ad535 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -232,9 +232,9 @@ static inline int pmd_dirty(pmd_t pmd) * and the mode cannot be used in interrupt context. */ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() do {} while (0) -#define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_flush_lazy_mmu_mode() do {} while (0) +static inline void arch_enter_lazy_mmu_mode(void) {} +static inline void arch_leave_lazy_mmu_mode(void) {} +static inline void arch_flush_lazy_mmu_mode(void) {} #endif #ifndef pte_batch_hint -- cgit v1.2.3 From 59d4d36158ba3cdbce141d8e9261eea154d4c441 Mon Sep 17 00:00:00 2001 From: zhongjinji Date: Tue, 16 Sep 2025 00:29:45 +0800 Subject: mm/oom_kill: thaw the entire OOM victim process Patch series "Improvements to Victim Process Thawing and OOM Reaper Traversal Order", v10. This patch series focuses on optimizing victim process thawing and refining the traversal order of the OOM reaper. Since __thaw_task() is used to thaw a single thread of the victim, thawing only one thread cannot guarantee the exit of the OOM victim when it is frozen. Patch 1 thaw the entire process of the OOM victim to ensure that OOM victims are able to terminate themselves. Even if the oom_reaper is delayed, patch 2 is still beneficial for reaping processes with a large address space footprint, and it also greatly improves process_mrelease. This patch (of 10): OOM killer is a mechanism that selects and kills processes when the system runs out of memory to reclaim resources and keep the system stable. But the oom victim cannot terminate on its own when it is frozen, even if the OOM victim task is thawed through __thaw_task(). This is because __thaw_task() can only thaw a single OOM victim thread, and cannot thaw the entire OOM victim process. In addition, freezing_slow_path() determines whether a task is an OOM victim by checking the task's TIF_MEMDIE flag. When a task is identified as an OOM victim, the freezer bypasses both PM freezing and cgroup freezing states to thaw it. Historically, TIF_MEMDIE was a "this is the oom victim & it has access to memory reserves" flag in the past. It has that thread vs. process problems and tsk_is_oom_victim was introduced later to get rid of them and other issues as well as the guarantee that we can identify the oom victim's mm reliably for other oom_reaper. Therefore, thaw_process() is introduced to unfreeze all threads within the OOM victim process, ensuring that every thread is properly thawed. The freezer now uses tsk_is_oom_victim() to determine OOM victim status, allowing all victim threads to be unfrozen as necessary. With this change, the entire OOM victim process will be thawed when an OOM event occurs, ensuring that the victim can terminate on its own. 
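A sketch of the thaw_process() semantics described above (the in-tree implementation may differ in detail):

	void thaw_process(struct task_struct *p)
	{
		struct task_struct *t;

		rcu_read_lock();
		for_each_thread(p, t)
			__thaw_task(t);
		rcu_read_unlock();
	}

Compare __thaw_task(), which wakes exactly one frozen thread and therefore cannot guarantee that the whole victim makes forward progress toward exit.
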
Link: https://lkml.kernel.org/r/20250915162946.5515-1-zhongjinji@honor.com Link: https://lkml.kernel.org/r/20250915162946.5515-2-zhongjinji@honor.com Signed-off-by: zhongjinji Reviewed-by: Suren Baghdasaryan Acked-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Liam R. Howlett Cc: David Rientjes Cc: Len Brown Cc: Lorenzo Stoakes Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/freezer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index b303472255be..32884c9721e5 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,6 +47,7 @@ extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); +extern void thaw_process(struct task_struct *p); static inline bool try_to_freeze(void) { @@ -80,6 +81,7 @@ static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} +static inline void thaw_process(struct task_struct *p) {} static inline bool try_to_freeze(void) { return false; } -- cgit v1.2.3 From b9e2f58ffb84afcbba7e66f96ca14f98e0e88f26 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 15 Sep 2025 16:02:24 -0700 Subject: alloc_tag: mark inaccurate allocation counters in /proc/allocinfo output While rare, memory allocation profiling can contain inaccurate counters if slab object extension vector allocation fails. That allocation might succeed later but prior to that, slab allocations that would have used that object extension vector will not be accounted for. To indicate incorrect counters, "accurate:no" marker is appended to the call site line in the /proc/allocinfo output. Bump up /proc/allocinfo version to reflect the change in the file format and update documentation. 
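A sketch of how a reporter can append the marker when dumping a call site (the actual lib/alloc_tag.c formatting may differ):

	if (alloc_tag_is_inaccurate(tag))
		seq_buf_printf(out, " accurate:no");

Consumers of /proc/allocinfo should treat the counters on such lines as approximate rather than exact.
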
Example output with invalid counters: allocinfo - version: 2.0 0 0 arch/x86/kernel/kdebugfs.c:105 func:create_setup_data_nodes 0 0 arch/x86/kernel/alternative.c:2090 func:alternatives_smp_module_add 0 0 arch/x86/kernel/alternative.c:127 func:__its_alloc accurate:no 0 0 arch/x86/kernel/fpu/regset.c:160 func:xstateregs_set 0 0 arch/x86/kernel/fpu/xstate.c:1590 func:fpstate_realloc 0 0 arch/x86/kernel/cpu/aperfmperf.c:379 func:arch_enable_hybrid_capacity_scale 0 0 arch/x86/kernel/cpu/amd_cache_disable.c:258 func:init_amd_l3_attrs 49152 48 arch/x86/kernel/cpu/mce/core.c:2709 func:mce_device_create accurate:no 32768 1 arch/x86/kernel/cpu/mce/genpool.c:132 func:mce_gen_pool_create 0 0 arch/x86/kernel/cpu/mce/amd.c:1341 func:mce_threshold_create_device [surenb@google.com: document new "accurate:no" marker] Fixes: 39d117e04d15 ("alloc_tag: mark inaccurate allocation counters in /proc/allocinfo output") [akpm@linux-foundation.org: simplification per Usama, reflow text] [akpm@linux-foundation.org: add newline to prevent docs warning, per Randy] Link: https://lkml.kernel.org/r/20250915230224.4115531-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Usama Arif Acked-by: Johannes Weiner Cc: David Rientjes Cc: David Wang <00107082@163.com> Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Roman Gushchin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 12 ++++++++++++ include/linux/codetag.h | 5 ++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 9ef2633e2c08..d40ac39bfbe8 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -221,6 +221,16 @@ static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) ref->ct = NULL; } +static inline void alloc_tag_set_inaccurate(struct alloc_tag *tag) +{ + tag->ct.flags |= CODETAG_FLAG_INACCURATE; +} + +static inline bool alloc_tag_is_inaccurate(struct alloc_tag *tag) +{ + return !!(tag->ct.flags & CODETAG_FLAG_INACCURATE); +} + #define alloc_tag_record(p) ((p) = current->alloc_tag) #else /* CONFIG_MEM_ALLOC_PROFILING */ @@ -230,6 +240,8 @@ static inline bool mem_alloc_profiling_enabled(void) { return false; } static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) {} static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {} +static inline void alloc_tag_set_inaccurate(struct alloc_tag *tag) {} +static inline bool alloc_tag_is_inaccurate(struct alloc_tag *tag) { return false; } #define alloc_tag_record(p) do {} while (0) #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/codetag.h b/include/linux/codetag.h index 457ed8fd3214..8ea2a5f7c98a 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -16,13 +16,16 @@ struct module; #define CODETAG_SECTION_START_PREFIX "__start_" #define CODETAG_SECTION_STOP_PREFIX "__stop_" +/* codetag flags */ +#define CODETAG_FLAG_INACCURATE (1 << 0) + /* * An instance of this structure is created in a special ELF section at every * code location being tagged. At runtime, the special section is treated as * an array of these. 
*/ struct codetag { - unsigned int flags; /* used in later patches */ + unsigned int flags; unsigned int lineno; const char *modname; const char *function; -- cgit v1.2.3 From ab152db3cae520154d572cff32e63de441672454 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 15 Sep 2025 20:35:05 -0700 Subject: mm/damon/core: implement damon_initialized() function Patch series "mm/damon: define and use DAMON initialization check function". DAMON is initialized in subsystem initialization time, by damon_init(). If DAMON API functions are called before the initialization, the system could crash. Actually such issues happened and were fixed [1] in the past. For the fix, DAMON API callers have updated to check if DAMON is initialized or not, using their own hacks. The hacks are unnecessarily duplicated on every DAMON API callers and therefore it would be difficult to reliably maintain in the long term. Make it reliable and easy to maintain. For this, implement a new DAMON core layer API function that returns if DAMON is successfully initialized. If it returns true, it means DAMON API functions are safe to be used. After the introduction of the new API, update DAMON API callers to use the new function instead of their own hacks. This patch (of 7): If DAMON is tried to be used when it is not yet successfully initialized, the caller could be crashed. DAMON core layer is not providing a reliable way to see if it is successfully initialized and therefore ready to be used, though. As a result, DAMON API callers are implementing their own hacks to see it. The hacks simply assume DAMON should be ready on module init time. It is not reliable as DAMON initialization can indeed fail if KMEM_CACHE() fails, and difficult to maintain as those are duplicates. Implement a core layer API function for better reliability and maintainability to replace the hacks with followup commits. Link: https://lkml.kernel.org/r/20250916033511.116366-2-sj@kernel.org Link: https://lkml.kernel.org/r/20250916033511.116366-2-sj@kernel.org Link: https://lore.kernel.org/20250909022238.2989-1-sj@kernel.org [1] Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index aa7381be388c..cae8c613c5fc 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -938,6 +938,7 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs } +bool damon_initialized(void); int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); bool damon_is_running(struct damon_ctx *ctx); -- cgit v1.2.3 From a05e4e935a6689542d86162b33a484cc704ce39a Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Fri, 29 Aug 2025 17:09:44 +0200 Subject: virtio_config: clarify output parameters This was ambiguous enough for a broken patch (206cc44588f7 ("virtio: reject shm region if length is zero")) to make it into the kernel, so make it clearer. Link: https://lore.kernel.org/r/20250816071600-mutt-send-email-mst@kernel.org/ Signed-off-by: Alyssa Ross Message-Id: <20250829150944.233505-1-hi@alyssa.is> Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8bf156dde554..7427b79d6f3d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -193,14 +193,15 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, } static inline void virtio_get_features(struct virtio_device *vdev, - u64 *features) + u64 *features_out) { if (vdev->config->get_extended_features) { - vdev->config->get_extended_features(vdev, features); + vdev->config->get_extended_features(vdev, features_out); return; } - virtio_features_from_u64(features, vdev->config->get_features(vdev)); + virtio_features_from_u64(features_out, + vdev->config->get_features(vdev)); } /** @@ -326,11 +327,11 @@ int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) static inline bool virtio_get_shm_region(struct virtio_device *vdev, - struct virtio_shm_region *region, u8 id) + struct virtio_shm_region *region_out, u8 id) { if (!vdev->config->get_shm_region) return false; - return vdev->config->get_shm_region(vdev, region, id); + return vdev->config->get_shm_region(vdev, region_out, id); } static inline bool virtio_is_little_endian(struct virtio_device *vdev) -- cgit v1.2.3 From 2ee3a75e42081db3d951c0893f5d654f16d1c0e8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 18 Jul 2025 11:26:15 +1000 Subject: nfsd: discard nfsd_file_get_local() This interface was deprecated by commit e6f7e1487ab5 ("nfs_localio: simplify interface to nfsd for getting nfsd_file") and is now unused. So let's remove it. Signed-off-by: NeilBrown Reviewed-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/nfslocalio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 5c7c92659e73..59ea90bd136b 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -63,7 +63,6 @@ struct nfsd_localio_operations { struct nfsd_file __rcu **pnf, const fmode_t); struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); - struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; -- cgit v1.2.3 From c97b737ef8f10f28424822c139e3b22b9e9bcc2b Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Fri, 18 Jul 2025 11:09:56 +0300 Subject: sunrpc: Change ret code of xdr_stream_decode_opaque_fixed Since the opaque is fixed in size, the caller already knows how many bytes were decoded, on success. Thus, xdr_stream_decode_opaque_fixed() doesn't need to return that value. And, xdr_stream_decode_u32 and _u64 both return zero on success. This patch simplifies the caller's error checking to avoid potential integer promotion issues. 
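An illustrative caller under the unified convention (the buffer size here is arbitrary):

	__u32 val;
	u8 verf[8];

	if (xdr_stream_decode_u32(xdr, &val) < 0)
		return -EBADMSG;
	if (xdr_stream_decode_opaque_fixed(xdr, verf, sizeof(verf)) < 0)
		return -EBADMSG;

All three decoders now return 0 or -EBADMSG, so their results can be checked, combined, or stored in an int without a positive ssize_t length being narrowed or promoted along the way.
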
Suggested-by: Dan Carpenter Signed-off-by: Sergey Bashirov Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8a9ec617cf66..8d354015d762 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -721,7 +721,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) * @len: size of buffer pointed to by @ptr * * Return values: - * On success, returns size of object stored in @ptr + * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t @@ -732,7 +732,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) if (unlikely(!p)) return -EBADMSG; xdr_decode_opaque_fixed(p, ptr, len); - return len; + return 0; } /** -- cgit v1.2.3 From afc5b36e29b95fbd31a60b9630d148857e5e513d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2025 09:24:32 -0400 Subject: vfs: add ATTR_CTIME_SET flag When ATTR_ATIME_SET and ATTR_MTIME_SET are set in the ia_valid mask, the notify_change() logic takes that to mean that the request should set those values explicitly, and not override them with "now". With the advent of delegated timestamps, similar functionality is needed for the ctime. Add an ATTR_CTIME_SET flag, and use that to indicate that the ctime should be accepted as-is. Also, clean up the if statements to eliminate the extra negatives. In setattr_copy() and setattr_copy_mgtime() use inode_set_ctime_deleg() when ATTR_CTIME_SET is set, instead of basing the decision on ATTR_DELEG. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 601d036a6c78..74f2bfc51926 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -238,6 +238,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ +#define ATTR_CTIME_SET (1 << 10) #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) -- cgit v1.2.3 From 898374fdd7f06fa4c4a66e8be3135efeae6128d5 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 19 Aug 2025 14:04:02 -0400 Subject: nfsd: unregister with rpcbind when deleting a transport When a listener is added, part of creating the transport is registering the program/port with rpcbind. However, when the listener is removed, the transport goes away while rpcbind still has the entry for that port/type. When deleting the transport, unregister with rpcbind when appropriate. ---v2: created a new xpt_flag, XPT_RPCB_UNREG, to mark TCP and UDP transports, and at xprt destroy send an rpcbind unregister if the flag is set. Suggested-by: Chuck Lever Fixes: d093c9089260 ("nfsd: fix management of listener transports") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 369a89aea186..2b886f7eb295 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -104,6 +104,9 @@ enum { * it has access to. It is NOT counted * in ->sv_tmpcnt.
*/ + XPT_RPCB_UNREG, /* transport that needs unregistering + * with rpcbind (TCP, UDP) on destroy + */ }; /* -- cgit v1.2.3 From d73d06dac604043b94a5f18ebb6a69da1b867702 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Aug 2025 10:27:28 -0400 Subject: SUNRPC: Move the svc_rpcb_cleanup() call sites Clean up: because svc_rpcb_cleanup() and svc_xprt_destroy_all() are always invoked in pairs, we can deduplicate code by moving the svc_rpcb_cleanup() call sites into svc_xprt_destroy_all(). Tested-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 2b886f7eb295..da2a2531e110 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -168,7 +168,8 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); -- cgit v1.2.3 From 099f942182e3695554cba44e4bafb08a4111b50f Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 15 Sep 2025 20:37:20 +0200 Subject: spi: keep track of number of chipselects in spi_device There are several places where we need to iterate over a device's chipselect. To be able to do it efficiently, store the number of chipselects in spi_device, like we do for controllers. Since we now use a device supplied value, add a check to make sure it isn't more than we can support. Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250915183725.219473-3-jonas.gorski@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e9ea43234d9a..49c048277e97 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -170,6 +170,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * two delays will be added up. * @chip_select: Array of physical chipselect, spi->chipselect[i] gives * the corresponding physical CS for logical CS i. + * @num_chipselect: Number of physical chipselects used. 
* @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines * (optional, NULL when not using a GPIO line) @@ -229,6 +230,7 @@ struct spi_device { struct spi_delay cs_inactive; u8 chip_select[SPI_CS_CNT_MAX]; + u8 num_chipselect; /* * Bit mask of the chipselect(s) that the driver need to use from @@ -315,7 +317,7 @@ static inline bool spi_is_csgpiod(struct spi_device *spi) { u8 idx; - for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { + for (idx = 0; idx < spi->num_chipselect; idx++) { if (spi_get_csgpiod(spi, idx)) return true; } -- cgit v1.2.3 From 08fda410bae41cc8dde9697f9104da525be53153 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 15 Sep 2025 20:37:24 +0200 Subject: spi: reduce device chip select limit again The spi chipselect limit SPI_CS_CNT_MAX was raised with commit 2f8c7c3715f2 ("spi: Raise limit on number of chip selects") from 4 to 16 to accommodate spi controllers with more than 4 chip selects, and then later to 24 with commit 96893cdd4760 ("spi: Raise limit on number of chip selects to 24"). Now that we removed SPI_CS_CNT_MAX limiting the chip selects of controllers, we can reduce the amount of chip selects per device again to 4, the original value. Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250915183725.219473-7-jonas.gorski@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 49c048277e97..df4842abbc6f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 24 +#define SPI_CS_CNT_MAX 4 struct dma_chan; struct software_node; -- cgit v1.2.3 From e336ab509b43ea601801dfa05b4270023c3ed007 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 15 Sep 2025 20:37:25 +0200 Subject: spi: rename SPI_CS_CNT_MAX => SPI_DEVICE_CS_CNT_MAX Rename SPI_CS_CNT_MAX to SPI_DEVICE_CS_CNT_MAX to make it more obvious that this is the max number of CS per device supported, not per controller. Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250915183725.219473-8-jonas.gorski@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index df4842abbc6f..cb2c2df31089 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 4 +#define SPI_DEVICE_CS_CNT_MAX 4 struct dma_chan; struct software_node; @@ -229,7 +229,7 @@ struct spi_device { struct spi_delay cs_hold; struct spi_delay cs_inactive; - u8 chip_select[SPI_CS_CNT_MAX]; + u8 chip_select[SPI_DEVICE_CS_CNT_MAX]; u8 num_chipselect; /* @@ -238,9 +238,9 @@ struct spi_device { * multiple chip selects & memories are connected in parallel * then more than one bit need to be set in cs_index_mask. 
*/ - u32 cs_index_mask : SPI_CS_CNT_MAX; + u32 cs_index_mask : SPI_DEVICE_CS_CNT_MAX; - struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + struct gpio_desc *cs_gpiod[SPI_DEVICE_CS_CNT_MAX]; /* Chip select gpio desc */ /* * Likely need more hooks for more protocol options affecting how @@ -721,8 +721,8 @@ struct spi_controller { bool auto_runtime_pm; bool fallback; bool last_cs_mode_high; - s8 last_cs[SPI_CS_CNT_MAX]; - u32 last_cs_index_mask : SPI_CS_CNT_MAX; + s8 last_cs[SPI_DEVICE_CS_CNT_MAX]; + u32 last_cs_index_mask : SPI_DEVICE_CS_CNT_MAX; struct completion xfer_completion; size_t max_dma_len; -- cgit v1.2.3 From 8535bd38b4d58a3d19bf8e7dfa66e1d8180b316a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 22 Sep 2025 14:42:35 +0200 Subject: cgroup: add missing ns_common include Add the missing include of the ns_common header. Acked-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/cgroup_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h index b7dbf4d623d2..81ccbdee425b 100644 --- a/include/linux/cgroup_namespace.h +++ b/include/linux/cgroup_namespace.h @@ -2,6 +2,8 @@ #ifndef _LINUX_CGROUP_NAMESPACE_H #define _LINUX_CGROUP_NAMESPACE_H +#include + struct cgroup_namespace { struct ns_common ns; struct user_namespace *user_ns; -- cgit v1.2.3 From d7610cb7454bbd8bf6d58f71b0ed57155d3c545f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 22 Sep 2025 14:42:36 +0200 Subject: ns: simplify ns_common_init() further Simply derive the ns operations from the namespace type. Acked-by: Thomas Gleixner Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index aea8528d799a..56492cd9ff8d 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -25,6 +25,17 @@ extern struct time_namespace init_time_ns; extern struct user_namespace init_user_ns; extern struct uts_namespace init_uts_ns; +extern const struct proc_ns_operations netns_operations; +extern const struct proc_ns_operations utsns_operations; +extern const struct proc_ns_operations ipcns_operations; +extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations pidns_for_children_operations; +extern const struct proc_ns_operations userns_operations; +extern const struct proc_ns_operations mntns_operations; +extern const struct proc_ns_operations cgroupns_operations; +extern const struct proc_ns_operations timens_operations; +extern const struct proc_ns_operations timens_for_children_operations; + struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; @@ -84,10 +95,21 @@ void __ns_common_free(struct ns_common *ns); struct user_namespace *: &init_user_ns, \ struct uts_namespace *: &init_uts_ns) -#define ns_common_init(__ns, __ops) \ - __ns_common_init(to_ns_common(__ns), __ops, (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) - -#define ns_common_init_inum(__ns, __ops, __inum) __ns_common_init(to_ns_common(__ns), __ops, __inum) +#define to_ns_operations(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \ + struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? 
&ipcns_operations : NULL), \ + struct mnt_namespace *: &mntns_operations, \ + struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \ + struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \ + struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \ + struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \ + struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL)) + +#define ns_common_init(__ns) \ + __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) + +#define ns_common_init_inum(__ns, __inum) __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), __inum) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) -- cgit v1.2.3 From 0950c64ae38661bd97127e9aa0522f1624f82006 Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Mon, 22 Sep 2025 12:26:06 +0000 Subject: workqueue: fix texinfodocs warning for WQ_* flags reference Sphinx emitted a warning during make texinfodocs: WARNING: Inline literal start-string without end-string. This was caused by the trailing '*' in "%WQ_*" being parsed as reStructuredText markup in the kernel-doc comment. Escape the '*' in the comment so that Sphinx treats it as a literal character, resolving the warning. Signed-off-by: Kriish Sharma Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 71a9900c03c7..dabc351cc127 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -502,7 +502,7 @@ void workqueue_softirq_dead(unsigned int cpu); * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means * that the sum of per-node max_active's may be larger than @max_active. * - * For detailed information on %WQ_* flags, please refer to + * For detailed information on %WQ_\* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: -- cgit v1.2.3 From a571f08d3db215dd6ec294d8faac8cc4184bc4e4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:46:36 +0100 Subject: net: phy: add phy_interface_copy() Add a helper for copying PHY interface bitmasks. This will be used by the SFP bus code, which will then be moved to phylink in the subsequent patches. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVU-000000061W8-2IDT@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7da9e19471c9..d09fc42e61f3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf) return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX); } +static inline void phy_interface_copy(unsigned long *d, const unsigned long *s) +{ + bitmap_copy(d, s, PHY_INTERFACE_MODE_MAX); +} + static inline unsigned int phy_interface_weight(const unsigned long *intf) { return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX); -- cgit v1.2.3 From ddae6127afbba46e32af3b31eb7bba939e1fad96 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:46:41 +0100 Subject: net: sfp: pre-parse the module support Pre-parse the module support on insert rather than when the upstream requests the data. 
This will allow more flexible and extensible parsing. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVZ-000000061WE-2pXD@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 60c65cea74f6..5fb59cf49882 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -521,6 +521,28 @@ struct ethtool_eeprom; struct ethtool_modinfo; struct sfp_bus; +/** + * struct sfp_module_caps - sfp module capabilities + * @interfaces: bitmap of interfaces that the module may support + * @link_modes: bitmap of ethtool link modes that the module may support + */ +struct sfp_module_caps { + DECLARE_PHY_INTERFACE_MASK(interfaces); + __ETHTOOL_DECLARE_LINK_MODE_MASK(link_modes); + /** + * @may_have_phy: indicate whether the module may have an ethernet PHY + * There is no way to be sure that a module has a PHY as the EEPROM + * doesn't contain this information. When set, this does not mean that + * the module definitely has a PHY. + */ + bool may_have_phy; + /** + * @port: one of ethtool %PORT_* definitions, parsed from the module + * EEPROM, or %PORT_OTHER if the port type is not known. + */ + u8 port; +}; + /** * struct sfp_upstream_ops - upstream operations structure * @attach: called when the sfp socket driver is bound to the upstream -- cgit v1.2.3 From 64fb4a3ae8a5d9c4d27d9f4ae58e38200fc3d44b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:46:51 +0100 Subject: net: sfp: provide sfp_get_module_caps() Provide a function to retrieve the current sfp_module_caps structure so that upstreams can get the entire module support in one go. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVj-000000061WQ-3q47@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 5fb59cf49882..9f29fcad52be 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -576,6 +576,7 @@ struct sfp_upstream_ops { }; #if IS_ENABLED(CONFIG_SFP) +const struct sfp_module_caps *sfp_get_module_caps(struct sfp_bus *bus); int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); @@ -600,6 +601,12 @@ int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, void sfp_bus_del_upstream(struct sfp_bus *bus); const char *sfp_get_name(struct sfp_bus *bus); #else +static inline const struct sfp_module_caps * +sfp_get_module_caps(struct sfp_bus *bus) +{ + return NULL; +} + static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) -- cgit v1.2.3 From 9ce138735efcb395974952972aa5dbd1d444ac2c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:47:07 +0100 Subject: net: sfp: remove old sfp_parse_* functions Remove the old sfp_parse_*() functions that are now no longer used. 
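For illustration, a sketch of what an upstream consumer looks like after the conversion (the caller and its error handling are hypothetical; sfp_get_module_caps() and struct sfp_module_caps are the API introduced earlier in this series):

/* Hypothetical upstream: fetch the pre-parsed capabilities instead of
 * calling the removed sfp_parse_*() helpers.
 */
static int upstream_module_insert(struct sfp_bus *bus)
{
	const struct sfp_module_caps *caps = sfp_get_module_caps(bus);

	if (!caps)	/* e.g. the CONFIG_SFP=n stub returns NULL */
		return -ENODEV;

	/* caps->port, caps->may_have_phy, caps->link_modes and
	 * caps->interfaces replace the values previously obtained from
	 * sfp_parse_port(), sfp_may_have_phy() and sfp_parse_support().
	 */
	if (caps->may_have_phy)
		return 1;	/* caller would probe for an on-module PHY */
	return 0;
}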
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVz-000000061Wj-13Yd@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 9f29fcad52be..5c71945a5e4d 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -577,11 +577,6 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) const struct sfp_module_caps *sfp_get_module_caps(struct sfp_bus *bus); -int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support); -bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); -void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support, unsigned long *interfaces); phy_interface_t sfp_select_interface(struct sfp_bus *bus, const unsigned long *link_modes); @@ -607,26 +602,6 @@ sfp_get_module_caps(struct sfp_bus *bus) return NULL; } -static inline int sfp_parse_port(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, - unsigned long *support) -{ - return PORT_OTHER; -} - -static inline bool sfp_may_have_phy(struct sfp_bus *bus, - const struct sfp_eeprom_id *id) -{ - return false; -} - -static inline void sfp_parse_support(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, - unsigned long *support, - unsigned long *interfaces) -{ -} - static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, const unsigned long *link_modes) { -- cgit v1.2.3 From 1b44d700023e77dd92821e7811db825e75a1a394 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:52 +0000 Subject: tcp: move tcp->rcv_tstamp to tcp_sock_write_txrx group tcp_ack() writes this field, it belongs to tcp_sock_write_txrx. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3ca5ed02de6d..1e6c2ded22c9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -238,7 +238,6 @@ struct tcp_sock { /* RX read-mostly hotpath cache lines */ __cacheline_group_begin(tcp_sock_read_rx); u32 copied_seq; /* Head of yet unread data */ - u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 snd_wl1; /* Sequence for window update */ u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 rttvar_us; /* smoothed mdev_max */ @@ -246,13 +245,13 @@ struct tcp_sock { u16 advmss; /* Advertised MSS */ u16 urg_data; /* Saved octet of OOB data and control flags */ u32 lost; /* Total data packets lost incl. rexmits */ + u32 snd_ssthresh; /* Slow start size threshold */ struct minmax rtt_min; /* OOO segments go in this rbtree. Socket lock must be held. */ struct rb_root out_of_order_queue; #if defined(CONFIG_TLS_DEVICE) void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); #endif - u32 snd_ssthresh; /* Slow start size threshold */ u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); @@ -319,6 +318,7 @@ struct tcp_sock { */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ + u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ /* * Options received (usually on last packet, some only on SYN packets). 
*/ -- cgit v1.2.3 From 969904dcd77dbb0a773d66cddaa59eccc6415d03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:53 +0000 Subject: tcp: move recvmsg_inq to tcp_sock_read_txrx Fill a hole in tcp_sock_read_txrx, instead of possibly wasting a cache line. Note that tcp_recvmsg_locked() is also reading tp->repair, so this removes one cache line miss in tcp recvmsg(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1e6c2ded22c9..c1d7fce251d7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -232,7 +232,8 @@ struct tcp_sock { repair : 1, tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ + recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_txrx); /* RX read-mostly hotpath cache lines */ @@ -252,7 +253,6 @@ struct tcp_sock { #if defined(CONFIG_TLS_DEVICE) void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); #endif - u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); /* TX read-write hotpath cache lines */ -- cgit v1.2.3 From a105ea47a4e855d24ebf65f1c5fb907162e7b8cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:54 +0000 Subject: tcp: move tcp_clean_acked to tcp_sock_read_tx group tp->tcp_clean_acked is fetched in the tx path when snd_una is updated. This field thus belongs to the tcp_sock_read_tx group. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c1d7fce251d7..3f282130c863 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -215,6 +215,9 @@ struct tcp_sock { u16 gso_segs; /* Max number of segs per GSO packet */ /* from STCP, retrans queue hinting */ struct sk_buff *retransmit_skb_hint; +#if defined(CONFIG_TLS_DEVICE) + void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); +#endif __cacheline_group_end(tcp_sock_read_tx); /* TXRX read-mostly hotpath cache lines */ @@ -250,9 +253,6 @@ struct tcp_sock { struct minmax rtt_min; /* OOO segments go in this rbtree. Socket lock must be held. */ struct rb_root out_of_order_queue; -#if defined(CONFIG_TLS_DEVICE) - void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); -#endif __cacheline_group_end(tcp_sock_read_rx); /* TX read-write hotpath cache lines */ -- cgit v1.2.3 From 31c4511bbb0c75c525b6e4f4fe4167f2e9d3b05c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:55 +0000 Subject: tcp: move mtu_info to remove two 32bit holes This removes 8 bytes of waste on 64-bit builds.
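A toy layout (not the real tcp_sock) makes the arithmetic visible; on 64-bit, each lone u32 adjacent to a u64 costs a 4-byte alignment hole:

struct before {			/* sizeof == 32 on 64-bit */
	u32 a;			/* 4 bytes + 4-byte hole */
	u64 x;
	u32 b;			/* 4 bytes + 4-byte hole */
	u64 y;
};

struct after {			/* sizeof == 24: 8 bytes saved */
	u32 a;
	u32 b;			/* pairs with 'a'; both holes gone */
	u64 x;
	u64 y;
};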
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3f282130c863..20b8c6e21fef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -448,6 +448,9 @@ struct tcp_sock { * the first SYN. */ u32 undo_marker; /* snd_una upon a new recovery episode. */ int undo_retrans; /* number of undoable retransmissions. */ + u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG + * while socket was owned by user. + */ u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans * Total data bytes retransmitted */ @@ -494,9 +497,6 @@ struct tcp_sock { u32 probe_seq_end; } mtu_probe; u32 plb_rehash; /* PLB-triggered rehash attempts */ - u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG - * while socket was owned by user. - */ #if IS_ENABLED(CONFIG_MPTCP) bool is_mptcp; #endif -- cgit v1.2.3 From 349271568303695f0ac3563af153d2b4542f6986 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 21 Sep 2025 18:01:16 +0200 Subject: bpf: Implement signature verification for BPF programs This patch extends the BPF_PROG_LOAD command by adding three new fields to `union bpf_attr` in the user-space API: - signature: A pointer to the signature blob. - signature_size: The size of the signature blob. - keyring_id: The serial number of a loaded kernel keyring (e.g., the user or session keyring) containing the trusted public keys. When a BPF program is loaded with a signature, the kernel: 1. Retrieves the trusted keyring using the provided `keyring_id`. 2. Verifies the supplied signature against the BPF program's instruction buffer. 3. If the signature is valid and was generated by a key in the trusted keyring, the program load proceeds. 4. If no signature is provided, the load proceeds as before, allowing for backward compatibility. LSMs can choose to restrict unsigned programs and implement a security policy. 5. If signature verification fails for any reason, the program is not loaded. Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250921160120.9711-2-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/verification.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/verification.h b/include/linux/verification.h index 4f3022d081c3..dec7f2beabfd 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -36,6 +36,7 @@ enum key_being_used_for { VERIFYING_KEY_SIGNATURE, VERIFYING_KEY_SELF_SIGNATURE, VERIFYING_UNSPECIFIED_SIGNATURE, + VERIFYING_BPF_SIGNATURE, NR__KEY_BEING_USED_FOR }; #ifdef CONFIG_SYSTEM_DATA_VERIFICATION -- cgit v1.2.3 From 39f17c707454290900b608ee5a200b5db9245626 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 14 Sep 2025 13:09:08 +0200 Subject: sched/task.h: fix the wrong comment on task_lock() nesting with tasklist_lock The ancient comment above task_lock() states that it can be nested outside of read_lock(&tasklist_lock), but this is no longer true:

    CPU_0                   CPU_1                   CPU_2

    task_lock()
                            read_lock(tasklist)
                                                    write_lock_irq(tasklist)
    read_lock(tasklist)
                            task_lock()

Unless CPU_0 calls read_lock() in IRQ context, queued_read_lock_slowpath() won't get the lock immediately; it will spin waiting for the pending writer on CPU_2, resulting in a deadlock.
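The allowed nesting after this change, sketched for a task_struct *p (illustrative only):

read_lock(&tasklist_lock);
task_lock(p);		/* ok: task_lock() nests inside */
/* ... access p->cpuset, p->vfork_done, ... */
task_unlock(p);
read_unlock(&tasklist_lock);

/* The reverse order - task_lock(p) first, then read_lock(&tasklist_lock) -
 * is what the scenario above shows deadlocking once a writer is pending,
 * because the queued rwlock parks the second reader behind the writer.
 */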
Link: https://lkml.kernel.org/r/20250914110908.GA18769@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Jiri Slaby Cc: Mateusz Guzik Signed-off-by: Andrew Morton --- include/linux/sched/task.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ea41795a352b..8ff98b18b24b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -210,9 +210,8 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * pins the final release of task.io_context. Also protects ->cpuset and * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. + * Nests inside of read_lock(&tasklist_lock). It must not be nested with + * write_lock_irq(&tasklist_lock), neither inside nor outside. */ static inline void task_lock(struct task_struct *p) { -- cgit v1.2.3 From af6703838ecb1513efdd2502a8f7bb6472c5ce96 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 3 Sep 2025 18:48:41 +0100 Subject: mm: specify separate file and vm_file params in vm_area_desc Patch series "mm: do not assume file == vma->vm_file in compat_vma_mmap_prepare()", v2. As part of the efforts to eliminate the problematic f_op->mmap callback, a new callback - f_op->mmap_prepare - was provided. While we are converting these callbacks, we must deal with 'stacked' filesystems and drivers - those which in their own f_op->mmap callback invoke an inner f_op->mmap callback. To accommodate this, a compatibility layer is provided that, via vfs_mmap(), detects if f_op->mmap_prepare is provided and if so, generates a vm_area_desc containing the VMA's metadata and invokes the call. So far, we have provided desc->file equal to vma->vm_file. However this is not necessarily valid, especially in the case of stacked drivers which wish to assign a new file after the inner hook is invoked. To account for this, we adjust vm_area_desc to have both file and vm_file fields. The .vm_file field is strictly set to vma->vm_file (or in the case of a new mapping, what will become vma->vm_file). However, .file is set to whichever file vfs_mmap() is invoked with when using the compatibility layer. Therefore, if the VMA's file needs to be updated in .mmap_prepare, desc->vm_file should be assigned, whilst desc->file should be read. No current f_op->mmap_prepare users assign desc->file so this is safe to do. This makes the .mmap_prepare callback in the context of a stacked filesystem or driver completely consistent with the existing .mmap implementations. While we're here, we do a few small cleanups, and ensure that we const-ify things correctly in the vm_area_desc struct to avoid hooks accidentally trying to assign fields they should not. This patch (of 2): Stacked filesystems and drivers may invoke mmap hooks with a struct file pointer that differs from the overlying file. We will make this functionality possible in a subsequent patch. In order to prepare for this, let's update vm_area_desc to separately provide desc->file and desc->vm_file parameters. The desc->file parameter is the file that the hook is expected to operate upon, and is not assignable (though the hook may wish to e.g. update the file's accessed time).
The desc->vm_file defaults to what will become vma->vm_file and is what the hook must reassign should it wish to change the VMA's vma->vm_file. For now we keep desc->file and desc->vm_file the same to remain consistent. No f_op->mmap_prepare() callback sets a new vma->vm_file currently, so this is safe to change. While we're here, make the mm_struct pointed to by desc->mm immutable, as well as the desc->mm field itself. As part of this change, also update the single hook which this would otherwise break - mlock_future_ok(), invoked by secretmem_mmap_prepare(). We additionally update set_vma_from_desc() to compare fields in a more logical fashion, checking the (possibly) user-modified fields as the first operand against the existing value as the second one. Additionally, update VMA tests to accommodate changes. Link: https://lkml.kernel.org/r/cover.1756920635.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/3fa15a861bb7419f033d22970598aa61850ea267.1756920635.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Al Viro Cc: Christian Brauner Cc: David Hildenbrand Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Pedro Falcato Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6920c816f6c6..965dedb3ccfa 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -777,13 +777,14 @@ struct pfnmap_track_ctx { */ struct vm_area_desc { /* Immutable state. */ - struct mm_struct *mm; + const struct mm_struct *const mm; + struct file *const file; /* May vary from vm_file in stacked callers. */ unsigned long start; unsigned long end; /* Mutable fields. Populated with initial state. */ pgoff_t pgoff; - struct file *file; + struct file *vm_file; vm_flags_t vm_flags; pgprot_t page_prot; -- cgit v1.2.3 From f7a741c53b712542aedd9382f215fbe969f8a580 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 3 Sep 2025 18:48:42 +0100 Subject: mm: do not assume file == vma->vm_file in compat_vma_mmap_prepare() In commit bb666b7c2707 ("mm: add mmap_prepare() compatibility layer for nested file systems") we introduced the ability for stacked drivers and file systems to correctly invoke the f_op->mmap_prepare() handler from an f_op->mmap() handler via a compatibility layer implemented in compat_vma_mmap_prepare(). This populates vm_area_desc fields according to those found in the (not yet fully initialised) VMA passed to f_op->mmap(). However this function implicitly assumes that the struct file which we are operating upon is equal to vma->vm_file. This is not a safe assumption in all cases. The only really sane situation in which this matters would be something like e.g. i915_gem_dmabuf_mmap() which invokes vfs_mmap() against obj->base.filp: ret = vfs_mmap(obj->base.filp, vma); if (ret) return ret; And then sets the VMA's file to this, should the mmap operation succeed: vma_set_file(vma, obj->base.filp); That is - it is the file that is intended to back the VMA mapping. This is not an issue currently, as so far we have only implemented f_op->mmap_prepare() handlers for some file systems and internal mm uses, and the only stacked f_op->mmap() operations that can be performed upon these are those in backing_file_mmap() and coda_file_mmap(), both of which use vma->vm_file.
However, moving forward, as we convert drivers to using f_op->mmap_prepare(), this will become a problem. Resolve this issue by explicitly setting desc->file to the provided file parameter and update callers accordingly. Callers are expected to read desc->file and update desc->vm_file - the former will be the file provided by the caller (if stacked, this may differ from vma->vm_file). If the caller needs to differentiate between the two they therefore now can. While we are here, also provide a variant of compat_vma_mmap_prepare() that operates against a pointer to any file_operations struct and does not assume that the file_operations struct we are interested in is file->f_op. This function is __compat_vma_mmap_prepare() and we invoke it from compat_vma_mmap_prepare() so that we share code between the two functions. This is important, because some drivers provide hooks in a separate struct, for instance struct drm_device provides an fops field for this purpose. Also update the VMA selftests accordingly. Link: https://lkml.kernel.org/r/dd0c72df8a33e8ffaa243eeb9b01010b670610e9.1756920635.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Christian Brauner Reviewed-by: Pedro Falcato Reviewed-by: Liam R. Howlett Cc: Al Viro Cc: David Hildenbrand Cc: Jan Kara Cc: Jann Horn Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0783c5d05d3f..594bd4d0521e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2279,6 +2279,8 @@ static inline bool can_mmap_file(struct file *file) return true; } +int __compat_vma_mmap_prepare(const struct file_operations *f_op, + struct file *file, struct vm_area_struct *vma); int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) -- cgit v1.2.3 From ef9f603fd3d4b7937f2cdbce40e47df0a54b2a55 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Mon, 22 Sep 2025 11:02:31 -0600 Subject: io_uring/cmd: drop unused res2 param from io_uring_cmd_done() Commit 79525b51acc1 ("io_uring: fix nvme's 32b cqes on mixed cq") split out a separate io_uring_cmd_done32() helper for ->uring_cmd() implementations that return 32-byte CQEs. The res2 value passed to io_uring_cmd_done() is now unused because __io_uring_cmd_done() ignores it when is_cqe32 is passed as false. So drop the parameter from io_uring_cmd_done() to simplify the callers and clarify that it's not possible to return an extra value beyond the 32-bit CQE result. 
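Illustrative completion paths in a hypothetical ->uring_cmd() implementation after this change:

/* Normal (16-byte CQE) completion: the always-ignored res2 is gone. */
io_uring_cmd_done(ioucmd, ret, issue_flags);

/* Rings set up with 32-byte CQEs keep the extra result value. */
io_uring_cmd_done32(ioucmd, ret, res2, issue_flags);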
Signed-off-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 02d50f08f668..7509025b4071 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -160,9 +160,9 @@ static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) } static inline void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, - u64 res2, unsigned issue_flags) + unsigned issue_flags) { - return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, false); + return __io_uring_cmd_done(ioucmd, ret, 0, issue_flags, false); } static inline void io_uring_cmd_done32(struct io_uring_cmd *ioucmd, s32 ret, -- cgit v1.2.3 From 7c7da8aa3fd69b77e8efaa14b80bddd948547739 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:37:09 +0900 Subject: can: dev: turn can_set_static_ctrlmode() into a non-inline function can_set_static_ctrlmode() is declared as a static inline. But it is only called in the probe function of the devices and so does not really benefit from any kind of optimization. Transform it into a "normal" function by moving it to drivers/net/can/dev/dev.c Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-can-fix-mtu-v3-2-581bde113f52@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 9a92cbe5b2cb..5dc58360c2d7 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -125,27 +125,6 @@ static inline s32 can_get_relative_tdco(const struct can_priv *priv) return (s32)priv->fd.tdc.tdco - sample_point_in_tc; } -/* helper to define static CAN controller features at device creation time */ -static inline int __must_check can_set_static_ctrlmode(struct net_device *dev, - u32 static_mode) -{ - struct can_priv *priv = netdev_priv(dev); - - /* alloc_candev() succeeded => netdev_priv() is valid at this point */ - if (priv->ctrlmode_supported & static_mode) { - netdev_warn(dev, - "Controller features can not be supported and static at the same time\n"); - return -EINVAL; - } - priv->ctrlmode = static_mode; - - /* override MTU which was set by default in can_setup()? */ - if (static_mode & CAN_CTRLMODE_FD) - dev->mtu = CANFD_MTU; - - return 0; -} - static inline u32 can_get_static_ctrlmode(struct can_priv *priv) { return priv->ctrlmode & ~priv->ctrlmode_supported; @@ -188,6 +167,8 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); +int __must_check can_set_static_ctrlmode(struct net_device *dev, + u32 static_mode); int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, struct kernel_ethtool_ts_info *info); -- cgit v1.2.3 From 91daac8a6893c65e18f194946ad3ad9df5e9de8d Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 16 Sep 2025 08:10:07 +0200 Subject: genirq/msi: Remove msi_post_free() The only user of msi_post_free() - powerpc/pseries - has been changed to use msi_teardown(). Remove this unused callback. 
Signed-off-by: Nam Cao Reviewed-by: Thomas Gleixner Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250916061007.964005-1-namcao@linutronix.de --- include/linux/msi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index e5e86a8529fb..faac634ac230 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -431,8 +431,6 @@ struct msi_domain_info; * function. * @domain_free_irqs: Optional function to override the default free * function. - * @msi_post_free: Optional function which is invoked after freeing - * all interrupts. * @msi_translate: Optional translate callback to support the odd wire to * MSI bridges, e.g. MBIGEN * @@ -473,8 +471,6 @@ struct msi_domain_ops { struct device *dev, int nvec); void (*domain_free_irqs)(struct irq_domain *domain, struct device *dev); - void (*msi_post_free)(struct irq_domain *domain, - struct device *dev); int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec, irq_hw_number_t *hwirq, unsigned int *type); }; -- cgit v1.2.3 From 35758b0032c056cdff3e8f5a70669cb3e2c8d0e4 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:49 +0200 Subject: dibs: Create drivers/dibs Create the file structure for a 'DIBS - Direct Internal Buffer Sharing' shim layer that will provide generic functionality and declarations for dibs device drivers and dibs clients. Following patches will add functionality. Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-4-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/dibs.h (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h new file mode 100644 index 000000000000..3f4175aaa732 --- /dev/null +++ b/include/linux/dibs.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Direct Internal Buffer Sharing + * + * Definitions for the DIBS module + * + * Copyright IBM Corp. 2025 + */ +#ifndef _DIBS_H +#define _DIBS_H + +/* DIBS - Direct Internal Buffer Sharing - concept + * ----------------------------------------------- + * In the case of multiple system sharing the same hardware, dibs fabrics can + * provide dibs devices to these systems. The systems use dibs devices of the + * same fabric to communicate via dmbs (Direct Memory Buffers). Each dmb has + * exactly one owning local dibs device and one remote using dibs device, that + * is authorized to write into this dmb. This access control is provided by the + * dibs fabric. + * + * Because the access to the dmb is based on access to physical memory, it is + * lossless and synchronous. The remote devices can directly access any offset + * of the dmb. + * + * Dibs fabrics, dibs devices and dmbs are identified by tokens and ids. + * Dibs fabric id is unique within the same hardware (with the exception of the + * dibs loopback fabric), dmb token is unique within the same fabric, dibs + * device gids are guaranteed to be unique within the same fabric and + * statistically likely to be globally unique. The exchange of these tokens and + * ids between the systems is not part of the dibs concept. + * + * The dibs layer provides an abstraction between dibs device drivers and dibs + * clients. 
+ */ + +#define MAX_DIBS_CLIENTS 8 + +struct dibs_client { + const char *name; +}; + +#endif /* _DIBS_H */ -- cgit v1.2.3 From d324a2ca3f8efd57f5839aa2690554a5cbb3586f Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:50 +0200 Subject: dibs: Register smc as dibs_client Formally register smc as dibs client. Functionality will be moved by follow-on patches from ism_client to dibs_client until eventually ism_client can be removed. As DIBS is only a shim layer without any dependencies, we can depend SMC on DIBS without adding indirect dependencies. A follow-on patch will remove dependency of SMC on ISM. Signed-off-by: Alexandra Winter Reviewed-by: Julian Ruess Link: https://patch.msgid.link/20250918110500.1731261-5-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 3f4175aaa732..7bedeaf52c1b 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -33,10 +33,33 @@ * clients. */ +/* DIBS client + * ----------- + */ #define MAX_DIBS_CLIENTS 8 struct dibs_client { + /* client name for logging and debugging purposes */ const char *name; + /* client index - provided and used by dibs layer */ + u8 id; }; +/* Functions to be called by dibs clients: + */ +/** + * dibs_register_client() - register a client with dibs layer + * @client: this client + * + * Return: zero on success. + */ +int dibs_register_client(struct dibs_client *client); +/** + * dibs_unregister_client() - unregister a client with dibs layer + * @client: this client + * + * Return: zero on success. + */ +int dibs_unregister_client(struct dibs_client *client); + #endif /* _DIBS_H */ -- cgit v1.2.3 From 269726968f95ebc00e3a47f91eebd6818991d6fa Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:51 +0200 Subject: dibs: Register ism as dibs device Register ism devices with the dibs layer. Follow-on patches will move functionality to the dibs layer. As DIBS is only a shim layer without any dependencies, we can depend ISM on DIBS without adding indirect dependencies. A follow-on patch will remove implication of SMC by ISM. Define struct dibs_dev. Follow-on patches will move more content into dibs_dev. The goal of follow-on patches is that ism_dev will only contain fields that are special for this device driver. The same concept will apply to other dibs device drivers. Define dibs_dev_alloc(), dibs_dev_add() and dibs_dev_del() to be called by dibs device drivers and call them from ism_drv.c Use ism_dev.dibs for a pointer to dibs_dev. 
Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-6-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/ism.h | 1 + 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 7bedeaf52c1b..c12db19c98c0 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -9,6 +9,7 @@ #ifndef _DIBS_H #define _DIBS_H +#include /* DIBS - Direct Internal Buffer Sharing - concept * ----------------------------------------------- * In the case of multiple system sharing the same hardware, dibs fabrics can @@ -62,4 +63,41 @@ int dibs_register_client(struct dibs_client *client); */ int dibs_unregister_client(struct dibs_client *client); +/* DIBS devices + * ------------ + */ +struct dibs_dev { + struct list_head list; +}; + +/* ------- End of client-only functions ----------- */ + +/* + * Functions to be called by dibs device drivers: + */ +/** + * dibs_dev_alloc() - allocate and reference device structure + * + * The following fields will be valid upon successful return: dev + * NOTE: Use put_device(dibs_get_dev(@dibs)) to give up your reference instead + * of freeing @dibs @dev directly once you have successfully called this + * function. + * Return: Pointer to dibs device structure + */ +struct dibs_dev *dibs_dev_alloc(void); +/** + * dibs_dev_add() - register with dibs layer and all clients + * @dibs: dibs device + * + * The following fields must be valid upon entry: dev, ops, drv_priv + * All fields will be valid upon successful return. + * Return: zero on success + */ +int dibs_dev_add(struct dibs_dev *dibs); +/** + * dibs_dev_del() - unregister from dibs layer and all clients + * @dibs: dibs device + */ +void dibs_dev_del(struct dibs_dev *dibs); + #endif /* _DIBS_H */ diff --git a/include/linux/ism.h b/include/linux/ism.h index 8358b4cd7ba6..9a53d3c48c16 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -30,6 +30,7 @@ struct ism_dev { spinlock_t lock; /* protects the ism device */ spinlock_t cmd_lock; /* serializes cmds */ struct list_head list; + struct dibs_dev *dibs; struct pci_dev *pdev; struct ism_sba *sba; -- cgit v1.2.3 From 69baaac9361edd169713562f088829a1be9c51a9 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:53 +0200 Subject: dibs: Define dibs_client_ops and dibs_dev_ops Move the device add() and remove() functions from ism_client to dibs_client_ops and call add_dev()/del_dev() for ism devices and dibs_loopback devices. dibs_client_ops->add_dev() = smcd_register_dev() for the smc_dibs_client. This is the first step to handle ism and loopback devices alike (as dibs devices) in the smc dibs client. Define dibs_dev->ops and move smcd_ops->get_chid to dibs_dev_ops->get_fabric_id() for ism and loopback devices. See below for why this needs to be in the same patch as dibs_client_ops->add_dev(). The following changes contain intermediate steps, that will be obsoleted by follow-on patches, once more functionality has been moved to dibs: Use different smcd_ops and max_dmbs for ism and loopback. Follow-on patches will change SMC-D to directly use dibs_ops instead of smcd_ops. In smcd_register_dev() it is now necessary to identify a dibs_loopback device before smcd_dev and smcd_ops->get_chid() are available. So provide dibs_dev_ops->get_fabric_id() in this patch and evaluate it in smc_ism_is_loopback(). 
Call smc_loopback_init() in smcd_register_dev() and call smc_loopback_exit() in smcd_unregister_dev() to handle the functionality that is still in smc_loopback. Follow-on patches will move all smc_loopback code to dibs_loopback. In smcd_[un]register_dev() use only ism device name, this will be replaced by dibs device name by a follow-on patch. End of changes with intermediate parts. Allocate an smcd event workqueue for all dibs devices, although dibs_loopback does not generate events. Use kernel memory instead of devres memory for smcd_dev and smcd->conn. Since commit a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") an ism device and its driver can have a longer lifetime than the smc module, so smc should not rely on devres to free its resources [1]. It is now the responsibility of the smc client to free smcd and smcd->conn for all dibs devices, ism devices as well as loopback. Call client->ops->del_dev() for all existing dibs devices in dibs_unregister_client(), so all device related structures can be freed in the client. When dibs_unregister_client() is called in the context of smc_exit() or smc_core_reboot_event(), these functions have already called smc_lgrs_shutdown() which calls smc_smcd_terminate_all(smcd) and sets going_away. This is done a second time in smcd_unregister_dev(). This is analogous to how smcr is handled in these functions, by calling first smc_lgrs_shutdown() and then smc_ib_unregister_client() > smc_ib_remove_dev(), so leave it that way. It may be worth investigating, whether smc_lgrs_shutdown() is still required or useful. Remove CONFIG_SMC_LO. CONFIG_DIBS_LO now controls whether a dibs loopback device exists or not. Link: https://www.kernel.org/doc/Documentation/driver-model/devres.txt [1] Signed-off-by: Alexandra Winter Reviewed-by: Mahanta Jambigi Link: https://patch.msgid.link/20250918110500.1731261-8-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/ism.h | 2 -- 2 files changed, 87 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index c12db19c98c0..805ab33271b5 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -34,14 +34,45 @@ * clients. */ +struct dibs_dev; + /* DIBS client * ----------- */ #define MAX_DIBS_CLIENTS 8 +/* All dibs clients have access to all dibs devices. + * A dibs client provides the following functions to be called by dibs layer or + * dibs device drivers: + */ +struct dibs_client_ops { + /** + * add_dev() - add a dibs device + * @dev: device that was added + * + * Will be called during dibs_register_client() for all existing + * dibs devices and whenever a new dibs device is registered. + * dev is usable until dibs_client.remove() is called. + * *dev is protected by device refcounting. + */ + void (*add_dev)(struct dibs_dev *dev); + /** + * del_dev() - remove a dibs device + * @dev: device to be removed + * + * Will be called whenever a dibs device is removed. + * Will be called during dibs_unregister_client() for all existing + * dibs devices and whenever a dibs device is unregistered. + * The device has already stopped initiative for this client: + * No new handlers will be started. + * The device is no longer usable by this client after this call. 
+ */ + void (*del_dev)(struct dibs_dev *dev); +}; struct dibs_client { /* client name for logging and debugging purposes */ const char *name; + const struct dibs_client_ops *ops; /* client index - provided and used by dibs layer */ u8 id; }; @@ -52,6 +83,7 @@ struct dibs_client { * dibs_register_client() - register a client with dibs layer * @client: this client * + * Will call client->ops->add_dev() for all existing dibs devices. * Return: zero on success. */ int dibs_register_client(struct dibs_client *client); @@ -59,21 +91,74 @@ int dibs_register_client(struct dibs_client *client); * dibs_unregister_client() - unregister a client with dibs layer * @client: this client * + * Will call client->ops->del_dev() for all existing dibs devices. * Return: zero on success. */ int dibs_unregister_client(struct dibs_client *client); +/* dibs clients can call dibs device ops. */ + /* DIBS devices * ------------ */ + +/* Defined fabric id / CHID for all loopback devices: + * All dibs loopback devices report this fabric id. In this case devices with + * the same fabric id can NOT communicate via dibs. Only loopback devices with + * the same dibs device gid can communicate (=same device with itself). + */ +#define DIBS_LOOPBACK_FABRIC 0xFFFF + +/* A dibs device provides the following functions to be called by dibs clients. + * They are mandatory, unless marked 'optional'. + */ +struct dibs_dev_ops { + /** + * get_fabric_id() + * @dev: local dibs device + * + * Only devices on the same dibs fabric can communicate. Fabric_id is + * unique inside the same HW system. Use fabric_id for fast negative + * checks, but only query_remote_gid() can give a reliable positive + * answer: + * Different fabric_id: dibs is not possible + * Same fabric_id: dibs may be possible or not + * (e.g. different HW systems) + * EXCEPTION: DIBS_LOOPBACK_FABRIC denotes an ism_loopback device + * that can only communicate with itself. Use dibs_dev.gid + * or query_remote_gid()to determine whether sender and + * receiver use the same ism_loopback device. + * Return: 2 byte dibs fabric id + */ + u16 (*get_fabric_id)(struct dibs_dev *dev); +}; + struct dibs_dev { struct list_head list; + /* To be filled by device driver, before calling dibs_dev_add(): */ + const struct dibs_dev_ops *ops; + /* priv pointer for device driver */ + void *drv_priv; + + /* priv pointer per client; for client usage only */ + void *priv[MAX_DIBS_CLIENTS]; }; +static inline void dibs_set_priv(struct dibs_dev *dev, + struct dibs_client *client, void *priv) +{ + dev->priv[client->id] = priv; +} + +static inline void *dibs_get_priv(struct dibs_dev *dev, + struct dibs_client *client) +{ + return dev->priv[client->id]; +} + /* ------- End of client-only functions ----------- */ -/* - * Functions to be called by dibs device drivers: +/* Functions to be called by dibs device drivers: */ /** * dibs_dev_alloc() - allocate and reference device structure diff --git a/include/linux/ism.h b/include/linux/ism.h index 9a53d3c48c16..c818a25996db 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -59,8 +59,6 @@ struct ism_event { struct ism_client { const char *name; - void (*add)(struct ism_dev *dev); - void (*remove)(struct ism_dev *dev); void (*handle_event)(struct ism_dev *dev, struct ism_event *event); /* Parameter dmbemask contains a bit vector with updated DMBEs, if sent * via ism_move_data(). 
Callback function must handle all active bits -- cgit v1.2.3 From 845c334a0186a23c2ac4abfb444e499fec831b24 Mon Sep 17 00:00:00 2001 From: Julian Ruess Date: Thu, 18 Sep 2025 13:04:54 +0200 Subject: dibs: Move struct device to dibs_dev Move struct device from ism_dev and smc_lo_dev to dibs_dev, and define a corresponding release function. Free ism_dev in ism_remove() and smc_lo_dev in smc_lo_dev_remove(). Replace smcd->ops->get_dev(smcd) by using dibs->dev directly. An alternative design would be to embed dibs_dev as a field in ism_dev and do the same for other dibs device driver specific structs. However, that would have the disadvantage that each dibs device driver needs to allocate dibs_dev and each dibs device driver needs a different device release function. The advantage would be that ism_dev and other device driver specific structs would be covered by device reference counts. Signed-off-by: Julian Ruess Co-developed-by: Alexandra Winter Signed-off-by: Alexandra Winter Reviewed-by: Mahanta Jambigi Link: https://patch.msgid.link/20250918110500.1731261-9-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 1 + include/linux/ism.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 805ab33271b5..793c6e1ece0f 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -135,6 +135,7 @@ struct dibs_dev_ops { struct dibs_dev { struct list_head list; + struct device dev; /* To be filled by device driver, before calling dibs_dev_add(): */ const struct dibs_dev_ops *ops; /* priv pointer for device driver */ diff --git a/include/linux/ism.h b/include/linux/ism.h index c818a25996db..84f1afb3dded 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -42,7 +42,6 @@ struct ism_dev { struct ism_eq *ieq; dma_addr_t ieq_dma_addr; - struct device dev; u64 local_gid; int ieq_idx; -- cgit v1.2.3 From 05e68d8dedf34f270cc3769ffe7f0ed413f23add Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:56 +0200 Subject: dibs: Local gid for dibs devices Define a uuid_t GID attribute to identify a dibs device. SMC uses 64 Bit and 128 Bit Global Identifiers (GIDs) per device that need to be sent via the SMC protocol. Because the smc code uses integers, network endianness and host endianness need to be considered. Avoid this in the dibs layer by using uuid_t byte arrays. Future patches could change SMC to use uuid_t. For now, conversion helper functions are introduced. ISM devices provide 64 Bit GIDs. Map them to dibs uuid_t GIDs like this: _________________________________________ | 64 Bit ISM-vPCI GID | 00000000_00000000 | ----------------------------------------- If interpreted as a UUID [1], this would map to the UUID variant that is reserved for NCS backward compatibility. So it will not collide with UUIDs that were generated according to the standard. smc_loopback already uses version 4 UUIDs as 128 Bit GIDs; move that to dibs loopback. A temporary change to smc_lo_query_rgid() is required; it will be moved to dibs_loopback with a follow-on patch. Provide the gid of a dibs device as a read-only sysfs attribute.
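A minimal sketch of the kind of conversion helper described above; the helper name and the big-endian byte order are illustrative assumptions, not taken from the patch:

static inline void ism_gid_to_dibs_gid(u64 ism_gid, uuid_t *uuid)
{
	__be64 gid = cpu_to_be64(ism_gid);	/* assumed byte order */

	/* 64 Bit ISM-vPCI GID in the first 8 bytes, rest stays zero */
	memset(uuid, 0, sizeof(*uuid));
	memcpy(&uuid->b[0], &gid, sizeof(gid));
}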
Link: https://datatracker.ietf.org/doc/html/rfc4122 [1] Signed-off-by: Alexandra Winter Reviewed-by: Julian Ruess Reviewed-by: Mahanta Jambigi Link: https://patch.msgid.link/20250918110500.1731261-11-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 3 +++ include/linux/ism.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 793c6e1ece0f..904f37505c27 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -10,6 +10,8 @@ #define _DIBS_H #include +#include + /* DIBS - Direct Internal Buffer Sharing - concept * ----------------------------------------------- * In the case of multiple system sharing the same hardware, dibs fabrics can @@ -138,6 +140,7 @@ struct dibs_dev { struct device dev; /* To be filled by device driver, before calling dibs_dev_add(): */ const struct dibs_dev_ops *ops; + uuid_t gid; /* priv pointer for device driver */ void *drv_priv; diff --git a/include/linux/ism.h b/include/linux/ism.h index 84f1afb3dded..a926dd61b5a1 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -42,7 +42,6 @@ struct ism_dev { struct ism_eq *ieq; dma_addr_t ieq_dma_addr; - u64 local_gid; int ieq_idx; struct ism_client *subs[MAX_CLIENTS]; -- cgit v1.2.3 From 92a0f7bb081dde6e88368816b8ba51352ddabb1d Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:57 +0200 Subject: dibs: Move vlan support to dibs_dev_ops It can be debated how much benefit the definition of vlan ids for dibs devices brings, as the dmbs are accessible only by a single peer anyhow. But ism provides vlan support and smcd exploits it, so move it to the dibs layer as an optional feature. smcd_loopback simply ignores all vlan settings; do the same in dibs_loopback. SMC-D and ISM have a method to use the invalid VLAN ID 1FFF (ISM_RESERVED_VLANID) to indicate that both communication peers support routable SMC-Dv2. Tolerate it in dibs, but move it to SMC only. Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-12-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 904f37505c27..166148fb8d76 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -133,6 +133,25 @@ struct dibs_dev_ops { * Return: 2 byte dibs fabric id */ u16 (*get_fabric_id)(struct dibs_dev *dev); + /** + * add_vlan_id() - add dibs device to vlan (optional, deprecated) + * @dev: dibs device + * @vlan_id: vlan id + * + * In order to write into a vlan-tagged dmb, the remote device needs + * to belong to this vlan. A device can belong to more than 1 vlan. + * Any device can access an untagged dmb. + * Deprecated, only supported for backwards compatibility. + * Return: zero on success + */ + int (*add_vlan_id)(struct dibs_dev *dev, u64 vlan_id); + /** + * del_vlan_id() - remove dibs device from vlan (optional, deprecated) + * @dev: dibs device + * @vlan_id: vlan id + * Return: zero on success + */ + int (*del_vlan_id)(struct dibs_dev *dev, u64 vlan_id); }; struct dibs_dev { -- cgit v1.2.3 From 719c3b67bb7ea95bb8158b03c75641c8fc8f94a0 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:58 +0200 Subject: dibs: Move query_remote_gid() to dibs_dev_ops Provide dibs_dev_ops->query_remote_gid() in the ism and dibs_loopback dibs devices, and call it in the smc dibs client.
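As a hedged sketch, a dibs client could use the new op for a reachability check like this (variable names hypothetical; the deprecated vlan arguments are passed as zero):

	/* dev: local dibs device, rgid: gid of the remote peer */
	if (dev->ops->query_remote_gid(dev, &rgid, 0, 0))
		return -ENETUNREACH;	/* peer not reachable via this device */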
Reviewed-by: Julian Ruess Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-13-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 166148fb8d76..c75a40fe3039 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -133,6 +133,20 @@ struct dibs_dev_ops { * Return: 2 byte dibs fabric id */ u16 (*get_fabric_id)(struct dibs_dev *dev); + /** + * query_remote_gid() + * @dev: local dibs device + * @rgid: gid of remote dibs device + * @vid_valid: if zero, vid will be ignored; + * deprecated, ignored if device does not support vlan + * @vid: VLAN id; deprecated, ignored if device does not support vlan + * + * Query whether a remote dibs device is reachable via this local device + * and this vlan id. + * Return: 0 if remote gid is reachable. + */ + int (*query_remote_gid)(struct dibs_dev *dev, const uuid_t *rgid, + u32 vid_valid, u32 vid); /** * add_vlan_id() - add dibs device to vlan (optional, deprecated) * @dev: dibs device -- cgit v1.2.3 From cc21191b584c6f7836b0f10774f8278b7cbfba10 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:59 +0200 Subject: dibs: Move data path to dibs layer Use struct dibs_dmb instead of struct smc_dmb and move the corresponding client tables to dibs_dev. Leave driver-specific implementation details like sba in the device drivers. Register and unregister dmbs via dibs_dev_ops. A dmb is dedicated to a single client, but a dibs device can have dmbs for more than one client. Trigger dibs clients via dibs_client_ops->handle_irq() when data is received into a dmb. For dibs_loopback, replace scheduling an smcd receive tasklet with calling dibs_client_ops->handle_irq(). For loopback devices, attach_dmb(), detach_dmb() and move_data() need to access the dmb tables, so move those to dibs_dev_ops in this patch as well. Remove remaining definitions of smc_loopback as they are no longer required, now that everything is in dibs_loopback. Note that struct ism_client and struct ism_dev are still required in smc until a follow-on patch moves event handling to dibs. (Loopback does not use events). Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-14-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ism.h | 23 ------- 2 files changed, 177 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index c75a40fe3039..be009c614205 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -36,12 +36,44 @@ * clients. */ +/* DMB - Direct Memory Buffer + * -------------------------- + * A dibs client provides a dmb as input buffer for a local receiving + * dibs device for exactly one (remote) sending dibs device. Only this + * sending device can send data into this dmb using move_data(). Sender + * and receiver can be the same device. A dmb belongs to exactly one client. + */ +struct dibs_dmb { + /* tok - Token for this dmb + * Used by remote and local devices and clients to address this dmb. + * Provided by dibs fabric. Unique per dibs fabric.
+ */ + u64 dmb_tok; + /* rgid - GID of designated remote sending device */ + uuid_t rgid; + /* cpu_addr - buffer address */ + void *cpu_addr; + /* len - buffer length */ + u32 dmb_len; + /* idx - Index of this DMB on this receiving device */ + u32 idx; + /* VLAN support (deprecated) + * In order to write into a vlan-tagged dmb, the remote device needs + * to belong to this vlan + */ + u32 vlan_valid; + u32 vlan_id; + /* optional, used by device driver */ + dma_addr_t dma_addr; +}; + struct dibs_dev; /* DIBS client * ----------- */ #define MAX_DIBS_CLIENTS 8 +#define NO_DIBS_CLIENT 0xff /* All dibs clients have access to all dibs devices. * A dibs client provides the following functions to be called by dibs layer or * dibs device drivers: @@ -69,6 +101,22 @@ struct dibs_client_ops { * The device is no longer usable by this client after this call. */ void (*del_dev)(struct dibs_dev *dev); + /** + * handle_irq() - Handle signaling for a DMB + * @dev: device that owns the dmb + * @idx: Index of the dmb that got signalled + * @dmbemask: signaling mask of the dmb + * + * Handle signaling for a dmb that was registered by this client + * for this device. + * The dibs device can coalesce multiple signaling triggers into a + * single call of handle_irq(). dmbemask can be used to indicate + * different kinds of triggers. + * + * Context: Called in IRQ context by dibs device driver + */ + void (*handle_irq)(struct dibs_dev *dev, unsigned int idx, + u16 dmbemask); }; struct dibs_client { @@ -147,6 +195,77 @@ struct dibs_dev_ops { */ int (*query_remote_gid)(struct dibs_dev *dev, const uuid_t *rgid, u32 vid_valid, u32 vid); + /** + * max_dmbs() + * Return: Max number of DMBs that can be registered for this kind of + * dibs_dev + */ + int (*max_dmbs)(void); + /** + * register_dmb() - allocate and register a dmb + * @dev: dibs device + * @dmb: dmb struct to be registered + * @client: dibs client + * @vid: VLAN id; deprecated, ignored if device does not support vlan + * + * The following fields of dmb must provide valid input: + * @rgid: gid of remote user device + * @dmb_len: buffer length + * @idx: Optionally: requested idx (if non-zero) + * @vlan_valid: if zero, vlan_id will be ignored; + * deprecated, ignored if device does not support vlan + * @vlan_id: deprecated, ignored if device does not support vlan + * Upon return, the following fields will additionally be valid: + * @dmb_tok: for usage by remote and local devices and clients + * @cpu_addr: allocated buffer + * @idx: dmb index, unique per dibs device + * @dma_addr: to be used by device driver, if applicable + * + * Allocate a dmb buffer and register it with this device and for this + * client. + * Return: zero on success + */ + int (*register_dmb)(struct dibs_dev *dev, struct dibs_dmb *dmb, + struct dibs_client *client); + /** + * unregister_dmb() - unregister and free a dmb + * @dev: dibs device + * @dmb: dmb struct to be unregistered + * The following fields of dmb must provide valid input: + * @dmb_tok + * @cpu_addr + * @idx + * + * Free dmb.cpu_addr and unregister the dmb from this device. + * Return: zero on success + */ + int (*unregister_dmb)(struct dibs_dev *dev, struct dibs_dmb *dmb); + /** + * move_data() - write into a remote dmb + * @dev: Local sending dibs device + * @dmb_tok: Token of the remote dmb + * @idx: signaling index in dmbemask + * @sf: signaling flag; + * if true, idx will be turned on in the target dmbemask + * and target device will be signaled.
+ * @offset: offset within target dmb + * @data: pointer to data to be sent + * @size: length of data to be sent, can be zero. + * + * Use dev to write data of size at offset into a remote dmb + * identified by dmb_tok. Data is moved synchronously; *data can + * be freed when this function returns. + * + * If signaling flag (sf) is true, bit number idx will be turned + * on in the dmbemask mask when handle_irq() is called at the remote + * dibs client that owns the target dmb. The target device may choose + * to coalesce the signaling triggers of multiple move_data() calls + * to the same target dmb into a single handle_irq() call. + * Return: zero on success + */ + int (*move_data)(struct dibs_dev *dev, u64 dmb_tok, unsigned int idx, + bool sf, unsigned int offset, void *data, + unsigned int size); /** * add_vlan_id() - add dibs device to vlan (optional, deprecated) * @dev: dibs device @@ -166,6 +285,55 @@ struct dibs_dev_ops { * Return: zero on success */ int (*del_vlan_id)(struct dibs_dev *dev, u64 vlan_id); + /** + * support_mmapped_rdmb() - can this device provide memory mapped + * remote dmbs? (optional) + * @dev: dibs device + * + * A dibs device can provide a kernel address + length that represent + * a remote target dmb (like MMIO). Alternatively to calling + * move_data(), a dibs client can write into such a ghost-send-buffer + * (= to this kernel address) and the data will automatically + * immediately appear in the target dmb, even without calling + * move_data(). + * + * Either all 3 function pointers for support_mmapped_rdmb(), + * attach_dmb() and detach_dmb() are defined, or all of them must + * be NULL. + * + * Return: non-zero, if memory mapped remote dmbs are supported. + */ + int (*support_mmapped_rdmb)(struct dibs_dev *dev); + /** + * attach_dmb() - attach local memory to a remote dmb + * @dev: Local sending ism device + * @dmb: all other parameters are passed in the form of a + * dmb struct + * TODO: (THIS IS CONFUSING, should be changed) + * dmb_tok: (in) Token of the remote dmb, we want to attach to + * cpu_addr: (out) MMIO address + * dma_addr: (out) MMIO address (if applicable, invalid otherwise) + * dmb_len: (out) length of local MMIO region, + * equal to length of remote DMB. + * sba_idx: (out) index of remote dmb (NOT HELPFUL, should be removed) + * + * Provides a memory address to the sender that can be used to + * directly write into the remote dmb. + * Memory is available until detach_dmb is called + * + * Return: Zero upon success, Error code otherwise + */ + int (*attach_dmb)(struct dibs_dev *dev, struct dibs_dmb *dmb); + /** + * detach_dmb() - Detach the ghost buffer from a remote dmb + * @dev: ism device + * @token: dmb token of the remote dmb + * + * No need to free cpu_addr.
+ * + * Return: Zero upon success, Error code otherwise + */ + int (*detach_dmb)(struct dibs_dev *dev, u64 token); }; struct dibs_dev { @@ -179,6 +347,15 @@ struct dibs_dev { /* priv pointer per client; for client usage only */ void *priv[MAX_DIBS_CLIENTS]; + + /* get this lock before accessing any of the fields below */ + spinlock_t lock; + /* array of client ids indexed by dmb idx; + * can be used as indices into priv and subs arrays + */ + u8 *dmb_clientid_arr; + /* Sparse array of all dibs clients */ + struct dibs_client *subs[MAX_DIBS_CLIENTS]; }; static inline void dibs_set_priv(struct dibs_dev *dev, diff --git a/include/linux/ism.h b/include/linux/ism.h index a926dd61b5a1..b7feb4dcd5a8 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -11,17 +11,6 @@ #include -struct ism_dmb { - u64 dmb_tok; - u64 rgid; - u32 dmb_len; - u32 sba_idx; - u32 vlan_valid; - u32 vlan_id; - void *cpu_addr; - dma_addr_t dma_addr; -}; - /* Unless we gain unexpected popularity, this limit should hold for a while */ #define MAX_CLIENTS 8 #define ISM_NR_DMBS 1920 @@ -36,7 +25,6 @@ struct ism_dev { struct ism_sba *sba; dma_addr_t sba_dma_addr; DECLARE_BITMAP(sba_bitmap, ISM_NR_DMBS); - u8 *sba_client_arr; /* entries are indices into 'clients' array */ void *priv[MAX_CLIENTS]; struct ism_eq *ieq; @@ -58,11 +46,6 @@ struct ism_event { struct ism_client { const char *name; void (*handle_event)(struct ism_dev *dev, struct ism_event *event); - /* Parameter dmbemask contains a bit vector with updated DMBEs, if sent - * via ism_move_data(). Callback function must handle all active bits - * indicated by dmbemask. - */ - void (*handle_irq)(struct ism_dev *dev, unsigned int bit, u16 dmbemask); /* Private area - don't touch! */ u8 id; }; @@ -79,12 +62,6 @@ static inline void ism_set_priv(struct ism_dev *dev, struct ism_client *client, dev->priv[client->id] = priv; } -int ism_register_dmb(struct ism_dev *dev, struct ism_dmb *dmb, - struct ism_client *client); -int ism_unregister_dmb(struct ism_dev *dev, struct ism_dmb *dmb); -int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, - unsigned int offset, void *data, unsigned int size); - const struct smcd_ops *ism_get_smcd_ops(void); #endif /* _ISM_H */ -- cgit v1.2.3 From a612dbe8d04d47af91fa88f0599c1370cc70f687 Mon Sep 17 00:00:00 2001 From: Julian Ruess Date: Thu, 18 Sep 2025 13:05:00 +0200 Subject: dibs: Move event handling to dibs layer Add defines for all event types and subtypes an ism device is known to produce, as they can be helpful for debugging purposes. Introduce a generic 'struct dibs_event' and adapt the ism device driver and smc-d client accordingly. Tolerate and ignore other type and subtype values to enable future device extensions. SMC-D and ISM are now independent. struct ism_dev can be moved to drivers/s390/net/ism.h. Note that in smc, the term 'ism' is still used. Future patches could replace that with 'dibs' or 'smc-d' as appropriate.
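A sketch of a client-side handle_event() implementation against the structures added below (the client name and the reactions in the branches are hypothetical):

static void my_client_handle_event(struct dibs_dev *dev,
				   const struct dibs_event *event)
{
	switch (event->type) {
	case DIBS_BUF_EVENT:
		/* e.g. clean up state for event->buffer_tok */
		break;
	case DIBS_DEV_EVENT:
		/* e.g. terminate connections to event->gid */
		break;
	default:
		/* tolerate unknown types for future device extensions */
		break;
	}
}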
Signed-off-by: Julian Ruess Co-developed-by: Alexandra Winter Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-15-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index be009c614205..c75607f8a5cf 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -67,6 +67,41 @@ struct dibs_dmb { dma_addr_t dma_addr; }; +/* DIBS events + * ----------- + * Dibs devices can optionally notify dibs clients about events that happened + * in the fabric or at the remote device or remote dmb. + */ +enum dibs_event_type { + /* Buffer event, e.g. a remote dmb was unregistered */ + DIBS_BUF_EVENT, + /* Device event, e.g. a remote dibs device was disabled */ + DIBS_DEV_EVENT, + /* Software event, a dibs client can send an event signal to a + * remote dibs device. + */ + DIBS_SW_EVENT, + DIBS_OTHER_TYPE +}; + +enum dibs_event_subtype { + DIBS_BUF_UNREGISTERED, + DIBS_DEV_DISABLED, + DIBS_DEV_ERR_STATE, + DIBS_OTHER_SUBTYPE +}; + +struct dibs_event { + u32 type; + u32 subtype; + /* uuid_null if invalid */ + uuid_t gid; + /* zero if invalid */ + u64 buffer_tok; + u64 time; + /* additional data or zero */ + u64 data; +}; + struct dibs_dev; /* DIBS client @@ -117,6 +152,15 @@ struct dibs_client_ops { */ void (*handle_irq)(struct dibs_dev *dev, unsigned int idx, u16 dmbemask); + /** + * handle_event() - Handle control information sent by device + * @dev: device reporting the event + * @event: dibs event structure + * + * Context: Called in IRQ context by dibs device driver + */ + void (*handle_event)(struct dibs_dev *dev, + const struct dibs_event *event); }; struct dibs_client { @@ -285,6 +329,24 @@ struct dibs_dev_ops { * Return: zero on success */ int (*del_vlan_id)(struct dibs_dev *dev, u64 vlan_id); + /** + * signal_event() - trigger an event at a remote dibs device (optional) + * @dev: local dibs device + * @rgid: gid of remote dibs device + * @trigger_irq: zero: notification may be coalesced with other events + * non-zero: notify immediately + * @event_code: 4 byte event code, meaning is defined by dibs client + * @info: 8 bytes of additional information, + * meaning is defined by dibs client + * + * dibs devices can offer support for sending a control event of type + * EVENT_SWR to a remote dibs device. + * NOTE: handle_event() will be called for all registered dibs clients + * at the remote device. + * Return: zero on success + */ + int (*signal_event)(struct dibs_dev *dev, const uuid_t *rgid, + u32 trigger_irq, u32 event_code, u64 info); /** * support_mmapped_rdmb() - can this device provide memory mapped * remote dmbs? (optional) -- cgit v1.2.3 From 9095d207417477eb50e84fd0652895db77ec584e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 14 Aug 2025 14:22:12 -0300 Subject: fs: Create sb_encoding() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Filesystems that need to deal with the super block encoding need to use an if IS_ENABLED(CONFIG_UNICODE) guard around it because this struct member is not declared otherwise. In order to move these if/endif guards outside of the filesystem code and make it simpler, create a new function that returns the s_encoding member of struct super_block if Unicode is enabled, and return NULL otherwise.
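A minimal usage sketch of the new helper (the caller code is hypothetical; utf8_casefold() is the existing unicode helper):

	unsigned char namebuf[NAME_MAX];
	struct unicode_map *um = sb_encoding(dentry->d_sb);

	if (um)
		return utf8_casefold(um, &dentry->d_name, namebuf,
				     sizeof(namebuf));
	/* no encoding: fall back to a plain byte-wise path */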
Suggested-by: Amir Goldstein Reviewed-by: Amir Goldstein Signed-off-by: André Almeida Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Amir Goldstein --- include/linux/fs.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..43b3a7cf6750 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3740,15 +3740,20 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qst } #endif -static inline bool sb_has_encoding(const struct super_block *sb) +static inline struct unicode_map *sb_encoding(const struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) - return !!sb->s_encoding; + return sb->s_encoding; #else - return false; + return NULL; #endif } +static inline bool sb_has_encoding(const struct super_block *sb) +{ + return !!sb_encoding(sb); +} + int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); -- cgit v1.2.3 From 23253e278a4574114d4c2729ed70f70b4ec7a30e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 14 Aug 2025 14:22:13 -0300 Subject: fs: Create sb_same_encoding() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For cases where a file lookup can look in different filesystems (like in overlayfs), both super blocks must have the same encoding and the same flags. To help with that, create a sb_same_encoding() function. Reviewed-by: Amir Goldstein Signed-off-by: André Almeida Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Amir Goldstein --- include/linux/fs.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 43b3a7cf6750..ec867f112fd5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3754,6 +3754,24 @@ static inline bool sb_has_encoding(const struct super_block *sb) return !!sb_encoding(sb); } +/* + * Compare if two super blocks have the same encoding and flags + */ +static inline bool sb_same_encoding(const struct super_block *sb1, + const struct super_block *sb2) +{ +#if IS_ENABLED(CONFIG_UNICODE) + if (sb1->s_encoding == sb2->s_encoding) + return true; + + return (sb1->s_encoding && sb2->s_encoding && + (sb1->s_encoding->version == sb2->s_encoding->version) && + (sb1->s_encoding_flags == sb2->s_encoding_flags)); +#else + return true; +#endif +} + int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); -- cgit v1.2.3 From 0494fc345b377d1207c2cbfef67dc51f6ec874c0 Mon Sep 17 00:00:00 2001 From: Gokul Praveen Date: Tue, 12 Aug 2025 16:23:46 +0530 Subject: clocksource/drivers/timer-ti-dm : Capture functionality for OMAP DM timer Add a PWM capture function to the DM timer driver. The OMAP DM timer hardware supports a capture feature. It can be used to timestamp events (falling/rising edges) detected on an input signal.
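A hedged sketch of how a consumer might drive the new capture ops (the flag values and return-value checks are assumptions for illustration; ops and timer come from the dmtimer-omap platform data):

	unsigned int period, duty;

	ops->set_cap(timer, 1, true);	/* capture with autoreload and period config */
	if (ops->get_cap_status(timer) > 0) {
		period = ops->read_cap(timer, true);	/* is_period = true */
		duty = ops->read_cap(timer, false);
	}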
Signed-off-by: Gokul Praveen Signed-off-by: Daniel Lezcano Reviewed-by: Neha Malcom Francis Link: https://lore.kernel.org/r/20250812105346.203541-1-g-praveen@ti.com --- include/linux/platform_data/dmtimer-omap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h index 95d852aef130..726d89143842 100644 --- a/include/linux/platform_data/dmtimer-omap.h +++ b/include/linux/platform_data/dmtimer-omap.h @@ -36,9 +36,13 @@ struct omap_dm_timer_ops { int (*set_pwm)(struct omap_dm_timer *timer, int def_on, int toggle, int trigger, int autoreload); int (*get_pwm_status)(struct omap_dm_timer *timer); + int (*set_cap)(struct omap_dm_timer *timer, + int autoreload, bool config_period); + int (*get_cap_status)(struct omap_dm_timer *timer); int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler); unsigned int (*read_counter)(struct omap_dm_timer *timer); + unsigned int (*read_cap)(struct omap_dm_timer *timer, bool is_period); int (*write_counter)(struct omap_dm_timer *timer, unsigned int value); unsigned int (*read_status)(struct omap_dm_timer *timer); -- cgit v1.2.3 From 17eb98d6b517b6e7faaebed496fd688dbb1771d9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:48 +1000 Subject: VFS/ovl: add lookup_one_positive_killable() ovl wants a lookup which won't block on a fatal signal. It currently uses down_write_killable() and then repeatedly calls lookup_one(). The lock may not be needed if the name is already in the dcache, and it aids proposed future changes if the locking is kept internal to namei.c. So this patch adds lookup_one_positive_killable() which is like lookup_one_positive_unlocked() but will abort in the face of a fatal signal. overlayfs is changed to use this. Note that instead of always getting an exclusive lock, ovl now only gets a shared lock, and only sometimes. The exclusive lock was never needed. However, down_read_killable() was only added in v4.15 but overlayfs started using down_write_killable() here in v4.7. Note that the linked list ->first_maybe_whiteout / ->next_maybe_whiteout is local to the thread so there is no concurrency in that list which could be threatened by removing the locking. Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/namei.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 5d085428e471..551a1a01e5e7 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -80,6 +80,9 @@ struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base); +struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap, + struct qstr *name, + struct dentry *base); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); -- cgit v1.2.3 From d7fb2c410240348edee7867c29b60688175dcc11 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:50 +1000 Subject: VFS: unify old_mnt_idmap and new_mnt_idmap in renamedata A rename operation can only rename within a single mount. Callers of vfs_rename() must and do ensure this is the case. So there is no point in having two mnt_idmaps in renamedata as they are always the same. Only one of them is passed to ->rename in any case. This patch replaces both with a single "mnt_idmap" and changes all callers.
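A sketch of a vfs_rename() caller after the unification (variables hypothetical; field names as in the updated struct below):

	struct renamedata rd = {
		.mnt_idmap   = idmap,	/* was .old_mnt_idmap / .new_mnt_idmap */
		.old_parent  = old_dir_dentry,
		.old_dentry  = old_dentry,
		.new_parent  = new_dir_dentry,
		.new_dentry  = new_dentry,
	};

	error = vfs_rename(&rd);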
Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..73b39e5bb9e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2008,20 +2008,18 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, /** * struct renamedata - contains all information required for renaming - * @old_mnt_idmap: idmap of the old mount the inode was found from + * @mnt_idmap: idmap of the mount in which the rename is happening. * @old_parent: parent of source * @old_dentry: source - * @new_mnt_idmap: idmap of the new mount the inode was found from * @new_parent: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { - struct mnt_idmap *old_mnt_idmap; + struct mnt_idmap *mnt_idmap; struct dentry *old_parent; struct dentry *old_dentry; - struct mnt_idmap *new_mnt_idmap; struct dentry *new_parent; struct dentry *new_dentry; struct inode **delegated_inode; -- cgit v1.2.3 From 76a53de6f7ff0641570364234fb4489f4d4fc8e9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:51 +1000 Subject: VFS/audit: introduce kern_path_parent() for audit audit_alloc_mark() and audit_get_nd() both need to perform a path lookup getting the parent dentry (which must exist) and the final target (following a LAST_NORM name) which sometimes doesn't need to exist. They don't need the parent to be locked, but use kern_path_locked() or kern_path_locked_negative() anyway. This is somewhat misleading to the casual reader. This patch introduces a more targeted function, kern_path_parent(), which returns without holding locks. On success, the "path" will be set to the parent, which must be found, and the return value is the dentry of the target, which might be negative. This will clear the way to renaming kern_path_locked(), which is otherwise only used to prepare for removing something. It also allows us to remove kern_path_locked_negative(), which is transformed into the new kern_path_parent().
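A hedged sketch of the audit-style lookup this enables (error handling abbreviated; the body of the positive branch is hypothetical):

	struct path parent;
	struct dentry *d = kern_path_parent(pathname, &parent);

	if (IS_ERR(d))
		return PTR_ERR(d);
	if (d_is_positive(d)) {
		/* target exists, e.g. attach the audit watch here */
	}
	dput(d);
	path_put(&parent);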
Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 551a1a01e5e7..1d5038c21c20 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -57,12 +57,12 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags); extern int kern_path(const char *, unsigned, struct path *); +struct dentry *kern_path_parent(const char *name, struct path *parent); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); -extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); -- cgit v1.2.3 From 3d18f80ce181ba27f37d0ec1c550b22acb01dd49 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:52 +1000 Subject: VFS: rename kern_path_locked() and related functions. kern_path_locked() is now only used to prepare for removing an object from the filesystem (and that is the only credible reason for wanting a positive locked dentry). Thus it corresponds to kern_path_create() and so should have a corresponding name. Unfortunately, the name "kern_path_create" is somewhat misleading as it doesn't actually create anything. The recently added simple_start_creating() provides a better pattern, I believe. The "start" can be matched with "end" to bracket the creating or removing. So this patch changes names: kern_path_locked -> start_removing_path kern_path_create -> start_creating_path user_path_create -> start_creating_user_path user_path_locked_at -> start_removing_user_path_at done_path_create -> end_creating_path and also introduces end_removing_path() which is identical to end_creating_path(). __start_removing_path (which was __kern_path_locked) is enhanced to call mnt_want_write() for consistency with start_creating_path().
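A sketch of the new bracketing, assuming the renamed helpers keep their predecessors' semantics (caller code hypothetical):

	struct path path;
	struct dentry *dentry;
	int err;

	dentry = start_creating_path(AT_FDCWD, name, &path, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	err = vfs_mknod(mnt_idmap(path.mnt), d_inode(path.dentry),
			dentry, S_IFIFO | 0600, 0);
	end_creating_path(&path, dentry);
	return err;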
Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/namei.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 1d5038c21c20..a7800ef04e76 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -59,11 +59,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, extern int kern_path(const char *, unsigned, struct path *); struct dentry *kern_path_parent(const char *name, struct path *parent); -extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); -extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); -extern void done_path_create(struct path *, struct dentry *); -extern struct dentry *kern_path_locked(const char *, struct path *); -extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); +extern struct dentry *start_creating_path(int, const char *, struct path *, unsigned int); +extern struct dentry *start_creating_user_path(int, const char __user *, struct path *, unsigned int); +extern void end_creating_path(struct path *, struct dentry *); +extern struct dentry *start_removing_path(const char *, struct path *); +extern struct dentry *start_removing_user_path_at(int , const char __user *, struct path *); +static inline void end_removing_path(struct path *path , struct dentry *dentry) +{ + end_creating_path(path, dentry); +} int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); -- cgit v1.2.3 From 1abc1b212effe920f4729353880c8e03f1d76b4b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:40 +0100 Subject: coresight: Appropriately disable programming clocks Some CoreSight components have programming clocks (pclk), which are enabled using clk_get() and clk_prepare_enable(). However, in many cases, these clocks are not disabled when modules exit and are only released by clk_put(). To fix the issue, this commit refactors the programming clock handling by replacing clk_get() and clk_prepare_enable() with devm_clk_get_optional_enabled() for enabling the APB clock. If the "apb_pclk" clock is not found, a NULL pointer is returned, and the function proceeds to attempt enabling the "apb" clock. Since ACPI platforms rely on firmware to manage clocks, returning a NULL pointer in this case leaves clock management to the firmware rather than the driver. This effectively avoids a clock imbalance issue during module removal, where the clock could be disabled twice: once during the ACPI runtime suspend and again during the devm resource release. Callers are updated to reuse the returned error value. With the change, programming clocks are managed as resources in the driver model layer, allowing clock cleanup to be handled automatically. As a result, manual cleanup operations are no longer needed and are removed from the Coresight drivers.
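A sketch of the devres-managed pattern the series moves to (simplified; the fallback logic matches the diff below):

	struct clk *pclk;

	pclk = devm_clk_get_optional_enabled(dev, "apb_pclk");
	if (IS_ERR(pclk))
		return PTR_ERR(pclk);
	if (!pclk)
		pclk = devm_clk_get_optional_enabled(dev, "apb");
	/* no clk_disable_unprepare()/clk_put() needed on removal */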
Fixes: 73d779a03a76 ("coresight: etm4x: Change etm4_platform_driver driver for MMIO devices") Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-4-1dfe10bb3f6f@arm.com --- include/linux/coresight.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 4ac65c68bbf4..1e652e157841 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,6 +6,7 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H +#include #include #include #include @@ -480,26 +481,21 @@ static inline bool is_coresight_device(void __iomem *base) * Returns: * * clk - Clock is found and enabled - * NULL - clock is not found + * NULL - Clock is controlled by firmware (ACPI device only) * ERROR - Clock is found but failed to enable */ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) { struct clk *pclk; - int ret; - pclk = clk_get(dev, "apb_pclk"); - if (IS_ERR(pclk)) { - pclk = clk_get(dev, "apb"); - if (IS_ERR(pclk)) - return NULL; - } + /* Firmware controls clocks for an ACPI device. */ + if (has_acpi_companion(dev)) + return NULL; + + pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); + if (!pclk) + pclk = devm_clk_get_optional_enabled(dev, "apb"); - ret = clk_prepare_enable(pclk); - if (ret) { - clk_put(pclk); - return ERR_PTR(ret); - } return pclk; } -- cgit v1.2.3 From d091c6312561821f216ced63a7ad17c946b6d335 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:42 +0100 Subject: coresight: Avoid enable programming clock duplicately The programming clock is enabled by the AMBA bus driver before a dynamic probe. As a result, a CoreSight driver may redundantly enable the same clock. To avoid this, add a check for the device type and skip enabling the programming clock for AMBA devices. The returned NULL pointer will be tolerated by the drivers. Fixes: 73d779a03a76 ("coresight: etm4x: Change etm4_platform_driver driver for MMIO devices") Reviewed-by: Anshuman Khandual Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-6-1dfe10bb3f6f@arm.com --- include/linux/coresight.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 1e652e157841..bb49080ec8f9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -481,20 +481,23 @@ static inline bool is_coresight_device(void __iomem *base) * Returns: * * clk - Clock is found and enabled - * NULL - Clock is controlled by firmware (ACPI device only) + * NULL - Clock is controlled by firmware (ACPI device only) or when managed + * by the AMBA bus driver instead * ERROR - Clock is found but failed to enable */ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) { - struct clk *pclk; + struct clk *pclk = NULL; /* Firmware controls clocks for an ACPI device.
*/ if (has_acpi_companion(dev)) return NULL; - pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); - if (!pclk) - pclk = devm_clk_get_optional_enabled(dev, "apb"); + if (!dev_is_amba(dev)) { + pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); + if (!pclk) + pclk = devm_clk_get_optional_enabled(dev, "apb"); + } return pclk; } -- cgit v1.2.3 From fbe7514a7912959e384acf108931ac1bfbb16466 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:43 +0100 Subject: coresight: Consolidate clock enabling CoreSight drivers enable pclk and atclk conditionally. For example, pclk is only enabled in the static probe, while atclk is an optional clock that is enabled for both dynamic and static probes, if present. In the current CoreSight drivers, these two clocks are initialized separately. This causes complex and duplicated code. CoreSight drivers are refined so that clocks are initialized in one go. For this purpose, this commit renames coresight_get_enable_apb_pclk() to coresight_get_enable_clocks() and encapsulates clock initialization logic: - If a clock is initialized successfully, its clock pointer is assigned to the double pointer passed as an argument. - For ACPI devices, clocks are controlled by firmware; bail out directly. - Skip enabling pclk for an AMBA device. - If atclk is not found, the corresponding double pointer is set to NULL. The function returns success (0) so that callers know they can proceed with no error. - Otherwise, an error number is returned for failures. The function has become complex, so move it from the header to the CoreSight core layer and export the symbol. Comments are added to record the details. Suggested-by: Suzuki K Poulose Reviewed-by: Anshuman Khandual Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-7-1dfe10bb3f6f@arm.com --- include/linux/coresight.h | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bb49080ec8f9..6de59ce8ef8c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,7 +6,6 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H -#include #include #include #include @@ -475,33 +474,6 @@ static inline bool is_coresight_device(void __iomem *base) return cid == CORESIGHT_CID; } -/* - * Attempt to find and enable "APB clock" for the given device - * - * Returns: - * - * clk - Clock is found and enabled - * NULL - Clock is controlled by firmware (ACPI device only) or when managed - * by the AMBA bus driver instead - * ERROR - Clock is found but failed to enable - */ -static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) -{ - struct clk *pclk = NULL; - - /* Firmware controls clocks for an ACPI device.
*/ - if (has_acpi_companion(dev)) - return NULL; - - if (!dev_is_amba(dev)) { - pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); - if (!pclk) - pclk = devm_clk_get_optional_enabled(dev, "apb"); - } - - return pclk; -} - #define CORESIGHT_PIDRn(i) (0xFE0 + ((i) * 4)) static inline u32 coresight_get_pid(struct csdev_access *csa) @@ -732,4 +704,6 @@ void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, struct coresight_device *sink); +int coresight_get_enable_clocks(struct device *dev, struct clk **pclk, + struct clk **atclk); #endif /* _LINUX_COREISGHT_H */ -- cgit v1.2.3 From ade2105e748f85eb026d26701091213855aea633 Mon Sep 17 00:00:00 2001 From: Elijah Wright Date: Wed, 20 Aug 2025 22:39:07 -0700 Subject: tracing: Move buffer in trace_seq to end of struct TRACE_SEQ_BUFFER_SIZE is dependent on the architecture for its size. on 64-bit systems, it is 8148 bytes. forced 8-byte alignment in size_t and seq_buf means that trace_seq is 8200 bytes on 64-bit systems. moving the buffer to the end of the struct fixes the issue. there shouldn't be any side effects, i.e. pointer arithmetic on trace_seq Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250821053917.23301-1-git@elijahs.space Signed-off-by: Elijah Wright Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_seq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index a93ed5ac3226..557780fe1c77 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -21,10 +21,10 @@ (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) struct trace_seq { - char buffer[TRACE_SEQ_BUFFER_SIZE]; struct seq_buf seq; size_t readpos; int full; + char buffer[TRACE_SEQ_BUFFER_SIZE]; }; static inline void -- cgit v1.2.3 From 5c8fd7e2b5b0a527cf88740da122166695382a78 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 23 Sep 2025 12:24:00 +0100 Subject: bpf: bpf task work plumbing This patch adds necessary plumbing in verifier, syscall and maps to support handling new kfunc bpf_task_work_schedule and kernel structure bpf_task_work. The idea is similar to how we already handle bpf_wq and bpf_timer. verifier changes validate calls to bpf_task_work_schedule to make sure it is safe and expected invariants hold. btf part is required to detect bpf_task_work structure inside map value and store its offset, which will be used in the next patch to calculate key and value addresses. arraymap and hashtab changes are needed to handle freeing of the bpf_task_work: run code needed to deinitialize it, for example cancel task_work callback if possible. The use of bpf_task_work and proper implementation for kfuncs are introduced in the next patch. 
Signed-off-by: Mykyta Yatsenko Acked-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250923112404.668720-6-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dfc1a27b56d5..a2ab51fa8b0a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -209,6 +209,7 @@ enum btf_field_type { BPF_WORKQUEUE = (1 << 10), BPF_UPTR = (1 << 11), BPF_RES_SPIN_LOCK = (1 << 12), + BPF_TASK_WORK = (1 << 13), }; enum bpf_cgroup_storage_type { @@ -262,6 +263,7 @@ struct btf_record { int timer_off; int wq_off; int refcount_off; + int task_work_off; struct btf_field fields[]; }; @@ -363,6 +365,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_rb_node"; case BPF_REFCOUNT: return "bpf_refcount"; + case BPF_TASK_WORK: + return "bpf_task_work"; default: WARN_ON_ONCE(1); return "unknown"; @@ -401,6 +405,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_rb_node); case BPF_REFCOUNT: return sizeof(struct bpf_refcount); + case BPF_TASK_WORK: + return sizeof(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; @@ -433,6 +439,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_rb_node); case BPF_REFCOUNT: return __alignof__(struct bpf_refcount); + case BPF_TASK_WORK: + return __alignof__(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; @@ -464,6 +472,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_KPTR_REF: case BPF_KPTR_PERCPU: case BPF_UPTR: + case BPF_TASK_WORK: break; default: WARN_ON_ONCE(1); @@ -600,6 +609,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); void bpf_wq_cancel_and_free(void *timer); +void bpf_task_work_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, @@ -2426,6 +2436,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); +void bpf_obj_free_task_work(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); -- cgit v1.2.3 From 9082aae154be2d9e208b56e249cb886612f7c6cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2025 09:19:22 -0400 Subject: sunrpc: remove dfprintk_cont() and dfprintk_rcu_cont() KERN_CONT hails from a simpler time, when SMP wasn't the norm. These days, it doesn't quite work right since another printk() can always race in between the first one and the one being "continued". Nothing calls dprintk_rcu_cont(), so just remove it. The only caller of dprintk_cont() is in nfs_commit_release_pages(). Just use a normal dprintk() there instead, since this is not SMP-safe anyway. 
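An illustrative before/after for the pattern being removed (format string and variables hypothetical, not the verbatim nfs_commit_release_pages() code):

	/* before: two calls, another CPU's printk can land in between */
	dprintk("NFS: commit (%d@%lld)", count, offset);
	dprintk_cont(" OK\n");

	/* after: one self-contained message */
	dprintk("NFS: commit (%d@%lld) OK\n", count, offset);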
Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Reviewed-by: Simon Horman Signed-off-by: Anna Schumaker --- include/linux/sunrpc/debug.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index f6aeed07fe04..99a6fa4a1d6a 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -23,12 +23,8 @@ extern unsigned int nlm_debug; #define dprintk(fmt, ...) \ dfprintk(FACILITY, fmt, ##__VA_ARGS__) -#define dprintk_cont(fmt, ...) \ - dfprintk_cont(FACILITY, fmt, ##__VA_ARGS__) #define dprintk_rcu(fmt, ...) \ dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__) -#define dprintk_rcu_cont(fmt, ...) \ - dfprintk_rcu_cont(FACILITY, fmt, ##__VA_ARGS__) #undef ifdebug #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -37,29 +33,14 @@ extern unsigned int nlm_debug; # define dfprintk(fac, fmt, ...) \ do { \ ifdebug(fac) \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ -} while (0) - -# define dfprintk_cont(fac, fmt, ...) \ -do { \ - ifdebug(fac) \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ } while (0) # define dfprintk_rcu(fac, fmt, ...) \ do { \ ifdebug(fac) { \ rcu_read_lock(); \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ - rcu_read_unlock(); \ - } \ -} while (0) - -# define dfprintk_rcu_cont(fac, fmt, ...) \ -do { \ - ifdebug(fac) { \ - rcu_read_lock(); \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ rcu_read_unlock(); \ } \ } while (0) @@ -68,7 +49,6 @@ do { \ #else # define ifdebug(fac) if (0) # define dfprintk(fac, fmt, ...) do {} while (0) -# define dfprintk_cont(fac, fmt, ...) do {} while (0) # define dfprintk_rcu(fac, fmt, ...) do {} while (0) # define RPC_IFDEBUG(x) #endif -- cgit v1.2.3 From ec7d8e68ef0ec5c635c8f9e93cd881673445a397 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2025 09:19:23 -0400 Subject: sunrpc: add a Kconfig option to redirect dfprintk() output to trace buffer We have a lot of old dprintk() call sites that aren't going anywhere anytime soon. At the same time, turning them up is a serious burden on the host due to the console locking overhead. Add a new Kconfig option that redirects dfprintk() output to the trace buffer. This is more efficient than logging to the console and allows for proper interleaving of dprintk and static tracepoint events. Since using trace_printk() causes scary warnings to pop at boot time, this new option defaults to "n". Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Reviewed-by: Simon Horman Signed-off-by: Anna Schumaker --- include/linux/sunrpc/debug.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 99a6fa4a1d6a..891f6173c951 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -30,17 +30,23 @@ extern unsigned int nlm_debug; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) +# if IS_ENABLED(CONFIG_SUNRPC_DEBUG_TRACE) +# define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__) +# else +# define __sunrpc_printk(fmt, ...) printk(KERN_DEFAULT fmt, ##__VA_ARGS__) +# endif + # define dfprintk(fac, fmt, ...) \ do { \ ifdebug(fac) \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ } while (0) # define dfprintk_rcu(fac, fmt, ...) 
\ do { \ ifdebug(fac) { \ rcu_read_lock(); \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ rcu_read_unlock(); \ } \ } while (0) -- cgit v1.2.3 From 62c0c0e7491211969d8d1c2a9ab0e112b34664cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Aug 2025 10:27:28 -0400 Subject: SUNRPC: Move the svc_rpcb_cleanup() call sites Clean up: because svc_rpcb_cleanup() and svc_xprt_destroy_all() are always invoked in pairs, we can deduplicate code by moving the svc_rpcb_cleanup() call sites into svc_xprt_destroy_all(). Signed-off-by: Chuck Lever Tested-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc_xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 369a89aea186..fde60d4e2cd5 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -165,7 +165,8 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); -- cgit v1.2.3 From 301f3470273c89df3a933762b7495569f650e68b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 21 Aug 2025 12:27:00 -0400 Subject: nfs: remove NFS_WBACK_BUSY() Nothing calls this macro. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 9aed39abc94b..afe1d8f09d89 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -122,8 +122,6 @@ struct nfs_pageio_descriptor { /* arbitrarily selected limit to number of mirrors */ #define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 -#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) - extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx, struct page *page, unsigned int pgbase, -- cgit v1.2.3 From c8a127596edc026e5364a7a609986dcd3999914c Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 10:04:54 -0400 Subject: SUNRPC: Introduce xdr_set_scratch_folio() This will replace xdr_set_scratch_page() when we switch pages to folios. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8a9ec617cf66..3ce17321689a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -300,6 +300,19 @@ xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); } +/** + * xdr_set_scratch_folio - Attach a scratch buffer for decoding data + * @xdr: pointer to xdr_stream struct + * @folio: an anonymous folio + * + * See xdr_set_scratch_buffer().
+ */ +static inline void +xdr_set_scratch_folio(struct xdr_stream *xdr, struct folio *folio) +{ + xdr_set_scratch_buffer(xdr, folio_address(folio), folio_size(folio)); +} + /** * xdr_reset_scratch_buffer - Clear scratch buffer information * @xdr: pointer to xdr_stream struct -- cgit v1.2.3 From 2f8416f23edf3b5c49ce8e08616d0071cda5c37b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 10:32:29 -0400 Subject: NFS: Update getacl to use xdr_set_scratch_folio() Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac4bff6e9913..7823d4574e29 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -862,7 +862,7 @@ struct nfs_getaclres { size_t acl_len; size_t acl_data_offset; int acl_flags; - struct page * acl_scratch; + struct folio * acl_scratch; }; struct nfs_setattrres { -- cgit v1.2.3 From c9cefd7ae86aa3463adfedca309696ba0946f9c5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 11:14:41 -0400 Subject: NFS: Update listxattr to use xdr_set_scratch_folio() Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7823d4574e29..d56583572c98 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1596,7 +1596,7 @@ struct nfs42_listxattrsargs { struct nfs42_listxattrsres { struct nfs4_sequence_res seq_res; - struct page *scratch; + struct folio *scratch; void *xattr_buf; size_t xattr_len; u64 cookie; -- cgit v1.2.3 From d57e43b72bf2071caac90da323849c3983a695f0 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 14:53:09 -0400 Subject: SUNRPC: Update svcxdr_init_decode() to call xdr_set_scratch_folio() The only snag here is that __folio_alloc_node() doesn't handle NUMA_NO_NODE, so I also need to update svc_pool_map_get_node() to return numa_mem_id() instead. I arrived at this approach by looking at what other users of __folio_alloc_node() do for this case. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 40cbe81360ed..5506d20857c3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -196,7 +196,7 @@ struct svc_rqst { struct xdr_buf rq_arg; struct xdr_stream rq_arg_stream; struct xdr_stream rq_res_stream; - struct page *rq_scratch_page; + struct folio *rq_scratch_folio; struct xdr_buf rq_res; unsigned long rq_maxpages; /* num of entries in rq_pages */ struct page * *rq_pages; @@ -503,7 +503,7 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; xdr_init_decode(xdr, buf, argv->iov_base, NULL); - xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); + xdr_set_scratch_folio(xdr, rqstp->rq_scratch_folio); } /** -- cgit v1.2.3 From cc6ac66f1c0946299b8f192026cff9a320aaad18 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 1 Jul 2025 10:46:50 -0400 Subject: SUNRPC: Update gssx_accept_sec_context() to use xdr_set_scratch_folio() This was the last caller of xdr_set_scratch_page(), so I remove this function while I'm at it. 
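A minimal usage sketch of the folio-based scratch buffer (the allocation with folio_alloc() is an assumption for illustration; xdr is an initialized struct xdr_stream):

	struct folio *scratch = folio_alloc(GFP_KERNEL, 0);

	if (scratch)
		xdr_set_scratch_folio(&xdr, scratch);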
Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 3ce17321689a..49278749ad0c 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -287,19 +287,6 @@ xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) xdr->scratch.iov_len = buflen; } -/** - * xdr_set_scratch_page - Attach a scratch buffer for decoding data - * @xdr: pointer to xdr_stream struct - * @page: an anonymous page - * - * See xdr_set_scratch_buffer(). - */ -static inline void -xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) -{ - xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); -} - /** * xdr_set_scratch_folio - Attach a scratch buffer for decoding data * @xdr: pointer to xdr_stream struct -- cgit v1.2.3 From 24bbd533f596a4544e17579e9f622918680e7bff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Sep 2025 12:48:14 -0400 Subject: filemap: Add a helper for filesystems implementing dropbehind Add a helper to allow filesystems to attempt to free the 'dropbehind' folio. Signed-off-by: Trond Myklebust Link: https://lore.kernel.org/all/5588a06f6d5a2cf6746828e2d36e7ada668b1739.1745381692.git.trond.myklebust@hammerspace.com/ Reviewed-by: Mike Snitzer Signed-off-by: Anna Schumaker --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 12a12dae727d..201b7c6f6441 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1221,6 +1221,7 @@ void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); +void folio_end_dropbehind(struct folio *folio); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); -- cgit v1.2.3 From 010054a530aa266ee1711dfbe23fc06b6eb0fa48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Sep 2025 12:48:15 -0400 Subject: filemap: Add a version of folio_end_writeback that ignores dropbehind Filesystems such as NFS may need to defer dropbehind until after their 2-stage writes are done. This adds a helper folio_end_writeback_no_dropbehind() that allows them to release the writeback flag without immediately dropping the folio. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 201b7c6f6441..5b26465358ce 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1221,6 +1221,7 @@ void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); +void folio_end_writeback_no_dropbehind(struct folio *folio); void folio_end_dropbehind(struct folio *folio); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); -- cgit v1.2.3 From a91ae3c89311648cbaa9b46b860e4f76004a24b8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 23 Sep 2025 11:01:49 +0000 Subject: bpf, x86: Add support for signed arena loads Currently, signed load instructions into arena memory are unsupported. The compiler is free to generate these, and on GCC-14 we see a corresponding error when it happens. The hurdle in supporting them is deciding which unused opcode to use to mark them for the JIT's own consumption. After much thinking, it appears 0xc0 / BPF_NOSPEC can be combined with load instructions to identify signed arena loads. Use this to recognize and JIT them appropriately, and remove the verifier side limitation on the program if the JIT supports them. Co-developed-by: Puranjay Mohan Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20250923110157.18326-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4241a885975f..f5c859b8131a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -78,6 +78,9 @@ struct ctl_table_header; /* unused opcode to mark special atomic instruction */ #define BPF_PROBE_ATOMIC 0xe0 +/* unused opcode to mark special ldsx instruction. Same as BPF_NOSPEC */ +#define BPF_PROBE_MEM32SX 0xc0 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 -- cgit v1.2.3 From edf005fa274a0c224e550a52726aa7a426384e36 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Sep 2025 09:03:26 -1000 Subject: sched_ext: Improve SCX_KF_DISPATCH comment The comment for SCX_KF_DISPATCH was incomplete and didn't explain that ops.dispatch() may temporarily release the rq lock, allowing ENQUEUE and SELECT_CPU operations to be nested inside DISPATCH contexts. Update the comment to clarify this nesting behavior and provide better context for when these operations can occur within dispatch. Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 7047101dbf58..d82b7a9b0658 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -108,7 +108,11 @@ enum scx_kf_mask { SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */ /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */ - /* ops.dequeue (in REST) may be nested inside DISPATCH */ + /* + * ops.dispatch() may release rq lock temporarily and thus ENQUEUE and + * SELECT_CPU may be nested inside. 
ops.dequeue (in REST) may also be
+	 * nested inside DISPATCH.
+	 */
 	SCX_KF_DISPATCH		= 1 << 1, /* ops.dispatch() */
 	SCX_KF_ENQUEUE		= 1 << 2, /* ops.enqueue() and ops.select_cpu() */
 	SCX_KF_SELECT_CPU	= 1 << 3, /* ops.select_cpu() */
--
cgit v1.2.3


From ccb4f5d91ec43c05ba165ccfc7ed889eb9cdfd05 Mon Sep 17 00:00:00 2001
From: Leon Hwang
Date: Fri, 19 Sep 2025 12:41:09 +0800
Subject: bpf: Allow union argument in trampoline based programs

Currently, functions with 'union' arguments cannot be traced with
fentry/fexit:

  bpftrace -e 'fentry:release_pages { exit(); }' -v
  The function release_pages arg0 type UNION is unsupported.

The type of the 'release_pages' arg0 is defined as:

  typedef union {
  	struct page **pages;
  	struct folio **folios;
  	struct encoded_page **encoded_pages;
  } release_pages_arg __attribute__ ((__transparent_union__));

This patch relaxes the restriction by allowing function arguments of
type 'union' to be traced in the verifier.

Reviewed-by: Amery Hung
Signed-off-by: Leon Hwang
Link: https://lore.kernel.org/r/20250919044110.23729-2-leon.hwang@linux.dev
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a2ab51fa8b0a..ba3a3be7eb2a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1128,7 +1128,7 @@ struct bpf_prog_offload {
  */
 #define MAX_BPF_FUNC_REG_ARGS 5
 
-/* The argument is a structure. */
+/* The argument is a structure or a union. */
 #define BTF_FMODEL_STRUCT_ARG		BIT(0)
 
 /* The argument is signed. */
--
cgit v1.2.3


From b650bf0977d34c52befb31a9fa711534e11b220f Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 22 Sep 2025 10:42:40 +0000
Subject: udp: remove busylock and add per NUMA queues

busylock was protecting UDP sockets against packet floods, but
unfortunately was not protecting the host itself.

Under stress, many CPUs could spin while acquiring the busylock, and
the NIC had to drop packets. Or packets would be dropped in the CPU
backlog if RPS/RFS were in place.

This patch replaces the busylock by intermediate lockless queues.
(One queue per NUMA node).

This means that fewer CPUs have to acquire the UDP receive queue lock.

Most of the CPUs can either:

- immediately drop the packet.
- or queue it in their NUMA aware lockless queue.

Then one of the CPUs is chosen to process this lockless queue in a
batch.

The batch only contains packets that were cooked on the same NUMA node,
thus with very limited latency impact.

Tested:

DDOS targeting a victim UDP socket, on a platform with 6 NUMA nodes
(Intel(R) Xeon(R) 6985P-C)

Before:

nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 1004179            0.0
Udp6InErrors                    3117               0.0
Udp6RcvbufErrors                3117               0.0

After:

nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 1116633            0.0
Udp6InErrors                    14197275           0.0
Udp6RcvbufErrors                14197275           0.0

We can see this host can now process 14.2 M more packets per second
while under attack, and the victim socket can receive 11 % more
packets.

I used a small bpftrace program measuring time (in us) spent in
__udp_enqueue_schedule_skb().
Before: @udp_enqueue_us[398]: [0] 24901 |@@@ | [1] 63512 |@@@@@@@@@ | [2, 4) 344827 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [4, 8) 244673 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [8, 16) 54022 |@@@@@@@@ | [16, 32) 222134 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [32, 64) 232042 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [64, 128) 4219 | | [128, 256) 188 | | After: @udp_enqueue_us[398]: [0] 5608855 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [1] 1111277 |@@@@@@@@@@ | [2, 4) 501439 |@@@@ | [4, 8) 102921 | | [8, 16) 29895 | | [16, 32) 43500 | | [32, 64) 31552 | | [64, 128) 979 | | [128, 256) 13 | | Note that the remaining bottleneck for this platform is in udp_drops_inc() because we limited struct numa_drop_counters to only two nodes so far. Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Reviewed-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250922104240.2182559-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/udp.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index e554890c4415..58795688a186 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -44,6 +44,12 @@ enum { UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; +/* per NUMA structure for lockless producer usage. */ +struct udp_prod_queue { + struct llist_head ll_root ____cacheline_aligned_in_smp; + atomic_t rmem_alloc; +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; @@ -90,6 +96,8 @@ struct udp_sock { struct sk_buff *skb, int nhoff); + struct udp_prod_queue *udp_prod_queue; + /* udp_recvmsg try to use this before splicing sk_receive_queue */ struct sk_buff_head reader_queue ____cacheline_aligned_in_smp; @@ -109,7 +117,6 @@ struct udp_sock { */ struct hlist_node tunnel_list; struct numa_drop_counters drop_counters; - spinlock_t busylock ____cacheline_aligned_in_smp; }; #define udp_test_bit(nr, sk) \ -- cgit v1.2.3 From 092263a03105539b8dfe74c59be4c6cce1304d5f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 20 Sep 2025 23:34:07 +0200 Subject: net: phy: stop exporting phy_driver_register phy_driver_register() isn't used outside phy_device.c any longer, so we can stop exporting it. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/dff44b83-4a85-4fff-bf6b-f12efd97b56e@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d09fc42e61f3..b377dfaa6801 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2032,7 +2032,6 @@ static inline int phy_read_status(struct phy_device *phydev) void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); -int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_error(struct phy_device *phydev); -- cgit v1.2.3 From 6043819e707cefb1c9e59d6e431dcfa735c4f975 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 22 Sep 2025 10:11:32 +0300 Subject: net/mlx5: fs, fix UAF in flow counter release Fix a kernel trace [1] caused by releasing an HWS action of a local flow counter in mlx5_cmd_hws_delete_fte(), where the HWS action refcount and mutex were not initialized and the counter struct could already be freed when deleting the rule. 
Fix it by adding the missing initializations and adding a refcount for
the local flow counter struct.

[1] Kernel log:

 Call Trace:
  dump_stack_lvl+0x34/0x48
  mlx5_fs_put_hws_action.part.0.cold+0x21/0x94 [mlx5_core]
  mlx5_fc_put_hws_action+0x96/0xad [mlx5_core]
  mlx5_fs_destroy_fs_actions+0x8b/0x152 [mlx5_core]
  mlx5_cmd_hws_delete_fte+0x5a/0xa0 [mlx5_core]
  del_hw_fte+0x1ce/0x260 [mlx5_core]
  mlx5_del_flow_rules+0x12d/0x240 [mlx5_core]
  ? ttwu_queue_wakelist+0xf4/0x110
  mlx5_ib_destroy_flow+0x103/0x1b0 [mlx5_ib]
  uverbs_free_flow+0x20/0x50 [ib_uverbs]
  destroy_hw_idr_uobject+0x1b/0x50 [ib_uverbs]
  uverbs_destroy_uobject+0x34/0x1a0 [ib_uverbs]
  uobj_destroy+0x3c/0x80 [ib_uverbs]
  ib_uverbs_run_method+0x23e/0x360 [ib_uverbs]
  ? uverbs_finalize_object+0x60/0x60 [ib_uverbs]
  ib_uverbs_cmd_verbs+0x14f/0x2c0 [ib_uverbs]
  ? do_tty_write+0x1a9/0x270
  ? file_tty_write.constprop.0+0x98/0xc0
  ? new_sync_write+0xfc/0x190
  ib_uverbs_ioctl+0xd7/0x160 [ib_uverbs]
  __x64_sys_ioctl+0x87/0xc0
  do_syscall_64+0x59/0x90

Fixes: b581f4266928 ("net/mlx5: fs, manage flow counters HWS action sharing by refcount")
Signed-off-by: Moshe Shemesh
Reviewed-by: Yevgeny Kliteynik
Reviewed-by: Mark Bloch
Signed-off-by: Tariq Toukan
Link: https://patch.msgid.link/1758525094-816583-2-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski
---
 include/linux/mlx5/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 86055d55836d..6ac76a0c3827 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -308,6 +308,8 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev,
 			       bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 struct mlx5_fc *mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size);
 void mlx5_fc_local_destroy(struct mlx5_fc *counter);
+void mlx5_fc_local_get(struct mlx5_fc *counter);
+void mlx5_fc_local_put(struct mlx5_fc *counter);
 u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
--
cgit v1.2.3


From 7384893d970ea114952aef54ad7e3d7d2ca82d4f Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 16 Sep 2025 23:52:56 +0200
Subject: bpf: Allow uprobe program to change context registers

Currently, uprobe (BPF_PROG_TYPE_KPROBE) programs can't write to the
context register data. While this makes sense for kprobe attachments,
for uprobe attachments it might make sense to be able to change user
space registers to alter application execution.

Since uprobe and kprobe programs share the same type
(BPF_PROG_TYPE_KPROBE), we can't deny write access to the context
during program load. We need to check on it during program attachment
to see if it's going to be kprobe or uprobe.

Store the program's write attempt to the context and check it during
attachment.
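With the restriction lifted, a uprobe program can patch the user-space
register state, for example (illustrative x86-64 sketch; the target
binary, symbol and register choice are made up):

	SEC("uprobe//usr/bin/target:compute")
	int zero_first_arg(struct pt_regs *ctx)
	{
		ctx->di = 0;	/* clear the first argument (%rdi) */
		return 0;
	}
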
Acked-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20250916215301.664963-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ba3a3be7eb2a..ea2ed6771cc6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1639,6 +1639,7 @@ struct bpf_prog_aux { bool priv_stack_requested; bool changes_pkt_data; bool might_sleep; + bool kprobe_write_ctx; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; -- cgit v1.2.3 From d4680a11e14c7baf683cb8453d91d71d2e0b9d3e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 24 Sep 2025 10:14:26 +0200 Subject: bpf: Mark kfuncs as __noclone Some distributions (e.g., CachyOS) support building the kernel with -O3, but doing so may break kfuncs, resulting in their symbols not being properly exported. In fact, with gcc -O3, some kfuncs may be optimized away despite being annotated as noinline. This happens because gcc can still clone the function during IPA optimizations, e.g., by duplicating or inlining it into callers, and then dropping the standalone symbol. This breaks BTF ID resolution since resolve_btfids relies on the presence of a global symbol for each kfunc. Currently, this is not an issue for upstream, because we don't allow building the kernel with -O3, but it may be safer to address it anyway, to prevent potential issues in the future if compilers become more aggressive with optimizations. Therefore, add __noclone to __bpf_kfunc to ensure kfuncs are never cloned and remain distinct, globally visible symbols, regardless of the optimization level. Fixes: 57e7c169cd6af ("bpf: Add __bpf_kfunc tag for marking kernel functions as kfuncs") Acked-by: David Vernet Acked-by: Yonghong Song Signed-off-by: Andrea Righi Link: https://lore.kernel.org/r/20250924081426.156934-1-arighi@nvidia.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 9eda6b113f9b..f06976ffb63f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -86,7 +86,7 @@ * as to avoid issues such as the compiler inlining or eliding either a static * kfunc, or a global kfunc in an LTO build. */ -#define __bpf_kfunc __used __retain noinline +#define __bpf_kfunc __used __retain __noclone noinline #define __bpf_kfunc_start_defs() \ __diag_push(); \ -- cgit v1.2.3 From 64f89f6e1f2b7f8f203d218a8c8d90922e1d4048 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 17 Sep 2025 10:54:05 +0200 Subject: gpio: generic: rename BGPIOF_ flags to GPIO_GENERIC_ Make the flags passed to gpio_generic_chip_init() use the same prefix as the rest of the modernized generic GPIO chip API. 
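At a call site the renamed flags read, for instance, as follows (a
sketch; the register offsets and config field values are illustrative,
not taken from a real driver):

	struct gpio_generic_chip_config config = {
		.dev	= &pdev->dev,
		.sz	= 4,	/* 32-bit data registers */
		.dat	= base + DATA_REG,
		.set	= base + SET_REG,
		.dirout	= base + DIR_REG,
		.flags	= GPIO_GENERIC_BIG_ENDIAN_BYTE_ORDER |
			  GPIO_GENERIC_UNREADABLE_REG_DIR,
	};

	ret = gpio_generic_chip_init(&chip, &config);
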
Link: https://lore.kernel.org/r/20250917-gpio-generic-flags-v1-1-69f51fee8c89@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9b14fd20f13e..e62622e42cad 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -684,15 +684,15 @@ int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -#define BGPIOF_BIG_ENDIAN BIT(0) -#define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ -#define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ -#define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) -#define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ -#define BGPIOF_NO_OUTPUT BIT(5) /* only input */ -#define BGPIOF_NO_SET_ON_INPUT BIT(6) -#define BGPIOF_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ -#define BGPIOF_NO_INPUT BIT(8) /* only output */ +#define GPIO_GENERIC_BIG_ENDIAN BIT(0) +#define GPIO_GENERIC_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ +#define GPIO_GENERIC_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ +#define GPIO_GENERIC_BIG_ENDIAN_BYTE_ORDER BIT(3) +#define GPIO_GENERIC_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ +#define GPIO_GENERIC_NO_OUTPUT BIT(5) /* only input */ +#define GPIO_GENERIC_NO_SET_ON_INPUT BIT(6) +#define GPIO_GENERIC_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ +#define GPIO_GENERIC_NO_INPUT BIT(8) /* only output */ #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, -- cgit v1.2.3 From 2235b26c1b25daf253748acff501af3ea85faaa8 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 17 Sep 2025 10:54:06 +0200 Subject: gpio: generic: move GPIO_GENERIC_ flags to the correct header These flags are specific to gpio-mmio and belong in linux/gpio/generic.h so move them there. 
Link: https://lore.kernel.org/r/20250917-gpio-generic-flags-v1-2-69f51fee8c89@linaro.org
Signed-off-by: Bartosz Golaszewski
---
 include/linux/gpio/driver.h  | 10 ----------
 include/linux/gpio/generic.h | 10 ++++++++++
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index e62622e42cad..fabe2baf7b50 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -684,16 +684,6 @@ int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc,
 
 #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
-#define GPIO_GENERIC_BIG_ENDIAN			BIT(0)
-#define GPIO_GENERIC_UNREADABLE_REG_SET		BIT(1) /* reg_set is unreadable */
-#define GPIO_GENERIC_UNREADABLE_REG_DIR		BIT(2) /* reg_dir is unreadable */
-#define GPIO_GENERIC_BIG_ENDIAN_BYTE_ORDER	BIT(3)
-#define GPIO_GENERIC_READ_OUTPUT_REG_SET	BIT(4) /* reg_set stores output value */
-#define GPIO_GENERIC_NO_OUTPUT			BIT(5) /* only input */
-#define GPIO_GENERIC_NO_SET_ON_INPUT		BIT(6)
-#define GPIO_GENERIC_PINCTRL_BACKEND		BIT(7) /* Call pinctrl direction setters */
-#define GPIO_GENERIC_NO_INPUT			BIT(8) /* only output */
-
 #ifdef CONFIG_GPIOLIB_IRQCHIP
 int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
 				struct irq_domain *domain);
diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h
index 162430d96660..ff566dc9c3cb 100644
--- a/include/linux/gpio/generic.h
+++ b/include/linux/gpio/generic.h
@@ -9,6 +9,16 @@
 
 struct device;
 
+#define GPIO_GENERIC_BIG_ENDIAN			BIT(0)
+#define GPIO_GENERIC_UNREADABLE_REG_SET		BIT(1) /* reg_set is unreadable */
+#define GPIO_GENERIC_UNREADABLE_REG_DIR		BIT(2) /* reg_dir is unreadable */
+#define GPIO_GENERIC_BIG_ENDIAN_BYTE_ORDER	BIT(3)
+#define GPIO_GENERIC_READ_OUTPUT_REG_SET	BIT(4) /* reg_set stores output value */
+#define GPIO_GENERIC_NO_OUTPUT			BIT(5) /* only input */
+#define GPIO_GENERIC_NO_SET_ON_INPUT		BIT(6)
+#define GPIO_GENERIC_PINCTRL_BACKEND		BIT(7) /* Call pinctrl direction setters */
+#define GPIO_GENERIC_NO_INPUT			BIT(8) /* only output */
+
 /**
  * struct gpio_generic_chip_config - Generic GPIO chip configuration data
  * @dev: Parent device of the new GPIO chip (compulsory).
--
cgit v1.2.3


From 23049938605bda390f875ce20e0704252c2e5c3d Mon Sep 17 00:00:00 2001
From: Vincent Mailhol
Date: Tue, 23 Sep 2025 15:37:10 +0900
Subject: can: populate the minimum and maximum MTU values

By populating net_device->min_mtu and net_device->max_mtu, the net core
infrastructure will automatically:

 1. validate that the user's inputs are in range.
 2. report those min and max MTU values through the netlink interface.

Add can_set_default_mtu() which sets the default MTU value as well as
the minimum and maximum values. The logic for the default MTU value
remains unchanged:

 - CANFD_MTU if the device has a static CAN_CTRLMODE_FD.
 - CAN_MTU otherwise.

Call can_set_default_mtu() each time CAN_CTRLMODE_FD is modified. This
will guarantee that the MTU value is always consistent with the control
mode flags.

With this, the checks done in can_change_mtu() become fully redundant
and will be removed in an upcoming change. It is now possible to
confirm the minimum and maximum MTU values on a physical CAN interface
by doing:

  $ ip --details link show can0

The virtual interfaces (vcan and vxcan) are not impacted by this
change.
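Conceptually, the new helper reduces to something like this (a sketch
of the intent, not the literal implementation):

	struct can_priv *priv = netdev_priv(dev);

	dev->min_mtu = CAN_MTU;
	if (priv->ctrlmode & CAN_CTRLMODE_FD) {
		dev->mtu = CANFD_MTU;
		dev->max_mtu = CANFD_MTU;
	} else {
		dev->mtu = CAN_MTU;
		dev->max_mtu = CAN_MTU;
	}
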
Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-can-fix-mtu-v3-3-581bde113f52@kernel.org [mkl: squashed https://patch.msgid.link/20250924143644.17622-2-mailhol@kernel.org] Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5dc58360c2d7..3354f70ed2c6 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -166,6 +166,7 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); +void can_set_default_mtu(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); int __must_check can_set_static_ctrlmode(struct net_device *dev, u32 static_mode); -- cgit v1.2.3 From cc470fcf1d59f9d6186810ea5253da49a4f85f83 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:26 +0900 Subject: can: dev: move struct data_bittiming_params to linux/can/bittiming.h In commit b803c4a4f788 ("can: dev: add struct data_bittiming_params to group FD parameters"), struct data_bittiming_params was put into linux/can/dev.h. This structure being a collection of bittiming parameters, on second thought, bittiming.h is actually a better location. This way, users of struct data_bittiming_params will not have to forcefully include linux/can/dev.h thus removing some complexity and reducing the risk of circular dependencies in headers. Move struct data_bittiming_params from linux/can/dev.h to linux/can/bittiming.h. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-1-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 11 +++++++++++ include/linux/can/dev.h | 11 ----------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 5dfdbb63b1d5..6572ec1712ca 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -114,6 +114,17 @@ struct can_tdc_const { u32 tdcf_max; }; +struct data_bittiming_params { + const struct can_bittiming_const *data_bittiming_const; + struct can_bittiming data_bittiming; + const struct can_tdc_const *tdc_const; + struct can_tdc tdc; + const u32 *data_bitrate_const; + unsigned int data_bitrate_const_cnt; + int (*do_set_data_bittiming)(struct net_device *dev); + int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); +}; + #ifdef CONFIG_CAN_CALC_BITTIMING int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc, struct netlink_ext_ack *extack); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 3354f70ed2c6..c2fe956ab776 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,17 +38,6 @@ enum can_termination_gpio { CAN_TERMINATION_GPIO_MAX, }; -struct data_bittiming_params { - const struct can_bittiming_const *data_bittiming_const; - struct can_bittiming data_bittiming; - const struct can_tdc_const *tdc_const; - struct can_tdc tdc; - const u32 *data_bitrate_const; - unsigned int data_bitrate_const_cnt; - int (*do_set_data_bittiming)(struct net_device *dev); - int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); -}; - /* * CAN common private data */ -- cgit v1.2.3 From 7208385df7846d30e29febc6c6280cb32e91ee82 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 
15:58:27 +0900 Subject: can: dev: make can_get_relative_tdco() FD agnostic and move it to bittiming.h can_get_relative_tdco() needs to access can_priv->fd making it specific to CAN FD. Change the function parameter from struct can_priv to struct data_bittiming_params. This way, the function becomes CAN FD agnostic and can be reused later on for the CAN XL TDC. Now that we dropped the dependency on struct can_priv, also move can_get_relative_tdco() back to bittiming.h where it was meant to belong to. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-2-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 29 +++++++++++++++++++++++++++++ include/linux/can/dev.h | 29 ----------------------------- 2 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 6572ec1712ca..4d5f7794194a 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -160,6 +160,35 @@ int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const unsigned int bitrate_const_cnt, struct netlink_ext_ack *extack); +/* + * can_get_relative_tdco() - TDCO relative to the sample point + * + * struct can_tdc::tdco represents the absolute offset from TDCV. Some + * controllers use instead an offset relative to the Sample Point (SP) + * such that: + * + * SSP = TDCV + absolute TDCO + * = TDCV + SP + relative TDCO + * + * -+----------- one bit ----------+-- TX pin + * |<--- Sample Point --->| + * + * --+----------- one bit ----------+-- RX pin + * |<-------- TDCV -------->| + * |<------------------------>| absolute TDCO + * |<--- Sample Point --->| + * | |<->| relative TDCO + * |<------------- Secondary Sample Point ------------>| + */ +static inline s32 can_get_relative_tdco(const struct data_bittiming_params *dbt_params) +{ + const struct can_bittiming *dbt = &dbt_params->data_bittiming; + s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + + dbt->phase_seg1) * dbt->brp; + + return (s32)dbt_params->tdc.tdco - sample_point_in_tc; +} + /* * can_bit_time() - Duration of one bit * diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c2fe956ab776..8e75e9b3830a 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -85,35 +85,6 @@ static inline bool can_fd_tdc_is_enabled(const struct can_priv *priv) return !!(priv->ctrlmode & CAN_CTRLMODE_FD_TDC_MASK); } -/* - * can_get_relative_tdco() - TDCO relative to the sample point - * - * struct can_tdc::tdco represents the absolute offset from TDCV. 
Some - * controllers use instead an offset relative to the Sample Point (SP) - * such that: - * - * SSP = TDCV + absolute TDCO - * = TDCV + SP + relative TDCO - * - * -+----------- one bit ----------+-- TX pin - * |<--- Sample Point --->| - * - * --+----------- one bit ----------+-- RX pin - * |<-------- TDCV -------->| - * |<------------------------>| absolute TDCO - * |<--- Sample Point --->| - * | |<->| relative TDCO - * |<------------- Secondary Sample Point ------------>| - */ -static inline s32 can_get_relative_tdco(const struct can_priv *priv) -{ - const struct can_bittiming *dbt = &priv->fd.data_bittiming; - s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + - dbt->phase_seg1) * dbt->brp; - - return (s32)priv->fd.tdc.tdco - sample_point_in_tc; -} - static inline u32 can_get_static_ctrlmode(struct can_priv *priv) { return priv->ctrlmode & ~priv->ctrlmode_supported; -- cgit v1.2.3 From b23a8425cba5d7908d69f3bce8f3c697362b50ae Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:30 +0900 Subject: can: netlink: add can_validate_tdc() Factorise the TDC validation out of can_validate() and move it in the new can_validate_tdc() function. This is a preparation patch for the introduction of CAN XL because this TDC validation will be reused later on. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-5-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 4d5f7794194a..71f839c3f032 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -16,6 +16,10 @@ #define CAN_CTRLMODE_FD_TDC_MASK \ (CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL) +#define CAN_CTRLMODE_TDC_AUTO_MASK \ + (CAN_CTRLMODE_TDC_AUTO) +#define CAN_CTRLMODE_TDC_MANUAL_MASK \ + (CAN_CTRLMODE_TDC_MANUAL) /* * struct can_tdc - CAN FD Transmission Delay Compensation parameters -- cgit v1.2.3 From 6ffc1230d3a728e07d7d2464f388ad4bbefe90c2 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:43 +0900 Subject: can: calc_bittiming: make can_calc_tdco() FD agnostic can_calc_tdco() uses the CAN_CTRLMODE_FD_TDC_MASK and CAN_CTRLMODE_TDC_AUTO macros making it specific to CAN FD. Add the tdc mask to the function parameter list. The value of the tdc auto flag can then be derived from that mask and stored in a local variable. This way, the function becomes CAN FD agnostic and can be reused later on for the CAN XL TDC. 
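A CAN FD call site then passes the FD mask explicitly, along the lines
of (sketch; assumes the priv->fd parameter block introduced earlier in
this series):

	can_calc_tdco(&priv->fd.tdc, priv->fd.tdc_const,
		      &priv->fd.data_bittiming, CAN_CTRLMODE_FD_TDC_MASK,
		      &priv->ctrlmode, priv->ctrlmode_supported);
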
Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-18-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 71f839c3f032..d30816dd93c7 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -135,7 +135,7 @@ int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, const struct can_bittiming *dbt, - u32 *ctrlmode, u32 ctrlmode_supported); + u32 tdc_mask, u32 *ctrlmode, u32 ctrlmode_supported); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, @@ -148,7 +148,7 @@ can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, static inline void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, const struct can_bittiming *dbt, - u32 *ctrlmode, u32 ctrlmode_supported) + u32 tdc_mask, u32 *ctrlmode, u32 ctrlmode_supported) { } #endif /* CONFIG_CAN_CALC_BITTIMING */ -- cgit v1.2.3 From 7de54546fff11cb0a53f47847d62f7b1a5792d17 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:44 +0900 Subject: can: dev: add can_get_ctrlmode_str() In an effort to give more human readable messages when errors occur because of conflicting options, it can be useful to convert the CAN control mode flags into text. Add a function which converts the first set CAN control mode into a human readable string. The reason to only convert the first one is to simplify edge cases: imagine that there are several invalid control modes, we would just return the first invalid one to the user, thus not having to handle complex string concatenation. The user can then solve the first problem, call the netlink interface again and see the next issue. People who wish to enumerate all the control modes can still do so by, for example, using this new function in a for_each_set_bit() loop. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-19-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 8e75e9b3830a..a2229a61ccde 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -141,6 +141,8 @@ int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); +const char *can_get_ctrlmode_str(u32 ctrlmode); + void can_state_get_by_berr_counter(const struct net_device *dev, const struct can_berr_counter *bec, enum can_state *tx_state, -- cgit v1.2.3 From 83fb49389bbe07defb85b063f7ff0fd016f06b35 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 18 Sep 2025 10:05:50 +0200 Subject: modpost: Add modname to mod_device_table alias At this point, if a symbol is compiled as part of the kernel, information about which module the symbol belongs to is lost. To save this it is possible to add the module name to the alias name. It's not very pretty, but it's possible for now. 
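As an example (hypothetical module name), a driver built as "snd_foo"
declaring:

	MODULE_DEVICE_TABLE(pci, snd_foo_ids);

now emits a symbol along the lines of
__mod_device_table__kmod_snd_foo__pci__snd_foo_ids, from which
file2alias.c can recover both the owning module and the table type.
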
Cc: Miguel Ojeda Cc: Andreas Hindborg Cc: Danilo Krummrich Cc: Alex Gaynor Cc: rust-for-linux@vger.kernel.org Signed-off-by: Alexey Gladkov Acked-by: Danilo Krummrich Acked-by: Nicolas Schier Link: https://patch.msgid.link/1a0d0bd87a4981d465b9ed21e14f4e78eaa03ded.1758182101.git.legion@kernel.org Signed-off-by: Nathan Chancellor --- include/linux/module.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 3319a5269d28..e31ee29fac6b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -244,10 +244,22 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name); /* What your module does. */ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) +/* + * Format: __mod_device_table__kmod_____ + * Parts of the string `__kmod_` and `__` are used as delimiters when parsing + * a symbol in file2alias.c + */ +#define __mod_device_table(type, name) \ + __PASTE(__mod_device_table__, \ + __PASTE(__KBUILD_MODNAME, \ + __PASTE(__, \ + __PASTE(type, \ + __PASTE(__, name))))) + #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -static typeof(name) __mod_device_table__##type##__##name \ +static typeof(name) __mod_device_table(type, name) \ __attribute__ ((used, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) -- cgit v1.2.3 From 5ab23c7923a1d2ae1890026866a2d8506b010a4a Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 18 Sep 2025 10:05:51 +0200 Subject: modpost: Create modalias for builtin modules For some modules, modalias is generated using the modpost utility and the section is added to the module file. When a module is added inside vmlinux, modpost does not generate modalias for such modules and the information is lost. As a result kmod (which uses modules.builtin.modinfo in userspace) cannot determine that modalias is handled by a builtin kernel module. $ cat /sys/devices/pci0000:00/0000:00:14.0/modalias pci:v00008086d0000A36Dsv00001043sd00008694bc0Csc03i30 $ modinfo xhci_pci name: xhci_pci filename: (builtin) license: GPL file: drivers/usb/host/xhci-pci description: xHCI PCI Host Controller Driver Missing modalias "pci:v*d*sv*sd*bc0Csc03i30*" which will be generated by modpost if the module is built separately. To fix this it is necessary to generate the same modalias for vmlinux as for the individual modules. Fortunately '.vmlinux.export.o' is already generated from which '.modinfo' can be extracted in the same way as for vmlinux.o. Signed-off-by: Masahiro Yamada Signed-off-by: Alexey Gladkov Tested-by: Stephen Rothwell Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/28d4da3b0e3fc8474142746bcf469e03752c3208.1758182101.git.legion@kernel.org Signed-off-by: Nathan Chancellor --- include/linux/module.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index e31ee29fac6b..e135cc79acee 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -256,14 +256,10 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name); __PASTE(type, \ __PASTE(__, name))))) -#ifdef MODULE /* Creates an alias so file2alias.c can find device table. 
*/ #define MODULE_DEVICE_TABLE(type, name) \ static typeof(name) __mod_device_table(type, name) \ __attribute__ ((used, alias(__stringify(name)))) -#else /* !MODULE */ -#define MODULE_DEVICE_TABLE(type, name) -#endif /* Version of form [:][-]. * Or for CVS/RCS ID version, everything but the number is stripped. -- cgit v1.2.3 From 23ef9d439769d5f35353650e771c63d13824235b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Sep 2025 14:34:19 -0700 Subject: kcfi: Rename CONFIG_CFI_CLANG to CONFIG_CFI The kernel's CFI implementation uses the KCFI ABI specifically, and is not strictly tied to a particular compiler. In preparation for GCC supporting KCFI, rename CONFIG_CFI_CLANG to CONFIG_CFI (along with associated options). Use new "transitional" Kconfig option for old CONFIG_CFI_CLANG that will enable CONFIG_CFI during olddefconfig. Reviewed-by: Linus Walleij Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250923213422.1105654-3-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/cfi.h | 6 +++--- include/linux/cfi_types.h | 8 ++++---- include/linux/compiler.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 52a98886a455..1fd22ea6eba4 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,7 +11,7 @@ #include #include -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI extern bool cfi_warn; enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, @@ -52,7 +52,7 @@ static inline u32 cfi_get_func_hash(void *func) extern u32 cfi_bpf_hash; extern u32 cfi_bpf_subprog_hash; -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ static inline int cfi_get_offset(void) { return 0; } static inline u32 cfi_get_func_hash(void *func) { return 0; } @@ -60,7 +60,7 @@ static inline u32 cfi_get_func_hash(void *func) { return 0; } #define cfi_bpf_hash 0U #define cfi_bpf_subprog_hash 0U -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS bool is_cfi_trap(unsigned long addr); diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h index 685f7181780f..a86af9bc8bdc 100644 --- a/include/linux/cfi_types.h +++ b/include/linux/cfi_types.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ #include -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI /* * Use the __kcfi_typeid_ type identifier symbol to * annotate indirectly called assembly functions. The compiler emits @@ -29,12 +29,12 @@ #define SYM_TYPED_START(name, linkage, align...) \ SYM_TYPED_ENTRY(name, linkage, align) -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ #define SYM_TYPED_START(name, linkage, align...) \ SYM_START(name, linkage, align) -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifndef SYM_TYPED_FUNC_START #define SYM_TYPED_FUNC_START(name) \ @@ -43,7 +43,7 @@ #else /* __ASSEMBLY__ */ -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI #define DEFINE_CFI_TYPE(name, func) \ /* \ * Force a reference to the function so the compiler generates \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..fb27da2221ee 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -248,7 +248,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* __KERNEL__ */ -#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_CFI) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) /* * Force a reference to the external symbol so the compiler generates * __kcfi_typid. 
-- cgit v1.2.3 From 340974c4f709ce8142a672ab11f1889dff94d6dc Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:00 +0530 Subject: mailbox: Add common header for RPMI messages sent via mailbox The RPMI based mailbox controller drivers and mailbox clients need to share defines related to RPMI messages over mailbox interface so add a common header for this purpose. Acked-by: Jassi Brar Co-developed-by: Rahul Pathak Signed-off-by: Rahul Pathak Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-5-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/mailbox/riscv-rpmi-message.h | 214 +++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 include/linux/mailbox/riscv-rpmi-message.h (limited to 'include/linux') diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h new file mode 100644 index 000000000000..c3a98fc12c0a --- /dev/null +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Ventana Micro Systems Inc. */ + +#ifndef _LINUX_RISCV_RPMI_MESSAGE_H_ +#define _LINUX_RISCV_RPMI_MESSAGE_H_ + +#include +#include +#include +#include + +/* RPMI version encode/decode macros */ +#define RPMI_VER_MAJOR(__ver) upper_16_bits(__ver) +#define RPMI_VER_MINOR(__ver) lower_16_bits(__ver) +#define RPMI_MKVER(__maj, __min) (((u32)(__maj) << 16) | (u16)(__min)) + +/* RPMI message header */ +struct rpmi_message_header { + __le16 servicegroup_id; + u8 service_id; + u8 flags; + __le16 datalen; + __le16 token; +}; + +/* RPMI message */ +struct rpmi_message { + struct rpmi_message_header header; + u8 data[]; +}; + +/* RPMI notification event */ +struct rpmi_notification_event { + __le16 event_datalen; + u8 event_id; + u8 reserved; + u8 event_data[]; +}; + +/* RPMI error codes */ +enum rpmi_error_codes { + RPMI_SUCCESS = 0, + RPMI_ERR_FAILED = -1, + RPMI_ERR_NOTSUPP = -2, + RPMI_ERR_INVALID_PARAM = -3, + RPMI_ERR_DENIED = -4, + RPMI_ERR_INVALID_ADDR = -5, + RPMI_ERR_ALREADY = -6, + RPMI_ERR_EXTENSION = -7, + RPMI_ERR_HW_FAULT = -8, + RPMI_ERR_BUSY = -9, + RPMI_ERR_INVALID_STATE = -10, + RPMI_ERR_BAD_RANGE = -11, + RPMI_ERR_TIMEOUT = -12, + RPMI_ERR_IO = -13, + RPMI_ERR_NO_DATA = -14, + RPMI_ERR_RESERVED_START = -15, + RPMI_ERR_RESERVED_END = -127, + RPMI_ERR_VENDOR_START = -128, +}; + +static inline int rpmi_to_linux_error(int rpmi_error) +{ + switch (rpmi_error) { + case RPMI_SUCCESS: + return 0; + case RPMI_ERR_INVALID_PARAM: + case RPMI_ERR_BAD_RANGE: + case RPMI_ERR_INVALID_STATE: + return -EINVAL; + case RPMI_ERR_DENIED: + return -EPERM; + case RPMI_ERR_INVALID_ADDR: + case RPMI_ERR_HW_FAULT: + return -EFAULT; + case RPMI_ERR_ALREADY: + return -EALREADY; + case RPMI_ERR_BUSY: + return -EBUSY; + case RPMI_ERR_TIMEOUT: + return -ETIMEDOUT; + case RPMI_ERR_IO: + return -ECOMM; + case RPMI_ERR_FAILED: + case RPMI_ERR_NOTSUPP: + case RPMI_ERR_NO_DATA: + case RPMI_ERR_EXTENSION: + default: + return -EOPNOTSUPP; + } +} + +/* RPMI Linux mailbox attribute IDs */ +enum rpmi_mbox_attribute_id { + RPMI_MBOX_ATTR_SPEC_VERSION, + RPMI_MBOX_ATTR_MAX_MSG_DATA_SIZE, + RPMI_MBOX_ATTR_SERVICEGROUP_ID, + RPMI_MBOX_ATTR_SERVICEGROUP_VERSION, + RPMI_MBOX_ATTR_IMPL_ID, + RPMI_MBOX_ATTR_IMPL_VERSION, + RPMI_MBOX_ATTR_MAX_ID +}; + +/* RPMI Linux mailbox message types */ +enum rpmi_mbox_message_type { + RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE, + 
RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE,
+	RPMI_MBOX_MSG_TYPE_NOTIFICATION_EVENT,
+	RPMI_MBOX_MSG_MAX_TYPE
+};
+
+/* RPMI Linux mailbox message instance */
+struct rpmi_mbox_message {
+	enum rpmi_mbox_message_type type;
+	union {
+		struct {
+			enum rpmi_mbox_attribute_id id;
+			u32 value;
+		} attr;
+
+		struct {
+			u32 service_id;
+			void *request;
+			unsigned long request_len;
+			void *response;
+			unsigned long max_response_len;
+			unsigned long out_response_len;
+		} data;
+
+		struct {
+			u16 event_datalen;
+			u8 event_id;
+			u8 *event_data;
+		} notif;
+	};
+	int error;
+};
+
+/* RPMI Linux mailbox message helper routines */
+static inline void rpmi_mbox_init_get_attribute(struct rpmi_mbox_message *msg,
+						enum rpmi_mbox_attribute_id id)
+{
+	msg->type = RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE;
+	msg->attr.id = id;
+	msg->attr.value = 0;
+	msg->error = 0;
+}
+
+static inline void rpmi_mbox_init_set_attribute(struct rpmi_mbox_message *msg,
+						enum rpmi_mbox_attribute_id id,
+						u32 value)
+{
+	msg->type = RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE;
+	msg->attr.id = id;
+	msg->attr.value = value;
+	msg->error = 0;
+}
+
+static inline void rpmi_mbox_init_send_with_response(struct rpmi_mbox_message *msg,
+						     u32 service_id,
+						     void *request,
+						     unsigned long request_len,
+						     void *response,
+						     unsigned long max_response_len)
+{
+	msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE;
+	msg->data.service_id = service_id;
+	msg->data.request = request;
+	msg->data.request_len = request_len;
+	msg->data.response = response;
+	msg->data.max_response_len = max_response_len;
+	msg->data.out_response_len = 0;
+	msg->error = 0;
+}
+
+static inline void rpmi_mbox_init_send_without_response(struct rpmi_mbox_message *msg,
+							u32 service_id,
+							void *request,
+							unsigned long request_len)
+{
+	msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE;
+	msg->data.service_id = service_id;
+	msg->data.request = request;
+	msg->data.request_len = request_len;
+	msg->data.response = NULL;
+	msg->data.max_response_len = 0;
+	msg->data.out_response_len = 0;
+	msg->error = 0;
+}
+
+static inline void *rpmi_mbox_get_msg_response(struct rpmi_mbox_message *msg)
+{
+	return msg ? msg->data.response : NULL;
+}
+
+static inline int rpmi_mbox_send_message(struct mbox_chan *chan,
+					 struct rpmi_mbox_message *msg)
+{
+	int ret;
+
+	/* Send message for the underlying mailbox channel */
+	ret = mbox_send_message(chan, msg);
+	if (ret < 0)
+		return ret;
+
+	/* Explicitly signal txdone for mailbox channel */
+	ret = msg->error;
+	mbox_client_txdone(chan, ret);
+	return ret;
+}
+
+#endif /* _LINUX_RISCV_RPMI_MESSAGE_H_ */
--
cgit v1.2.3


From f32a26fab3672e60f622bd7461bf978fc72f29ec Mon Sep 17 00:00:00 2001
From: Viacheslav Dubeyko
Date: Wed, 24 Sep 2025 16:24:41 -0700
Subject: hfs/hfsplus: rework debug output subsystem

Currently, HFS/HFS+ has a very obsolete and inconvenient debug output
subsystem. Also, the code is duplicated in the HFS and HFS+ drivers.

This patch introduces linux/hfs_common.h for gathering common
declarations, inline functions, and common short methods. Currently,
this file contains only the hfs_dbg() function, which employs
pr_debug() to print debug-level messages conditionally.
So, now, it is possible to enable the debug output by means of:

  echo 'file extent.c +p' > /proc/dynamic_debug/control
  echo 'func hfsplus_evict_inode +p' > /proc/dynamic_debug/control

And the debug output looks like this:

  hfs: pid 5831:fs/hfs/catalog.c:228 hfs_cat_delete(): delete_cat: 00,48
  hfs: pid 5831:fs/hfs/extent.c:484 hfs_file_truncate(): truncate: 48, 409600 -> 0
  hfs: pid 5831:fs/hfs/extent.c:212 hfs_dump_extent():
  hfs: pid 5831:fs/hfs/extent.c:214 hfs_dump_extent():  78:4
  hfs: pid 5831:fs/hfs/extent.c:214 hfs_dump_extent():  0:0
  hfs: pid 5831:fs/hfs/extent.c:214 hfs_dump_extent():  0:0

v4
Debug messages have been reworked and information about the new
HFS/HFS+ shared declarations file has been added to the MAINTAINERS
file.

v5
Yangtao Li suggested cleaning up the debug output and fixing several
typos.

Signed-off-by: Viacheslav Dubeyko
cc: John Paul Adrian Glaubitz
cc: Yangtao Li
cc: linux-fsdevel@vger.kernel.org
cc: Johannes Thumshirn
Signed-off-by: Viacheslav Dubeyko
---
 include/linux/hfs_common.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 include/linux/hfs_common.h

(limited to 'include/linux')

diff --git a/include/linux/hfs_common.h b/include/linux/hfs_common.h
new file mode 100644
index 000000000000..8838ca2f3d08
--- /dev/null
+++ b/include/linux/hfs_common.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HFS/HFS+ common definitions, inline functions,
+ * and shared functionality.
+ */
+
+#ifndef _HFS_COMMON_H_
+#define _HFS_COMMON_H_
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define hfs_dbg(fmt, ...) \
+	pr_debug("pid %d:%s:%d %s(): " fmt, \
+		 current->pid, __FILE__, __LINE__, __func__, ##__VA_ARGS__) \
+
+#endif /* _HFS_COMMON_H_ */
--
cgit v1.2.3


From ba879dfc0574878f3e08f217b2b4fdf845c426c0 Mon Sep 17 00:00:00 2001
From: Anup Patel
Date: Mon, 18 Aug 2025 09:39:01 +0530
Subject: mailbox: Allow controller specific mapping using fwnode

Introduce an optional fw_xlate() callback which allows a mailbox
controller driver to provide controller specific mapping using fwnode.

The Linux OF framework already implements fwnode operations for the
Linux DD framework so the fw_xlate() callback works fine with device
tree as well.

Acked-by: Jassi Brar
Reviewed-by: Andy Shevchenko
Signed-off-by: Anup Patel
Link: https://lore.kernel.org/r/20250818040920.272664-6-apatel@ventanamicro.com
Signed-off-by: Paul Walmsley
---
 include/linux/mailbox_controller.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
index ad01c4082358..80a427c7ca29 100644
--- a/include/linux/mailbox_controller.h
+++ b/include/linux/mailbox_controller.h
@@ -66,6 +66,7 @@ struct mbox_chan_ops {
  *			no interrupt rises. Ignored if 'txdone_irq' is set.
  * @txpoll_period:	If 'txdone_poll' is in effect, the API polls for
  *			last TX's status after these many millisecs
+ * @fw_xlate:		Controller driver specific mapping of channel via fwnode
  * @of_xlate:		Controller driver specific mapping of channel via DT
  * @poll_hrt:		API private. hrtimer used to poll for TXDONE on all
  *			channels.
@@ -79,6 +80,8 @@ struct mbox_controller { bool txdone_irq; bool txdone_poll; unsigned txpoll_period; + struct mbox_chan *(*fw_xlate)(struct mbox_controller *mbox, + const struct fwnode_reference_args *sp); struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, const struct of_phandle_args *sp); /* Internal to API */ -- cgit v1.2.3 From 6f01c24f3a7525e953d514367a6d91b39c8f1f6a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:02 +0530 Subject: byteorder: Add memcpy_to_le32() and memcpy_from_le32() Add common memcpy APIs for copying u32 array to/from __le32 array. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Anup Patel Reviewed-by: Linus Walleij Acked-by: Jassi Brar Link: https://lore.kernel.org/r/20250818040920.272664-7-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/byteorder/generic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index c9a4c96c9943..b3705e8bbe2b 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -173,6 +173,22 @@ static inline void cpu_to_le32_array(u32 *buf, unsigned int words) } } +static inline void memcpy_from_le32(u32 *dst, const __le32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = le32_to_cpu(src[i]); +} + +static inline void memcpy_to_le32(__le32 *dst, const u32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = cpu_to_le32(src[i]); +} + static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); -- cgit v1.2.3 From edcc8a38b5ac1a3dbd05e113a38a25b937ebefe5 Mon Sep 17 00:00:00 2001 From: Qi Xi Date: Tue, 9 Sep 2025 19:29:10 +0800 Subject: once: fix race by moving DO_ONCE to separate section The commit c2c60ea37e5b ("once: use __section(".data.once")") moved DO_ONCE's ___done variable to .data.once section, which conflicts with DO_ONCE_LITE() that also uses the same section. This creates a race condition when clear_warn_once is used: Thread 1 (DO_ONCE) Thread 2 (DO_ONCE) __do_once_start read ___done (false) acquire once_lock execute func __do_once_done write ___done (true) __do_once_start release once_lock // Thread 3 clear_warn_once reset ___done read ___done (false) acquire once_lock execute func schedule once_work __do_once_done once_deferred: OK write ___done (true) static_branch_disable release once_lock schedule once_work once_deferred: BUG_ON(!static_key_enabled) DO_ONCE_LITE() in once_lite.h is used by WARN_ON_ONCE() and other warning macros. Keep its ___done flag in the .data..once section and allow resetting by clear_warn_once, as originally intended. In contrast, DO_ONCE() is used for functions like get_random_once() and relies on its ___done flag for internal synchronization. We should not reset DO_ONCE() by clear_warn_once. Fix it by isolating DO_ONCE's ___done into a separate .data..do_once section, shielding it from clear_warn_once. 
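The two DO_ONCE() families now behave as their users expect
(illustrative):

	/* DO_ONCE(): one-shot by design, must never be re-armed */
	static u32 seed;
	get_random_once(&seed, sizeof(seed));

	/* DO_ONCE_LITE() via WARN_ON_ONCE(): may legitimately fire
	 * again after `echo 1 > /sys/kernel/debug/clear_warn_once`
	 */
	WARN_ON_ONCE(count > limit);
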
Fixes: c2c60ea37e5b ("once: use __section(".data.once")") Reported-by: Hulk Robot Signed-off-by: Qi Xi Signed-off-by: Arnd Bergmann --- include/linux/once.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/once.h b/include/linux/once.h index 30346fcdc799..449a0e34ad5a 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -46,7 +46,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE(func, ...) \ ({ \ bool ___ret = false; \ - static bool __section(".data..once") ___done = false; \ + static bool __section(".data..do_once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ @@ -64,7 +64,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE_SLEEPABLE(func, ...) \ ({ \ bool ___ret = false; \ - static bool __section(".data..once") ___done = false; \ + static bool __section(".data..do_once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ ___ret = __do_once_sleepable_start(&___done); \ -- cgit v1.2.3 From 5295be6c4ea4e1ffd38ab0ab131a65afc6b78e9f Mon Sep 17 00:00:00 2001 From: Vitaly Margolin Date: Sun, 23 Jun 2024 09:19:15 +0300 Subject: accel/habanalabs: add generic message type to get error counters Add a new CPUCP generic message type to retrieve HBM, SRAM and critical error counters from the device. Signed-off-by: Vitaly Margolin Reviewed-by: Koby Elbaz Signed-off-by: Koby Elbaz --- include/linux/habanalabs/cpucp_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 7ed3fdd55dda..29c50e7427d1 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1425,9 +1425,11 @@ struct cpucp_monitor_dump { * from "pkt_subidx" field in struct cpucp_packet. * * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. + * HL_GET_ERR_COUNTERS_CMD - Command to get error counters */ enum hl_passthrough_type { HL_PASSTHROUGH_VERSIONS, + HL_GET_ERR_COUNTERS_CMD, }; #endif /* CPUCP_IF_H */ -- cgit v1.2.3 From 65a3f5bc331ca7384900641b46cadff63805c10e Mon Sep 17 00:00:00 2001 From: Ariel Aviad Date: Tue, 24 Sep 2024 11:02:49 +0300 Subject: accel/habanalabs: add HL_GET_P_STATE passthrough type Add a new passthrough type HL_GET_P_STATE to the cpucp generic ioctl to allow userspace to read the device performance state via firmware. Signed-off-by: Ariel Aviad Reviewed-by: Koby Elbaz Signed-off-by: Koby Elbaz --- include/linux/habanalabs/cpucp_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 29c50e7427d1..45f181bcf890 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1426,10 +1426,12 @@ struct cpucp_monitor_dump { * * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. * HL_GET_ERR_COUNTERS_CMD - Command to get error counters + * HL_GET_P_STATE - get performance state */ enum hl_passthrough_type { HL_PASSTHROUGH_VERSIONS, HL_GET_ERR_COUNTERS_CMD, + HL_GET_P_STATE, }; #endif /* CPUCP_IF_H */ -- cgit v1.2.3 From 10cdfcd37ade7ce736bc4a1927680f390a6b1f7b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Sep 2025 13:33:58 +0200 Subject: nstree: make struct ns_tree private Don't expose it directly. 
There's no need to do that. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/nstree.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nstree.h b/include/linux/nstree.h index 29ad6402260c..8b8636690473 100644 --- a/include/linux/nstree.h +++ b/include/linux/nstree.h @@ -9,19 +9,6 @@ #include #include -/** - * struct ns_tree - Namespace tree - * @ns_tree: Rbtree of namespaces of a particular type - * @ns_list: Sequentially walkable list of all namespaces of this type - * @ns_tree_lock: Seqlock to protect the tree and list - */ -struct ns_tree { - struct rb_root ns_tree; - struct list_head ns_list; - seqlock_t ns_tree_lock; - int type; -}; - extern struct ns_tree cgroup_ns_tree; extern struct ns_tree ipc_ns_tree; extern struct ns_tree mnt_ns_tree; -- cgit v1.2.3 From 4055526d35746ce8b04bfa5e14e14f28bb163186 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Sep 2025 13:33:59 +0200 Subject: ns: move ns type into struct ns_common It's misplaced in struct proc_ns_operations and ns->ops might be NULL if the namespace is compiled out but we still want to know the type of the namespace for the initial namespace struct. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 30 +++++++++++++++++++++++++----- include/linux/proc_ns.h | 1 - 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 56492cd9ff8d..f5b68b8abb54 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -4,6 +4,7 @@ #include #include +#include struct proc_ns_operations; @@ -37,6 +38,7 @@ extern const struct proc_ns_operations timens_operations; extern const struct proc_ns_operations timens_for_children_operations; struct ns_common { + u32 ns_type; struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; @@ -51,7 +53,7 @@ struct ns_common { }; }; -int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); +int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum); void __ns_common_free(struct ns_common *ns); #define to_ns_common(__ns) \ @@ -106,10 +108,28 @@ void __ns_common_free(struct ns_common *ns); struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \ struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL)) -#define ns_common_init(__ns) \ - __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) - -#define ns_common_init_inum(__ns, __inum) __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), __inum) +#define ns_common_type(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CLONE_NEWCGROUP, \ + struct ipc_namespace *: CLONE_NEWIPC, \ + struct mnt_namespace *: CLONE_NEWNS, \ + struct net *: CLONE_NEWNET, \ + struct pid_namespace *: CLONE_NEWPID, \ + struct time_namespace *: CLONE_NEWTIME, \ + struct user_namespace *: CLONE_NEWUSER, \ + struct uts_namespace *: CLONE_NEWUTS) + +#define ns_common_init(__ns) \ + __ns_common_init(to_ns_common(__ns), \ + ns_common_type(__ns), \ + to_ns_operations(__ns), \ + (((__ns) == ns_init_ns(__ns)) ? 
ns_init_inum(__ns) : 0)) + +#define ns_common_init_inum(__ns, __inum) \ + __ns_common_init(to_ns_common(__ns), \ + ns_common_type(__ns), \ + to_ns_operations(__ns), \ + __inum) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 08016f6e0e6f..e81b8e596e4f 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -17,7 +17,6 @@ struct inode; struct proc_ns_operations { const char *name; const char *real_ns_name; - int type; struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); int (*install)(struct nsset *nsset, struct ns_common *ns); -- cgit v1.2.3 From 4ae8d9aa9f9dc7137ea5e564d79c5aa5af1bc45c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Sep 2025 23:02:41 +0200 Subject: sched/deadline: Fix dl_server getting stuck John found it was easy to hit lockup warnings when running locktorture on a 2 CPU VM, which he bisected down to commit cccb45d7c429 ("sched/deadline: Less agressive dl_server handling"). While debugging, it appears there is a window where we end up with the dl_server dequeued while dl_se->dl_server_active is still set. This causes dl_server_start() to return without enqueueing the dl_server, so it fails to run when RT tasks starve the CPU. When this happens, dl_server_timer() catches the '!dl_se->server_has_tasks(dl_se)' case, which then calls replenish_dl_entity() and dl_server_stopped() and finally returns HRTIMER_NO_RESTART. This ends with no new timer and also no enqueue, leaving the dl_server 'dead' and allowing starvation. What should have happened is for the bandwidth timer to start the zero-laxity timer, which in turn would enqueue the dl_server and cause dl_se->server_pick_task() to be called -- which will stop the dl_server if no fair tasks are observed for a whole period. IOW, it is totally irrelevant whether there are fair tasks at the moment of bandwidth refresh. This removes all dl_se->server_has_tasks() users, so remove the whole thing. Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") Reported-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Peter Zijlstra (Intel) Tested-by: John Stultz --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8188b833350..f89313b150e6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -733,7 +733,6 @@ struct sched_dl_entity { * runnable task. */ struct rq *rq; - dl_server_has_tasks_f server_has_tasks; dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES -- cgit v1.2.3 From a3a70caf7906708bf9bbc80018752a6b36543808 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Sep 2025 12:03:20 +0200 Subject: sched/deadline: Fix dl_server behaviour John reported undesirable behaviour with the dl_server since commit cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling"). When starving fair tasks on purpose (by starting spinning FIFO tasks), his fair workload, which often goes (briefly) idle, would have its invocations delayed by up to a second; running one invocation per second was both unexpected and terribly slow. The reason this happens is that when dl_se->server_pick_task() returns NULL, indicating no runnable tasks, it would yield, pushing any later jobs out a whole period (1 second). Instead, simply stop the server.
This should restore behaviour in that a later wakeup (which restarts the server) will be able to continue running (subject to the CBS wakeup rules). Notably, this does not re-introduce the behaviour cccb45d7c4295 set out to solve, any start/stop cycle is naturally throttled by the timer period (no active cancel). Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") Reported-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Peter Zijlstra (Intel) Tested-by: John Stultz --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f89313b150e6..e4ce0a76831e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -706,7 +706,6 @@ struct sched_dl_entity { unsigned int dl_defer : 1; unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; - unsigned int dl_server_idle : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From 88a90315a99a9120cd471bf681515cc77cd7cdb8 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 17 Sep 2025 14:09:14 +0800 Subject: rcu: Replace preempt.h with sched.h in include/linux/rcupdate.h In the next commit, we will move the definition of migrate_enable() and migrate_disable() to linux/sched.h. However, migrate_enable/migrate_disable will be used in commit 1b93c03fb319 ("rcu: add rcu_read_lock_dont_migrate()") in bpf-next tree. In order to fix potential compiling error, replace linux/preempt.h with linux/sched.h in include/linux/rcupdate.h. Signed-off-by: Menglong Dong Signed-off-by: Peter Zijlstra (Intel) --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..8f346c847ee5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 378b7708194fff77c9020392067329931c3fcc04 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 17 Sep 2025 14:09:15 +0800 Subject: sched: Make migrate_{en,dis}able() inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now, migrate_enable and migrate_disable are global, which makes them become hotspots in some case. Take BPF for example, the function calling to migrate_enable and migrate_disable in BPF trampoline can introduce significant overhead, and following is the 'perf top' of FENTRY's benchmark (./tools/testing/selftests/bpf/bench trig-fentry): 54.63% bpf_prog_2dcccf652aac1793_bench_trigger_fentry [k] bpf_prog_2dcccf652aac1793_bench_trigger_fentry 10.43% [kernel] [k] migrate_enable 10.07% bpf_trampoline_6442517037 [k] bpf_trampoline_6442517037 8.06% [kernel] [k] __bpf_prog_exit_recur 4.11% libc.so.6 [.] syscall 2.15% [kernel] [k] entry_SYSCALL_64 1.48% [kernel] [k] memchr_inv 1.32% [kernel] [k] fput 1.16% [kernel] [k] _copy_to_user 0.73% [kernel] [k] bpf_prog_test_run_raw_tp So in this commit, we make migrate_enable/migrate_disable inline to obtain better performance. The struct rq is defined internally in kernel/sched/sched.h, and the field "nr_pinned" is accessed in migrate_enable/migrate_disable, which makes it hard to make them inline. 
Alexei Starovoitov suggests to generate the offset of "nr_pinned" in [1], so we can define the migrate_enable/migrate_disable in include/linux/sched.h and access "this_rq()->nr_pinned" with "(void *)this_rq() + RQ_nr_pinned". The offset of "nr_pinned" is generated in include/generated/rq-offsets.h by kernel/sched/rq-offsets.c. Generally speaking, we move the definition of migrate_enable and migrate_disable to include/linux/sched.h from kernel/sched/core.c. The calling to __set_cpus_allowed_ptr() is leaved in ___migrate_enable(). The "struct rq" is not available in include/linux/sched.h, so we can't access the "runqueues" with this_cpu_ptr(), as the compilation will fail in this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): typeof((ptr) + 0) So we introduce the this_rq_raw() and access the runqueues with arch_raw_cpu_ptr/PERCPU_PTR directly. The variable "runqueues" is not visible in the kernel modules, and export it is not a good idea. As Peter Zijlstra advised in [2], we define and export migrate_enable/migrate_disable in kernel/sched/core.c too, and use them for the modules. Before this patch, the performance of BPF FENTRY is: fentry : 113.030 ± 0.149M/s fentry : 112.501 ± 0.187M/s fentry : 112.828 ± 0.267M/s fentry : 115.287 ± 0.241M/s After this patch, the performance of BPF FENTRY increases to: fentry : 143.644 ± 0.670M/s fentry : 149.764 ± 0.362M/s fentry : 149.642 ± 0.156M/s fentry : 145.263 ± 0.221M/s Signed-off-by: Menglong Dong Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/20250819123214.GH4067720@noisy.programming.kicks-ass.net/ [2] --- include/linux/preempt.h | 3 -- include/linux/sched.h | 113 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1fad1c8a4c76..92237c319035 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * work-conserving schedulers. 
* */ -extern void migrate_disable(void); -extern void migrate_enable(void); /** * preempt_disable_nested - Disable preemption inside a normally preempt disabled section @@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void) DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) -DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) #ifdef CONFIG_PREEMPT_DYNAMIC diff --git a/include/linux/sched.h b/include/linux/sched.h index 644a01bdae70..d60ecaccdffc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -49,6 +49,9 @@ #include #include #include +#ifndef COMPILE_OFFSETS +#include +#endif /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -2317,4 +2320,114 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo #define alloc_tag_restore(_tag, _old) do {} while (0) #endif +#ifndef MODULE +#ifndef COMPILE_OFFSETS + +extern void ___migrate_enable(void); + +struct rq; +DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + +/* + * The "struct rq" is not available here, so we can't access the + * "runqueues" with this_cpu_ptr(), as the compilation will fail in + * this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): + * typeof((ptr) + 0) + * + * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here. + */ +#ifdef CONFIG_SMP +#define this_rq_raw() arch_raw_cpu_ptr(&runqueues) +#else +#define this_rq_raw() PERCPU_PTR(&runqueues) +#endif +#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned)) + +static inline void __migrate_enable(void) +{ + struct task_struct *p = current; + +#ifdef CONFIG_DEBUG_PREEMPT + /* + * Check both overflow from migrate_disable() and superfluous + * migrate_enable(). + */ + if (WARN_ON_ONCE((s16)p->migration_disabled <= 0)) + return; +#endif + + if (p->migration_disabled > 1) { + p->migration_disabled--; + return; + } + + /* + * Ensure stop_task runs either before or after this, and that + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). + */ + guard(preempt)(); + if (unlikely(p->cpus_ptr != &p->cpus_mask)) + ___migrate_enable(); + /* + * Mustn't clear migration_disabled() until cpus_ptr points back at the + * regular cpus_mask, otherwise things that race (eg. + * select_fallback_rq) get confused. + */ + barrier(); + p->migration_disabled = 0; + this_rq_pinned()--; +} + +static inline void __migrate_disable(void) +{ + struct task_struct *p = current; + + if (p->migration_disabled) { +#ifdef CONFIG_DEBUG_PREEMPT + /* + *Warn about overflow half-way through the range. + */ + WARN_ON_ONCE((s16)p->migration_disabled < 0); +#endif + p->migration_disabled++; + return; + } + + guard(preempt)(); + this_rq_pinned()++; + p->migration_disabled = 1; +} +#else /* !COMPILE_OFFSETS */ +static inline void __migrate_disable(void) { } +static inline void __migrate_enable(void) { } +#endif /* !COMPILE_OFFSETS */ + +/* + * So that it is possible to not export the runqueues variable, define and + * export migrate_enable/migrate_disable in kernel/sched/core.c too, and use + * them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will + * be defined in kernel/sched/core.c. 
+ */ +#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE +static inline void migrate_disable(void) +{ + __migrate_disable(); +} + +static inline void migrate_enable(void) +{ + __migrate_enable(); +} +#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ + +#else /* MODULE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* MODULE */ + +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif -- cgit v1.2.3 From 45b7f780739a3145aeef24d2dfa02517a6c82ed6 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 17 Sep 2025 14:09:16 +0800 Subject: sched: Fix some typos in include/linux/preempt.h There are some typos in the comments of migrate in include/linux/preempt.h: elegible -> eligible it's -> its migirate_disable -> migrate_disable abritrary -> arbitrary Just fix them. Signed-off-by: Menglong Dong Signed-off-by: Peter Zijlstra (Intel) --- include/linux/preempt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 92237c319035..102202185d7a 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -372,7 +372,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, /* * Migrate-Disable and why it is undesired. * - * When a preempted task becomes elegible to run under the ideal model (IOW it + * When a preempted task becomes eligible to run under the ideal model (IOW it * becomes one of the M highest priority tasks), it might still have to wait * for the preemptee's migrate_disable() section to complete. Thereby suffering * a reduction in bandwidth in the exact duration of the migrate_disable() @@ -387,7 +387,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * - a lower priority tasks; which under preempt_disable() could've instantly * migrated away when another CPU becomes available, is now constrained * by the ability to push the higher priority task away, which might itself be - * in a migrate_disable() section, reducing it's available bandwidth. + * in a migrate_disable() section, reducing its available bandwidth. * * IOW it trades latency / moves the interference term, but it stays in the * system, and as long as it remains unbounded, the system is not fully @@ -399,7 +399,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a * number of primitives into becoming preemptible, they would also allow * migration. This turns out to break a bunch of per-cpu usage. To this end, - * all these primitives employ migirate_disable() to restore this implicit + * all these primitives employ migrate_disable() to restore this implicit * assumption. * * This is a 'temporary' work-around at best. The correct solution is getting @@ -407,7 +407,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * per-cpu locking or short preempt-disable regions. * * The end goal must be to get rid of migrate_disable(), alternatively we need - * a schedulability theory that does not depend on abritrary migration. + * a schedulability theory that does not depend on arbitrary migration. * * * Notes on the implementation. 
-- cgit v1.2.3 From 3271f19bf7b9df665549666d789b9f126b4420c7 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 23 Sep 2025 10:59:06 +0200 Subject: net: gso: restore ids of outer ip headers correctly Currently, NETIF_F_TSO_MANGLEID indicates that the inner-most ID can be mangled. Outer IDs can always be mangled. Make GSO preserve outer IDs by default, with NETIF_F_TSO_MANGLEID allowing both inner and outer IDs to be mangled. This commit also modifies a few drivers that use SKB_GSO_FIXEDID directly. Signed-off-by: Richard Gobert Reviewed-by: Edward Cree # for sfc Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250923085908.4687-4-richardbgobert@gmail.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 9 +++++++-- include/linux/skbuff.h | 8 +++++++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1c54d44805fa..1b85454116f6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5320,13 +5320,18 @@ void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { - netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature; + + if (gso_type & (SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_FIXEDID_INNER)) + gso_type |= __SKB_GSO_TCP_FIXEDID; + + feature = ((netdev_features_t)gso_type << NETIF_F_GSO_SHIFT) & NETIF_F_GSO_MASK; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(__SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 78ecfa7d00d0..fb3fec9affaa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -674,7 +674,7 @@ enum { /* This indicates the tcp segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 3, + __SKB_GSO_TCP_FIXEDID = 1 << 3, SKB_GSO_TCPV6 = 1 << 4, @@ -707,6 +707,12 @@ enum { SKB_GSO_FRAGLIST = 1 << 18, SKB_GSO_TCP_ACCECN = 1 << 19, + + /* These indirectly map onto the same netdev feature. + * If NETIF_F_TSO_MANGLEID is set it may mangle both inner and outer IDs. + */ + SKB_GSO_TCP_FIXEDID = 1 << 30, + SKB_GSO_TCP_FIXEDID_INNER = 1 << 31, }; #if BITS_PER_LONG > 32 -- cgit v1.2.3 From 60ac65a31041b0e5dfd736a79027314b9d533ef5 Mon Sep 17 00:00:00 2001 From: Sung-Chi Li Date: Thu, 11 Sep 2025 06:56:34 +0000 Subject: platform/chrome: update pwm fan control host commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update cros_ec_commands.h to include definitions for getting PWM fan duty, getting and setting the fan control mode. 
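For illustration only (not part of this patch), a kernel consumer could issue the new command through the existing cros_ec_cmd() helper; 'ec_dev' is an assumed struct cros_ec_device handle:

    struct ec_params_pwm_get_fan_duty req = { .fan_idx = 0 };
    struct ec_response_pwm_get_fan_duty resp;
    int ret;

    /* Version 0 of EC_CMD_PWM_GET_FAN_DUTY, querying fan 0 */
    ret = cros_ec_cmd(ec_dev, 0, EC_CMD_PWM_GET_FAN_DUTY,
                      &req, sizeof(req), &resp, sizeof(resp));
    if (ret >= 0)
            dev_info(ec_dev->dev, "fan 0 duty: %u%%\n", resp.percent);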
Signed-off-by: Sung-Chi Li Acked-by: Tzung-Bi Shih Reviewed-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250911-cros_ec_fan-v6-1-a1446cc098af@google.com Signed-off-by: Guenter Roeck --- include/linux/platform_data/cros_ec_commands.h | 29 +++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index c19b404e3d8d..69294f79cc88 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1825,6 +1825,16 @@ struct ec_response_pwm_get_duty { uint16_t duty; /* Duty cycle, EC_PWM_MAX_DUTY = 100% */ } __ec_align2; +#define EC_CMD_PWM_GET_FAN_DUTY 0x0027 + +struct ec_params_pwm_get_fan_duty { + uint8_t fan_idx; +} __ec_align1; + +struct ec_response_pwm_get_fan_duty { + uint32_t percent; /* Percentage of duty cycle, ranging from 0 ~ 100 */ +} __ec_align4; + /*****************************************************************************/ /* * Lightbar commands. This looks worse than it is. Since we only use one HOST @@ -3127,14 +3137,31 @@ struct ec_params_thermal_set_threshold_v1 { /****************************************************************************/ -/* Toggle automatic fan control */ +/* Set or get fan control mode */ #define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x0052 +enum ec_auto_fan_ctrl_cmd { + EC_AUTO_FAN_CONTROL_CMD_SET = 0, + EC_AUTO_FAN_CONTROL_CMD_GET, +}; + /* Version 1 of input params */ struct ec_params_auto_fan_ctrl_v1 { uint8_t fan_idx; } __ec_align1; +/* Version 2 of input params */ +struct ec_params_auto_fan_ctrl_v2 { + uint8_t fan_idx; + uint8_t cmd; /* enum ec_auto_fan_ctrl_cmd */ + uint8_t set_auto; /* only used with EC_AUTO_FAN_CONTROL_CMD_SET - bool + */ +} __ec_align4; + +struct ec_response_auto_fan_control { + uint8_t is_auto; /* bool */ +} __ec_align1; + /* Get/Set TMP006 calibration data */ #define EC_CMD_TMP006_GET_CALIBRATION 0x0053 #define EC_CMD_TMP006_SET_CALIBRATION 0x0054 -- cgit v1.2.3 From 5ba9f520f41a33c99fd5d1eb81b5650ed3517b88 Mon Sep 17 00:00:00 2001 From: Rahul Pathak Date: Mon, 18 Aug 2025 09:39:06 +0530 Subject: clk: Add clock driver for the RISC-V RPMI clock service group The RPMI specification defines a clock service group which can be accessed via SBI MPXY extension or dedicated S-mode RPMI transport. Add mailbox client based clock driver for the RISC-V RPMI clock service group. 
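As a hedged sketch (buffer and size names are hypothetical), this is also where the memcpy_from_le32() helper introduced earlier in this series comes in, e.g. when unpacking the little-endian rate words of an RPMI_CLK_SRV_GET_SUPPORTED_RATES response:

    u32 rates[RPMI_MAX_RATES];  /* RPMI_MAX_RATES is illustrative */

    /* 'le_rates' points at the __le32 payload of the RPMI response;
     * convert all entries to CPU endianness in one call.
     */
    memcpy_from_le32(rates, le_rates, ARRAY_SIZE(rates));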
Reviewed-by: Stephen Boyd Reviewed-by: Andy Shevchenko Co-developed-by: Anup Patel Signed-off-by: Anup Patel Signed-off-by: Rahul Pathak Link: https://lore.kernel.org/r/20250818040920.272664-11-apatel@ventanamicro.com [pjw@kernel.org: converted rpmi_clkrate_u64 macro to a function; replaced bare constant with a macro] Signed-off-by: Paul Walmsley --- include/linux/mailbox/riscv-rpmi-message.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h index c3a98fc12c0a..8176d33747fe 100644 --- a/include/linux/mailbox/riscv-rpmi-message.h +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -90,6 +90,22 @@ static inline int rpmi_to_linux_error(int rpmi_error) } } +/* RPMI service group IDs */ +#define RPMI_SRVGRP_CLOCK 0x00008 + +/* RPMI clock service IDs */ +enum rpmi_clock_service_id { + RPMI_CLK_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_CLK_SRV_GET_NUM_CLOCKS = 0x02, + RPMI_CLK_SRV_GET_ATTRIBUTES = 0x03, + RPMI_CLK_SRV_GET_SUPPORTED_RATES = 0x04, + RPMI_CLK_SRV_SET_CONFIG = 0x05, + RPMI_CLK_SRV_GET_CONFIG = 0x06, + RPMI_CLK_SRV_SET_RATE = 0x07, + RPMI_CLK_SRV_GET_RATE = 0x08, + RPMI_CLK_SRV_ID_MAX_COUNT +}; + /* RPMI Linux mailbox attribute IDs */ enum rpmi_mbox_attribute_id { RPMI_MBOX_ATTR_SPEC_VERSION, -- cgit v1.2.3 From 495c8d35035edb66e3284113bef01f3b1b843832 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 25 Sep 2025 13:51:07 -0500 Subject: PM: hibernate: Add pm_hibernation_mode_is_suspend() Some drivers have different flows for hibernation and suspend. If the driver opportunistically will skip thaw() then it needs a hint to know what is happening after the hibernate. Introduce a new symbol pm_hibernation_mode_is_suspend() that drivers can call to determine if suspending the system for this purpose. Tested-by: Ionut Nechita Tested-by: Kenneth Crudup Acked-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 317ae31e89b3..0664c685f0b2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -276,6 +276,7 @@ extern void arch_suspend_enable_irqs(void); extern int pm_suspend(suspend_state_t state); extern bool sync_on_suspend_enabled; +bool pm_hibernation_mode_is_suspend(void); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL @@ -288,6 +289,7 @@ static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } +static inline bool pm_hibernation_mode_is_suspend(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } -- cgit v1.2.3 From aa43953e862c031ff66e44353c88beb7a449e80d Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:09 +0530 Subject: irqchip: Add driver for the RPMI system MSI service group The RPMI specification defines a system MSI service group which allows application processors to receive MSIs upon system events such as graceful shutdown/reboot request, CPU hotplug event, memory hotplug event, etc. 
Add an irqchip driver for the RISC-V RPMI system MSI service group to directly receive system MSIs in the Linux kernel. Reviewed-by: Thomas Gleixner Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-14-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/mailbox/riscv-rpmi-message.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h index 8176d33747fe..e135c6564d0c 100644 --- a/include/linux/mailbox/riscv-rpmi-message.h +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -91,6 +91,7 @@ static inline int rpmi_to_linux_error(int rpmi_error) } /* RPMI service group IDs */ +#define RPMI_SRVGRP_SYSTEM_MSI 0x00002 #define RPMI_SRVGRP_CLOCK 0x00008 /* RPMI clock service IDs */ @@ -106,6 +107,18 @@ enum rpmi_clock_service_id { RPMI_CLK_SRV_ID_MAX_COUNT }; +/* RPMI system MSI service IDs */ +enum rpmi_sysmsi_service_id { + RPMI_SYSMSI_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_SYSMSI_SRV_GET_ATTRIBUTES = 0x02, + RPMI_SYSMSI_SRV_GET_MSI_ATTRIBUTES = 0x03, + RPMI_SYSMSI_SRV_SET_MSI_STATE = 0x04, + RPMI_SYSMSI_SRV_GET_MSI_STATE = 0x05, + RPMI_SYSMSI_SRV_SET_MSI_TARGET = 0x06, + RPMI_SYSMSI_SRV_GET_MSI_TARGET = 0x07, + RPMI_SYSMSI_SRV_ID_MAX_COUNT +}; + /* RPMI Linux mailbox attribute IDs */ enum rpmi_mbox_attribute_id { RPMI_MBOX_ATTR_SPEC_VERSION, -- cgit v1.2.3 From 1a2b423be6a89dd07d5fc27ea042be68697a6a49 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 14 Sep 2025 22:24:13 +0200 Subject: i2c: boardinfo: Annotate code used in init phase only Annotate two places in boardinfo code: - __i2c_first_dynamic_bus_num is set in the init phase. Annotate it as __ro_after_init to prevent later changes. - i2c_register_board_info() is used in the init phase only, so annotate it as __init, allowing the memory to be freed after the init phase. This is safe, see comment: "done in board-specific init code near arch_initcall() time" Signed-off-by: Heiner Kallweit Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 20fd41b51d5c..11a19241e360 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -499,7 +499,7 @@ static inline struct i2c_client *i2c_verify_client(struct device *dev) * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO -int +int __init i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else -- cgit v1.2.3 From 14967a9c7d247841b0312c48dcf8cd29e55a4cc8 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 15 Sep 2025 18:45:20 -0600 Subject: mm/hugetlb: fix copy_hugetlb_page_range() to use ->pt_share_count commit 59d9094df3d79 ("mm: hugetlb: independent PMD page table shared count") introduced ->pt_share_count dedicated to hugetlb PMD share count tracking, but omitted fixing copy_hugetlb_page_range(), leaving that function relying on page_count() for tracking, which no longer works. When lazy page table copying for hugetlb is disabled, that is, with commit bcd51a3c679d ("hugetlb: lazy page table copies in fork()") reverted, fork()'ing with hugetlb PMD sharing quickly locks up:
[ 239.446611] RIP: 0010:native_queued_spin_lock_slowpath+0x7e/0x2e0 [ 239.446631] Call Trace: [ 239.446633] [ 239.446636] _raw_spin_lock+0x3f/0x60 [ 239.446639] copy_hugetlb_page_range+0x258/0xb50 [ 239.446645] copy_page_range+0x22b/0x2c0 [ 239.446651] dup_mmap+0x3e2/0x770 [ 239.446654] dup_mm.constprop.0+0x5e/0x230 [ 239.446657] copy_process+0xd17/0x1760 [ 239.446660] kernel_clone+0xc0/0x3e0 [ 239.446661] __do_sys_clone+0x65/0xa0 [ 239.446664] do_syscall_64+0x82/0x930 [ 239.446668] ? count_memcg_events+0xd2/0x190 [ 239.446671] ? syscall_trace_enter+0x14e/0x1f0 [ 239.446676] ? syscall_exit_work+0x118/0x150 [ 239.446677] ? arch_exit_to_user_mode_prepare.constprop.0+0x9/0xb0 [ 239.446681] ? clear_bhb_loop+0x30/0x80 [ 239.446684] ? clear_bhb_loop+0x30/0x80 [ 239.446686] entry_SYSCALL_64_after_hwframe+0x76/0x7e There are two options to resolve the potential latent issue: 1. warn against PMD sharing in copy_hugetlb_page_range(), 2. fix it. This patch opts for the second option. While at it, simplify the comment, the details are not actually relevant anymore. Link: https://lkml.kernel.org/r/20250916004520.1604530-1-jane.chu@oracle.com Fixes: 59d9094df3d7 ("mm: hugetlb: independent PMD page table shared count") Signed-off-by: Jane Chu Reviewed-by: Harry Yoo Acked-by: Oscar Salvador Acked-by: David Hildenbrand Cc: Jann Horn Cc: Liu Shixin Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 08bc2442db93..a643fae8a349 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -631,6 +631,11 @@ static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc) { return atomic_read(&ptdesc->pt_share_count); } + +static inline bool ptdesc_pmd_is_shared(struct ptdesc *ptdesc) +{ + return !!ptdesc_pmd_pts_count(ptdesc); +} #else static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc) { -- cgit v1.2.3 From 7e89979f6695fb56e8739b7d19614256e637131d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 13 Sep 2025 17:03:39 -0700 Subject: include/linux/pgtable.h: convert arch_enter_lazy_mmu_mode() and friends to static inlines commit c519c3c0a113 ("mm/kasan: avoid lazy MMU mode hazards") introduced the use of arch_enter_lazy_mmu_mode(), which results in the compiler complaining about "statement has no effect", when __HAVE_ARCH_LAZY_MMU_MODE is not defined in include/linux/pgtable.h The exact warning/error is: In file included from ./include/linux/kasan.h:37, from mm/kasan/shadow.c:14: mm/kasan/shadow.c: In function kasan_populate_vmalloc_pte: ./include/linux/pgtable.h:247:41: error: statement with no effect [-Werror=unused-value] 247 | #define arch_enter_lazy_mmu_mode() (LAZY_MMU_DEFAULT) | ^ mm/kasan/shadow.c:322:9: note: in expansion of macro arch_enter_lazy_mmu_mode> 322 | arch_enter_lazy_mmu_mode(); | ^~~~~~~~~~~~~~~~~~~~~~~~ switching these "functions" to static inlines fixes this up. 
Fixes: c519c3c0a113 ("mm/kasan: avoid lazy MMU mode hazards") Reported-by: Balbir Singh Closes: https://lkml.kernel.org/r/20250912235515.367061-1-balbirs@nvidia.com Cc: Alexander Gordeev Cc: Andrey Ryabinin Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2b80fd456c8b..25a7257052ff 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -232,9 +232,9 @@ static inline int pmd_dirty(pmd_t pmd) * and the mode cannot be used in interrupt context. */ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() do {} while (0) -#define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_flush_lazy_mmu_mode() do {} while (0) +static inline void arch_enter_lazy_mmu_mode(void) {} +static inline void arch_leave_lazy_mmu_mode(void) {} +static inline void arch_flush_lazy_mmu_mode(void) {} #endif #ifndef pte_batch_hint -- cgit v1.2.3 From 17f0d1f6321caa95699b8f96baf12e654d7b8d60 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Fri, 26 Sep 2025 01:50:28 +0800 Subject: bpf: Add lookup_and_delete_elem for BPF_MAP_STACK_TRACE The stacktrace map can be easily full, which will lead to failure in obtaining the stack. In addition to increasing the size of the map, another solution is to delete the stack_id after looking it up from the user, so extend the existing bpf_map_lookup_and_delete_elem() functionality to stacktrace map types. Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250925175030.1615837-1-chen.dylane@linux.dev --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ea2ed6771cc6..6338e54a9b1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2724,7 +2724,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 flags); -int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); +int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); -- cgit v1.2.3 From 212b0f07cf021575ec25e0b2336df77c7a4d2e68 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 3 Sep 2025 14:59:43 +0200 Subject: locking/local_lock: Expose dep_map in local_trylock_t. lockdep_is_held() macro assumes that "struct lockdep_map dep_map;" is a top level field of any lock that participates in LOCKDEP. Make it so for local_trylock_t. 
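A minimal sketch of what this enables (the lock name is hypothetical): with dep_map at the top level, generic lockdep helpers such as lockdep_is_held() now apply to a local_trylock_t directly:

    static DEFINE_PER_CPU(local_trylock_t, stock_lock) =
            INIT_LOCAL_TRYLOCK(stock_lock);

    /* lockdep_is_held() expands to lock_is_held(&(lock)->dep_map),
     * hence the need for a top-level dep_map member.
     */
    lockdep_assert(lockdep_is_held(this_cpu_ptr(&stock_lock)));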
Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Alexei Starovoitov Reviewed-by: Harry Yoo Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- include/linux/local_lock_internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index d80b5306a2c0..949de37700db 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -17,7 +17,10 @@ typedef struct { /* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { - local_lock_t llock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + struct task_struct *owner; +#endif u8 acquired; } local_trylock_t; @@ -31,7 +34,7 @@ typedef struct { .owner = NULL, # define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ - .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { @@ -81,7 +84,7 @@ do { \ local_lock_debug_init(lock); \ } while (0) -#define __local_trylock_init(lock) __local_lock_init(lock.llock) +#define __local_trylock_init(lock) __local_lock_init((local_lock_t *)lock) #define __spinlock_nested_bh_init(lock) \ do { \ -- cgit v1.2.3 From 2d517aa09bbc4203f10cdee7e1d42f3bbdc1b1cd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 3 Sep 2025 14:59:45 +0200 Subject: slab: add opt-in caching layer of percpu sheaves Specifying a non-zero value for a new struct kmem_cache_args field sheaf_capacity will setup a caching layer of percpu arrays called sheaves of given capacity for the created cache. Allocations from the cache will allocate via the percpu sheaves (main or spare) as long as they have no NUMA node preference. Frees will also put the object back into one of the sheaves. When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn. If none are available and the allocation is allowed to block, an empty sheaf is refilled from slab(s) by an internal bulk alloc operation. When both percpu sheaves are full during freeing, the barn can replace a full one with an empty one, unless over a full sheaves limit. In that case a sheaf is flushed to slab(s) by an internal bulk free operation. Flushing sheaves and barns is also wired to the existing cpu flushing and cache shrinking operations. The sheaves do not distinguish NUMA locality of the cached objects. If an allocation is requested with kmem_cache_alloc_node() (or a mempolicy with strict_numa mode enabled) with a specific node (not NUMA_NO_NODE), the sheaves are bypassed. The bulk operations exposed to slab users also try to utilize the sheaves as long as the necessary (full or empty) sheaves are available on the cpu or in the barn. Once depleted, they will fallback to bulk alloc/free to slabs directly to avoid double copying. The sheaf_capacity value is exported in sysfs for observability. Sysfs CONFIG_SLUB_STATS counters alloc_cpu_sheaf and free_cpu_sheaf count objects allocated or freed using the sheaves (and thus not counting towards the other alloc/free path counters). Counters sheaf_refill and sheaf_flush count objects filled or flushed from or to slab pages, and can be used to assess how effective the caching is. The refill and flush operations will also count towards the usual alloc_fastpath/slowpath, free_fastpath/slowpath and other counters for the backing slabs. 
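Before going over the remaining counters, a minimal usage sketch (cache name, object type and capacity are illustrative) of opting a cache into sheaves through the new field:

    struct kmem_cache_args args = {
            .sheaf_capacity = 32,   /* size of each percpu sheaf */
    };
    struct kmem_cache *s;

    /* Allocations and frees with no NUMA node preference now go
     * through the percpu sheaves backed by the per-node barn.
     */
    s = kmem_cache_create("my_objects", sizeof(struct my_object),
                          &args, 0);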
For barn operations, barn_get and barn_put count how many full sheaves were get from or put to the barn, the _fail variants count how many such requests could not be satisfied mainly because the barn was either empty or full. While the barn also holds empty sheaves to make some operations easier, these are not as critical to mandate own counters. Finally, there are sheaf_alloc/sheaf_free counters. Access to the percpu sheaves is protected by local_trylock() when potential callers include irq context, and local_lock() otherwise (such as when we already know the gfp flags allow blocking). The trylock failures should be rare and we can easily fallback. Each per-NUMA-node barn has a spin_lock. When slub_debug is enabled for a cache with sheaf_capacity also specified, the latter is ignored so that allocations and frees reach the slow path where debugging hooks are processed. Similarly, we ignore it with CONFIG_SLUB_TINY which prefers low memory usage to performance. [boot failure: https://lore.kernel.org/all/583eacf5-c971-451a-9f76-fed0e341b815@linux.ibm.com/ ] Reported-and-tested-by: Venkat Rao Bagalkote Reviewed-by: Harry Yoo Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index d5a8ab98035c..49acbcdc6696 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -335,6 +335,37 @@ struct kmem_cache_args { * %NULL means no constructor. */ void (*ctor)(void *); + /** + * @sheaf_capacity: Enable sheaves of given capacity for the cache. + * + * With a non-zero value, allocations from the cache go through caching + * arrays called sheaves. Each cpu has a main sheaf that's always + * present, and a spare sheaf that may be not present. When both become + * empty, there's an attempt to replace an empty sheaf with a full sheaf + * from the per-node barn. + * + * When no full sheaf is available, and gfp flags allow blocking, a + * sheaf is allocated and filled from slab(s) using bulk allocation. + * Otherwise the allocation falls back to the normal operation + * allocating a single object from a slab. + * + * Analogically when freeing and both percpu sheaves are full, the barn + * may replace it with an empty sheaf, unless it's over capacity. In + * that case a sheaf is bulk freed to slab pages. + * + * The sheaves do not enforce NUMA placement of objects, so allocations + * via kmem_cache_alloc_node() with a node specified other than + * NUMA_NO_NODE will bypass them. + * + * Bulk allocation and free operations also try to use the cpu sheaves + * and barn, but fallback to using slab pages directly. + * + * When slub_debug is enabled for the cache, the sheaf_capacity argument + * is ignored. + * + * %0 means no sheaves will be created. + */ + unsigned int sheaf_capacity; }; struct kmem_cache *__kmem_cache_create_args(const char *name, -- cgit v1.2.3 From bbfe987c5a2854705393ad79813074e5eadcbde6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 13:10:22 +0200 Subject: PM: hibernate: Fix pm_hibernation_mode_is_suspend() build breakage Commit 495c8d35035e ("PM: hibernate: Add pm_hibernation_mode_is_suspend()") that introduced pm_hibernation_mode_is_suspend() did not define it in the case when CONFIG_HIBERNATION is unset, but CONFIG_SUSPEND is set. 
Subsequent commit 0a6e9e098fcc ("drm/amd: Fix hybrid sleep") made the amdgpu driver use that function which led to kernel build breakage in the case mentioned above [1]. Address this by using appropriate #ifdeffery around the definition of pm_hibernation_mode_is_suspend(). Fixes: 0a6e9e098fcc ("drm/amd: Fix hybrid sleep") Reported-by: KernelCI bot Closes: https://groups.io/g/kernelci-results/topic/regression_pm_testing/115439919 [1] Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) --- include/linux/suspend.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0664c685f0b2..b02876f1ae38 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -276,7 +276,6 @@ extern void arch_suspend_enable_irqs(void); extern int pm_suspend(suspend_state_t state); extern bool sync_on_suspend_enabled; -bool pm_hibernation_mode_is_suspend(void); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL @@ -289,7 +288,6 @@ static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } -static inline bool pm_hibernation_mode_is_suspend(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } @@ -420,6 +418,12 @@ static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) { } #endif /* CONFIG_HIBERNATION */ +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_SUSPEND) +bool pm_hibernation_mode_is_suspend(void); +#else +static inline bool pm_hibernation_mode_is_suspend(void) { return false; } +#endif + int arch_resume_nosmt(void); #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV -- cgit v1.2.3 From a036bb0e60ad2828c3498bff7465bcbb247b7436 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 25 Sep 2025 14:56:30 -0500 Subject: of: base: Add of_get_next_child_with_prefix() stub 1fcc67e3a354 ("of: base: Add for_each_child_of_node_with_prefix()") added of_get_next_child_with_prefix() but did not add a stub for the !CONFIG_OF case. Add a of_get_next_child_with_prefix() stub so users of for_each_child_of_node_with_prefix() can be built for compile testing even when !CONFIG_OF. Fixes: 1fcc67e3a354 ("of: base: Add for_each_child_of_node_with_prefix()") Signed-off-by: Bjorn Helgaas Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index a62154aeda1b..5e2c6ed9370a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -550,6 +550,13 @@ static inline struct device_node *of_get_next_child( return NULL; } +static inline struct device_node *of_get_next_child_with_prefix( + const struct device_node *node, struct device_node *prev, + const char *prefix) +{ + return NULL; +} + static inline struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev) { -- cgit v1.2.3 From 4e9510f16218802b5fc0d593d8707d4e7ebf9774 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Sep 2025 01:27:07 -0400 Subject: ptr_ring: drop duplicated tail zeroing code We have some rather subtle code around zeroing tail entries, minimizing cache bouncing. Let's put it all in one place. 
Doing this also reduces the text size slightly, e.g. for drivers/vhost/net.o Before: text: 15,114 bytes After: text: 15,082 bytes Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://patch.msgid.link/adb9d941de4a2b619ddb2be271a9939849e70687.1758690291.git.mst@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/ptr_ring.h | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 551329220e4f..a736b16859a6 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -243,6 +243,24 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r) return ret; } +/* Zero entries from tail to specified head. + * NB: if consumer_head can be >= r->size need to fixup tail later. + */ +static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head) +{ + int head = consumer_head - 1; + + /* Zero out entries in the reverse order: this way we touch the + * cache line that producer might currently be reading the last; + * producer won't make progress and touch other cache lines + * besides the first one until we write out all entries. + */ + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + + r->consumer_tail = consumer_head; +} + /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { @@ -261,8 +279,7 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty * to work correctly. */ - int consumer_head = r->consumer_head; - int head = consumer_head++; + int consumer_head = r->consumer_head + 1; /* Once we have processed enough entries invalidate them in * the ring all at once so producer can reuse their space in the ring. @@ -270,16 +287,9 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) * but helps keep the implementation simple. */ if (unlikely(consumer_head - r->consumer_tail >= r->batch || - consumer_head >= r->size)) { - /* Zero out entries in the reverse order: this way we touch the - * cache line that producer might currently be reading the last; - * producer won't make progress and touch other cache lines - * besides the first one until we write out all entries. - */ - while (likely(head >= r->consumer_tail)) - r->queue[head--] = NULL; - r->consumer_tail = consumer_head; - } + consumer_head >= r->size)) + __ptr_ring_zero_tail(r, consumer_head); + if (unlikely(consumer_head >= r->size)) { consumer_head = 0; r->consumer_tail = 0; @@ -513,7 +523,6 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, void (*destroy)(void *)) { unsigned long flags; - int head; spin_lock_irqsave(&r->consumer_lock, flags); spin_lock(&r->producer_lock); @@ -525,17 +534,14 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, * Clean out buffered entries (for simplicity). This way following code * can test entries for NULL and if not assume they are valid. */ - head = r->consumer_head - 1; - while (likely(head >= r->consumer_tail)) - r->queue[head--] = NULL; - r->consumer_tail = r->consumer_head; + __ptr_ring_zero_tail(r, r->consumer_head); /* * Go over entries in batch, start moving head back and copy entries. * Stop when we run into previously unconsumed entries. 
*/ while (n) { - head = r->consumer_head - 1; + int head = r->consumer_head - 1; if (head < 0) head = r->size - 1; if (r->queue[head]) { -- cgit v1.2.3 From cc2f08129925b437bf28f7f7822f20dac083a87c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 24 Sep 2025 12:40:33 +0000 Subject: ethtool: add FEC bins histogram report IEEE 802.3ck-2022 defines counters for FEC bins and 802.3df-2024 clarifies it a bit further. Implement reporting interface through as addition to FEC stats available in ethtool. Drivers can leave bin counter uninitialized if per-lane values are provided. In this case the core will recalculate summ for the bin. Signed-off-by: Vadim Fedorenko Reviewed-by: Aleksandr Loktionov Link: https://patch.msgid.link/20250924124037.1508846-2-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c869b7f8bce8..c2d8b4ec62eb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -492,7 +492,29 @@ struct ethtool_pause_stats { }; #define ETHTOOL_MAX_LANES 8 +/** + * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for + * the end-of-list marker, total 17 items + */ +#define ETHTOOL_FEC_HIST_MAX 17 +/** + * struct ethtool_fec_hist_range - error bits range for FEC histogram + * statistics + * @low: low bound of the bin (inclusive) + * @high: high bound of the bin (inclusive) + */ +struct ethtool_fec_hist_range { + u16 low; + u16 high; +}; +struct ethtool_fec_hist { + struct ethtool_fec_hist_value { + u64 sum; + u64 per_lane[ETHTOOL_MAX_LANES]; + } values[ETHTOOL_FEC_HIST_MAX]; + const struct ethtool_fec_hist_range *ranges; +}; /** * struct ethtool_fec_stats - statistics for IEEE 802.3 FEC * @corrected_blocks: number of received blocks corrected by FEC @@ -1214,7 +1236,8 @@ struct ethtool_ops { int (*set_link_ksettings)(struct net_device *, const struct ethtool_link_ksettings *); void (*get_fec_stats)(struct net_device *dev, - struct ethtool_fec_stats *fec_stats); + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist); int (*get_fecparam)(struct net_device *, struct ethtool_fecparam *); int (*set_fecparam)(struct net_device *, -- cgit v1.2.3 From 105ce7ad57e492b75ab40f2dc591db645fadbaa2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 24 Sep 2025 23:14:53 +0200 Subject: net: airoha: npu: Add a NPU callback to initialize flow stats Introduce a NPU callback to initialize flow stats and remove NPU stats initialization from airoha_npu_get routine. Add num_stats_entries to airoha_npu_ppe_stats_setup routine. This patch makes the code more readable since NPU statistic are now initialized on demand by the NPU consumer (at the moment NPU statistic are configured just by the airoha_eth driver). Moreover this patch allows the NPU consumer (PPE module) to explicitly enable/disable NPU flow stats. 
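A hedged sketch of the resulting flow on the consumer side (the DMA address and entry count are placeholders):

    struct airoha_npu *npu = airoha_npu_get(dev);

    /* Flow stats are now opt-in: the PPE module requests them
     * explicitly after acquiring the NPU.
     */
    if (!IS_ERR_OR_NULL(npu))
            err = npu->ops.ppe_init_stats(npu, foe_stats_dma_addr,
                                          PPE_STATS_NUM_ENTRIES);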
Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250924-airoha-npu-init-stats-callback-v1-1-88bdf3c941b2@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 1dc5b4e35ef9..6f66eb339b3f 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -181,6 +181,8 @@ struct airoha_npu { struct { int (*ppe_init)(struct airoha_npu *npu); int (*ppe_deinit)(struct airoha_npu *npu); + int (*ppe_init_stats)(struct airoha_npu *npu, + dma_addr_t addr, u32 num_stats_entries); int (*ppe_flush_sram_entries)(struct airoha_npu *npu, dma_addr_t foe_addr, int sram_num_entries); @@ -206,7 +208,7 @@ struct airoha_npu { }; #if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) -struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); +struct airoha_npu *airoha_npu_get(struct device *dev); void airoha_npu_put(struct airoha_npu *npu); static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) @@ -256,8 +258,7 @@ static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) npu->ops.wlan_disable_irq(npu, q); } #else -static inline struct airoha_npu *airoha_npu_get(struct device *dev, - dma_addr_t *foe_stats_addr) +static inline struct airoha_npu *airoha_npu_get(struct device *dev) { return NULL; } -- cgit v1.2.3 From 87608c2a7718dcac5deef801fb3c18cf36fb0233 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 26 Sep 2025 17:52:39 +0800 Subject: bpf: Remove duplicate crypto/sha2.h header ./include/linux/bpf.h: crypto/sha2.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=25501 Signed-off-by: Jiapeng Chong Acked-by: Quentin Monnet Link: https://lore.kernel.org/r/20250926095240.3397539-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6338e54a9b1f..fe2a396d8ac6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -32,7 +32,6 @@ #include #include #include -#include struct bpf_verifier_env; struct bpf_verifier_log; -- cgit v1.2.3 From fed7eaa4f037361fe4f3d4170649d6849a25998d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 25 Sep 2025 12:42:16 -0700 Subject: PM: runtime: Update kerneldoc return codes APIs based on __pm_runtime_idle() (pm_runtime_idle(), pm_request_idle()) do not return 1 when already suspended. They return -EAGAIN. This is already covered in the docs, so the entry for "1" is redundant and conflicting. (pm_runtime_put() and pm_runtime_put_sync() were previously incorrect, but that's fixed in "PM: runtime: pm_runtime_put{,_sync}() returns 1 when already suspended", to ensure consistency with APIs like pm_runtime_put_autosuspend().) RPM_GET_PUT APIs based on __pm_runtime_suspend() do return 1 when already suspended, but the language is a little unclear -- it's not really an "error", so it seems better to list as a clarification before the 0/success case. Additionally, they only actually return 1 when the refcount makes it to 0; if the usage counter is still non-zero, we return 0. 
pm_runtime_put(), etc., also don't appear at first like they can ever see "-EAGAIN: Runtime PM usage_count non-zero", because in non-racy conditions, pm_runtime_put() would drop its reference count, see it's non-zero, and return early (in __pm_runtime_idle()). However, it's possible to race with another actor that increments the usage_count afterward, since rpm_idle() is protected by a separate lock; in such a case, we may see -EAGAIN. Because this case is only seen in the presence of concurrent actors, it makes sense to clarify that this is when "usage_count **became** non-zero", by way of some racing actor. Lastly, pm_runtime_put_sync_suspend() duplicated some -EAGAIN language. Fix that. Fixes: 271ff96d6066 ("PM: runtime: Document return values of suspend-related API functions") Link: https://lore.kernel.org/linux-pm/aJ5pkEJuixTaybV4@google.com/ Signed-off-by: Brian Norris Reviewed-by: Sakari Ailus Cc: 6.17+ # 6.17+ Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 56 +++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d88d6b6ccf5b..d1ff76e0e2d0 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -350,13 +350,12 @@ static inline int pm_runtime_force_resume(struct device *dev) { return -ENXIO; } * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. + * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM idle and suspend callbacks. */ @@ -370,14 +369,15 @@ static inline int pm_runtime_idle(struct device *dev) * @dev: Target device. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -396,14 +396,15 @@ static inline int pm_runtime_suspend(struct device *dev) * engaging its "idle check" callback. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -433,13 +434,12 @@ static inline int pm_runtime_resume(struct device *dev) * * 0: Success. 
* * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. + * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_idle(struct device *dev) { @@ -464,15 +464,16 @@ static inline int pm_request_resume(struct device *dev) * equivalent pm_runtime_autosuspend() for @dev asynchronously. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_autosuspend(struct device *dev) { @@ -540,15 +541,16 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * equal to 0, queue up a work item for @dev like in pm_request_idle(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_runtime_put(struct device *dev) { @@ -565,15 +567,16 @@ DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { @@ -590,15 +593,16 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) * in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. 
- * * 1: Device already suspended.
 */
 static inline int pm_runtime_put_autosuspend(struct device *dev)
 {
@@ -619,14 +623,15 @@ static inline int pm_runtime_put_autosuspend(struct device *dev)
 * if it returns an error code.
 *
 * Return:
+ * * 1: Success. Usage counter dropped to zero, but device was already suspended.
 * * 0: Success.
 * * -EINVAL: Runtime PM error.
 * * -EACCES: Runtime PM disabled.
- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing.
+ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status
+ * change ongoing.
 * * -EBUSY: Runtime PM child_count non-zero.
 * * -EPERM: Device PM QoS resume latency 0.
 * * -ENOSYS: CONFIG_PM not enabled.
- * * 1: Device already suspended.
 * Other values and conditions for the above values are possible as returned by
 * Runtime PM suspend callbacks.
 */
@@ -646,15 +651,15 @@ static inline int pm_runtime_put_sync(struct device *dev)
 * if it returns an error code.
 *
 * Return:
+ * * 1: Success. Usage counter dropped to zero, but device was already suspended.
 * * 0: Success.
 * * -EINVAL: Runtime PM error.
 * * -EACCES: Runtime PM disabled.
- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing.
- * * -EAGAIN: usage_count non-zero or Runtime PM status change ongoing.
+ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status
+ * change ongoing.
 * * -EBUSY: Runtime PM child_count non-zero.
 * * -EPERM: Device PM QoS resume latency 0.
 * * -ENOSYS: CONFIG_PM not enabled.
- * * 1: Device already suspended.
 * Other values and conditions for the above values are possible as returned by
 * Runtime PM suspend callbacks.
 */
@@ -677,15 +682,16 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev)
 * if it returns an error code.
 *
 * Return:
+ * * 1: Success. Usage counter dropped to zero, but device was already suspended.
 * * 0: Success.
 * * -EINVAL: Runtime PM error.
 * * -EACCES: Runtime PM disabled.
- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing.
+ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status
+ * change ongoing.
 * * -EBUSY: Runtime PM child_count non-zero.
 * * -EPERM: Device PM QoS resume latency 0.
 * * -EINPROGRESS: Suspend already in progress.
 * * -ENOSYS: CONFIG_PM not enabled.
- * * 1: Device already suspended.
 * Other values and conditions for the above values are possible as returned by
 * Runtime PM suspend callbacks.
 */
-- cgit v1.2.3


From 4540aed51b12bc13364149bf95f6ecef013197c0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 26 Sep 2025 19:12:00 +0200
Subject: bpf: Enforce expected_attach_type for tailcall compatibility

Yinhao et al. recently reported:

  Our fuzzer tool discovered an uninitialized pointer issue in the
  bpf_prog_test_run_xdp() function within the Linux kernel's BPF
  subsystem. This leads to a NULL pointer dereference when a BPF
  program attempts to dereference the txq member of struct xdp_buff
  object.

The test initializes two programs of BPF_PROG_TYPE_XDP: progA acts as
the entry point for bpf_prog_test_run_xdp() and its
expected_attach_type can be neither BPF_XDP_DEVMAP nor BPF_XDP_CPUMAP.
progA calls into a slot of a tailcall map it owns. progB's
expected_attach_type must be BPF_XDP_DEVMAP to pass
xdp_is_valid_access() validation. The program returns struct xdp_md's
egress_ifindex, and the latter is only allowed to be accessed under the
mentioned expected_attach_type. progB is then inserted into the
tailcall map which progA calls.
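Conceptually, the enforcement described below amounts to one extra
comparison in the owner-compatibility check. A hedged sketch, where
'owner' refers to the map's struct bpf_map_owner from the bpf.h diff
further down and the surrounding __bpf_prog_map_compatible() logic is
elided:

	/* Sketch only: the tail call map records the first program's
	 * expected_attach_type; mismatching programs such as progB
	 * above are then rejected at map update time. */
	if (owner->expected_attach_type != prog->expected_attach_type)
		return false;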
The underlying issue goes beyond XDP though. Another example is
programs of type BPF_PROG_TYPE_CGROUP_SOCK_ADDR.
sock_addr_is_valid_access() as well as sock_addr_func_proto() have
different logic depending on the programs' expected_attach_type.
Similarly, a program attached to BPF_CGROUP_INET4_GETPEERNAME should
not be allowed to do a tailcall into a program which calls bpf_bind()
out of BPF, which is only enabled for BPF_CGROUP_INET4_CONNECT.

In short, specifying expected_attach_type allows opening up additional
functionality or restrictions beyond what the basic bpf_prog_type
enables. The use of tailcalls must not violate these constraints. Fix
it by enforcing expected_attach_type in __bpf_prog_map_compatible().

Note that we only enforce this for tailcall maps, but not for BPF
devmaps or cpumaps: There, the programs are invoked through
dev_map_bpf_prog_run*() and cpu_map_bpf_prog_run*() which set up a new
environment / context and therefore these situations are not prone to
this issue.

Fixes: 5e43f899b03a ("bpf: Check attach type at prog load time")
Reported-by: Yinhao Hu
Reported-by: Kaiyan Mei
Reviewed-by: Dongliang Mu
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/r/20250926171201.188490-1-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fe2a396d8ac6..a98c83346134 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -289,6 +289,7 @@ struct bpf_map_owner {
 bool xdp_has_frags;
 u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
 const struct btf_type *attach_func_proto;
+ enum bpf_attach_type expected_attach_type;
 };
 struct bpf_map {
-- cgit v1.2.3


From 2f7d98f10b8f64525b2c74cae7d70ae5278eb654 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 20 Jul 2025 15:32:31 -0400
Subject: Have cc(1) catch attempts to modify ->f_path

There are very few places that have cause to do that - all in core VFS
now, and all done to files that are not yet opened (or visible to
anybody else, for that matter).

Let's turn f_path into a union of struct path __f_path and const struct
path f_path. It's C, not C++ - 6.5.2.3[4] in C99 and later explicitly
allows that kind of type-punning.

That way any attempts to bypass these checks will be either very easy
to catch, or (if the bastards get sufficiently creative to make it hard
to spot with grep alone) very clearly malicious - and still catchable
with a bit of instrumentation for sparse.
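The effect on callers can be sketched like this (an illustrative
snippet, not kernel code; only the union members come from the fs.h
diff below):

	/* Core VFS, before the file is opened: writes go via the alias. */
	static void vfs_internal_setup(struct file *f, const struct path *p)
	{
		f->__f_path = *p;
	}

	static void driver_code(struct file *f)
	{
		struct path copy = f->f_path;	/* reads compile as before */
		(void)copy;
		/* f->f_path = ...; no longer compiles: f_path is a const
		 * member, so cc(1) itself rejects the modification. */
	}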
Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index af514fae4e2d..1fb02c76ae09 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1082,6 +1082,8 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_owner: file owner * @f_path: path of the file + * @__f_path: writable alias for @f_path; *ONLY* for core VFS and only before + * the file gets open * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position @@ -1107,7 +1109,10 @@ struct file { const struct cred *f_cred; struct fown_struct *f_owner; /* --- cacheline 1 boundary (64 bytes) --- */ - struct path f_path; + union { + const struct path f_path; + struct path __f_path; + }; union { /* regular files (with FMODE_ATOMIC_POS) and directories */ struct mutex f_pos_lock; -- cgit v1.2.3 From 1ddf1636e0e058adf2231486da0419243eb49539 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 22 Sep 2025 09:06:30 +0300 Subject: net/mlx5: Add IFC bit for TIR/SQ order capability Before this cap, firmware requested a certain creation order between TIR objects and SQs of the same transport domain to properly support the self loopback prevention feature. If order is not preserved, explicit modify_tir operations are necessary after the opening of the SQs. When set, this cap bit indicates that this firmware requirement / limitation no longer holds. Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1758521191-814350-2-git-send-email-tariqt@nvidia.com Reviewed-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0cf187e13def..c0f5fee7a4a5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1895,7 +1895,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x7]; u8 mkey_pcie_tph[0x1]; - u8 reserved_at_2a8[0x2]; + u8 reserved_at_2a8[0x1]; + u8 tis_tir_td_order[0x1]; u8 psp[0x1]; u8 shampo[0x1]; -- cgit v1.2.3 From 137d1a6355131457723b51a34192320d93d15654 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 22 Sep 2025 09:06:31 +0300 Subject: net/mlx5: IFC add balance ID and LAG per MP group bits Add interface definitions for load balance ID and LAG per multiplane group functionality. This patch introduces the hardware capability bits needed to support balance ID in multiplane LAG configurations. The new fields include: - load_balance_id: 4-bit field for balance identifier. - lag_per_mp_group: capability bit for LAG per multiplane group support. These interface additions are prerequisites for implementing balance ID support in the MLX5 driver. 
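A hedged sketch of how a driver could consume the new bits, assuming
the usual MLX5_CAP_GEN_2() accessor for cmd_hca_cap_2 fields; the foo_*
helper names are illustrative:

	static bool foo_lag_per_mp_group_supported(struct mlx5_core_dev *dev)
	{
		return MLX5_CAP_GEN_2(dev, lag_per_mp_group);
	}

	static u8 foo_load_balance_id(struct mlx5_core_dev *dev)
	{
		/* 4-bit balance identifier per the IFC layout below */
		return MLX5_CAP_GEN_2(dev, load_balance_id);
	}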
Signed-off-by: Mark Bloch
Reviewed-by: Shay Drori
Signed-off-by: Tariq Toukan
Link: https://patch.msgid.link/1758521191-814350-3-git-send-email-tariqt@nvidia.com
Signed-off-by: Leon Romanovsky
---
 include/linux/mlx5/mlx5_ifc.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c0f5fee7a4a5..07614cd95bed 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2235,12 +2235,16 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 u8 reserved_at_440[0x8];
 u8 max_num_eqs_24b[0x18];
- u8 reserved_at_460[0x160];
+ u8 reserved_at_460[0x144];
+ u8 load_balance_id[0x4];
+ u8 reserved_at_5a8[0x18];
 u8 query_adjacent_functions_id[0x1];
 u8 ingress_egress_esw_vport_connect[0x1];
 u8 function_id_type_vhca_id[0x1];
- u8 reserved_at_5c3[0xd];
+ u8 reserved_at_5c3[0x1];
+ u8 lag_per_mp_group[0x1];
+ u8 reserved_at_5c5[0xb];
 u8 delegate_vhca_management_profiles[0x10];
 u8 delegated_vhca_max[0x10];
-- cgit v1.2.3


From 2db579838296239545554443234fafb8f485cca0 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau
Date: Tue, 23 Sep 2025 12:07:06 +0100
Subject: mm/page_vma_mapped: track if the page is mapped across page table boundary

Patch series "mm: Improve mlock tracking for large folios", v3.

The patchset includes several fixes and improvements related to mlock
tracking of large folios.

The main objective is to reduce the undercount of Mlocked memory in
/proc/meminfo and improve the accuracy of the statistics.

Patches 1-2: These patches address a minor race condition in
folio_referenced_one() related to mlock_vma_folio(). Currently,
mlock_vma_folio() is called on a large folio without the page table
lock, which can result in a race condition with unmap (i.e.
MADV_DONTNEED). This can lead to partially mapped folios on the
unevictable LRU list. As this is not a significant issue, I do not
believe backporting is necessary.

Patch 3: This patch adds mlocking logic similar to
folio_referenced_one() to try_to_unmap_one(), allowing for mlocking of
large folios where possible.

Patches 4-5: These patches modify finish_fault() and faultaround to map
in the entire folio when possible, enabling efficient mlocking upon
addition to the rmap.

Patch 6: This patch makes rmap mlock large folios if they are fully
mapped, addressing the primary source of mlock undercount for large
folios.

This patch (of 6):

Add a PVMW_PGTABLE_CROSSSED flag that page_vma_mapped_walk() will set
if the page is mapped across a page table boundary. Unlike other PVMW_*
flags, this one is the result of page_vma_mapped_walk() and not set by
the caller.

folio_referenced_one() will use it to detect if it is safe to mlock the
folio.
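The intended consumer pattern might look roughly like this. A sketch
assuming the result flag is reported in pvmw.flags; the reference
bookkeeping and locking inside folio_referenced_one() are elided:

	while (page_vma_mapped_walk(&pvmw)) {
		/* ... accessed/referenced bookkeeping ... */
	}

	if (!(pvmw.flags & PVMW_PGTABLE_CROSSED))
		/* mapping confined to one page table: safe to mlock */
		mlock_vma_folio(folio, vma);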
[akpm@linux-foundation.org: s/CROSSSED/CROSSED/] Link: https://lkml.kernel.org/r/20250923110711.690639-1-kirill@shutemov.name Link: https://lkml.kernel.org/r/20250923110711.690639-2-kirill@shutemov.name Signed-off-by: Kiryl Shutsemau Reviewed-by: Shakeel Butt Cc: Baolin Wang Cc: David Hildenbrand Cc: Johannes Weiner Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/rmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e8aff6d2deda..daa92a58585d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -922,6 +922,11 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, /* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) +/* Result flags */ + +/* The page is mapped across page table boundary */ +#define PVMW_PGTABLE_CROSSED (1 << 16) + struct page_vma_mapped_walk { unsigned long pfn; unsigned long nr_pages; -- cgit v1.2.3 From 4d6fc29f36341d7795db1d1819b4c15fe9be7b23 Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Wed, 24 Sep 2025 00:16:59 +0530 Subject: mm/ksm: fix incorrect KSM counter handling in mm_struct during fork Patch series "mm/ksm: Fix incorrect accounting of KSM counters during fork", v3. The first patch in this series fixes the incorrect accounting of KSM counters such as ksm_merging_pages, ksm_rmap_items, and the global ksm_zero_pages during fork. The following patch add a selftest to verify the ksm_merging_pages counter was updated correctly during fork. Test Results ============ Without the first patch ----------------------- # [RUN] test_fork_ksm_merging_page_count not ok 10 ksm_merging_page in child: 32 With the first patch -------------------- # [RUN] test_fork_ksm_merging_page_count ok 10 ksm_merging_pages is not inherited after fork This patch (of 2): Currently, the KSM-related counters in `mm_struct`, such as `ksm_merging_pages`, `ksm_rmap_items`, and `ksm_zero_pages`, are inherited by the child process during fork. This results in inconsistent accounting. When a process uses KSM, identical pages are merged and an rmap item is created for each merged page. The `ksm_merging_pages` and `ksm_rmap_items` counters are updated accordingly. However, after a fork, these counters are copied to the child while the corresponding rmap items are not. As a result, when the child later triggers an unmerge, there are no rmap items present in the child, so the counters remain stale, leading to incorrect accounting. A similar issue exists with `ksm_zero_pages`, which maintains both a global counter and a per-process counter. During fork, the per-process counter is inherited by the child, but the global counter is not incremented. Since the child also references zero pages, the global counter should be updated as well. Otherwise, during zero-page unmerge, both the global and per-process counters are decremented, causing the global counter to become inconsistent. To fix this, ksm_merging_pages and ksm_rmap_items are reset to 0 during fork, and the global ksm_zero_pages counter is updated with the per-process ksm_zero_pages value inherited by the child. This ensures that KSM statistics remain accurate and reflect the activity of each process correctly. 
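The user-visible effect can be demonstrated from userspace. A minimal
sketch, assuming a KSM-enabled kernel that exposes
/proc/<pid>/ksm_merging_pages, with error handling trimmed:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/wait.h>

	static long merging_pages(void)
	{
		long val = -1;
		FILE *f = fopen("/proc/self/ksm_merging_pages", "r");

		if (f) {
			if (fscanf(f, "%ld", &val) != 1)
				val = -1;
			fclose(f);
		}
		return val;
	}

	int main(void)
	{
		pid_t pid = fork();

		if (pid == 0) {
			/* With the fix this prints 0: the child inherits
			 * no rmap items, so it must not inherit the count. */
			printf("child: %ld\n", merging_pages());
			return 0;
		}
		waitpid(pid, NULL, 0);
		printf("parent: %ld\n", merging_pages());
		return 0;
	}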
Link: https://lkml.kernel.org/r/cover.1758648700.git.donettom@linux.ibm.com
Link: https://lkml.kernel.org/r/7b9870eb67ccc0d79593940d9dbd4a0b39b5d396.1758648700.git.donettom@linux.ibm.com
Fixes: 7609385337a4 ("ksm: count ksm merging pages for each process")
Fixes: cb4df4cae4f2 ("ksm: count allocated ksm rmap_items for each process")
Fixes: e2942062e01d ("ksm: count all zero pages placed by KSM")
Signed-off-by: Donet Tom
Reviewed-by: Chengming Zhou
Acked-by: David Hildenbrand
Cc: Aboorva Devarajan
Cc: David Hildenbrand
Cc: Donet Tom
Cc: "Ritesh Harjani (IBM)"
Cc: Wei Yang
Cc: xu xin
Cc: [6.6+]
Signed-off-by: Andrew Morton
---
 include/linux/ksm.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 22e67ca7cba3..067538fc4d58 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -56,8 +56,14 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm)
 static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 /* Adding mm to ksm is best effort on fork. */
- if (mm_flags_test(MMF_VM_MERGEABLE, oldmm))
+ if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) {
+ long nr_ksm_zero_pages = atomic_long_read(&mm->ksm_zero_pages);
+
+ mm->ksm_merging_pages = 0;
+ mm->ksm_rmap_items = 0;
+ atomic_long_add(nr_ksm_zero_pages, &ksm_zero_pages);
 __ksm_enter(mm);
+ }
 }
 static inline int ksm_execve(struct mm_struct *mm)
-- cgit v1.2.3


From 989c2f55ca4839121cbf23b5802f8513dbd54e1e Mon Sep 17 00:00:00 2001
From: Lance Yang
Date: Fri, 26 Sep 2025 17:24:26 +0800
Subject: mm: silence data-race in update_hiwater_rss

KCSAN reports a data race on mm->hiwater_rss, which can be accessed
concurrently from various paths like page migration and memory
unmapping without synchronization.

Since hiwater_rss is a statistical field for accounting purposes, this
data race is benign. Annotate both the read and write accesses with
data_race() to make KCSAN happy.

Link: https://lkml.kernel.org/r/20250926092426.43312-1-lance.yang@linux.dev
Signed-off-by: Lance Yang
Reported-by: syzbot+60192c8877d0bc92a92b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-mm/68d6364e.050a0220.3390a8.000d.GAE@google.com
Acked-by: Vlastimil Babka
Cc: David Hildenbrand
Cc: Jann Horn
Cc: Liam Howlett
Cc: Lorenzo Stoakes
Cc: Marco Elver
Cc: Rik van Riel
Signed-off-by: Andrew Morton
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fcb1e72eea40..06978b4dbeb8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2742,7 +2742,7 @@ static inline void update_hiwater_rss(struct mm_struct *mm)
 unsigned long _rss = get_mm_rss(mm);
 if (data_race(mm->hiwater_rss) < _rss)
- (mm)->hiwater_rss = _rss;
+ data_race(mm->hiwater_rss = _rss);
 }
 static inline void update_hiwater_vm(struct mm_struct *mm)
-- cgit v1.2.3


From 81e78b7ec61e89e8bab9736551839f79b063614c Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 23 Sep 2025 16:00:58 +0200
Subject: mm: convert folio_page() back to a macro

In commit 73b3294b1152 ("mm: simplify folio_page() and
folio_page_idx()") we converted folio_page() into a static inline
function. However, briefly afterwards, in commit a847b17009ec ("mm:
constify highmem related functions for improved const-correctness") we
had to add some nasty const-away casting to make the compiler happy
when checking const correctness.
So let's just convert it back to a simple macro so the compiler can
check const correctness properly. There is the alternative of using a
_Generic() similar to page_folio(), but there is not a lot of benefit
compared to just using a simple macro.

Link: https://lkml.kernel.org/r/20250923140058.2020023-1-david@redhat.com
Signed-off-by: David Hildenbrand
Reviewed-by: Kiryl Shutsemau
Reviewed-by: SeongJae Park
Reviewed-by: Vishal Moola (Oracle)
Reviewed-by: Dev Jain
Reviewed-by: Suren Baghdasaryan
Reviewed-by: Lance Yang
Reviewed-by: Wei Yang
Cc: Lorenzo Stoakes
Cc: "Liam R. Howlett"
Cc: Vlastimil Babka
Cc: Mike Rapoport
Cc: Michal Hocko
Signed-off-by: Andrew Morton
---
 include/linux/page-flags.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 568011930e35..48e27768e7ba 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -316,10 +316,7 @@ static __always_inline unsigned long _compound_head(const struct page *page)
 * check that the page number lies within @folio; the caller is presumed
 * to have a reference to the page.
 */
-static inline struct page *folio_page(const struct folio *folio, unsigned long n)
-{
- return (struct page *)(&folio->page + n);
-}
+#define folio_page(folio, n) (&(folio)->page + (n))
 static __always_inline int PageTail(const struct page *page)
 {
-- cgit v1.2.3


From c8a935a31bc787db52296944890f300ba9479088 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar
Date: Tue, 23 Sep 2025 10:52:29 +0100
Subject: lib/string_choices: Add str_assert_deassert() helper

Add a str_assert_deassert() helper to return the "assert" or "deassert"
string literal depending on the boolean argument. Also add the inverse
variant str_deassert_assert().

Suggested-by: Philipp Zabel
Signed-off-by: Lad Prabhakar
Reviewed-by: Andy Shevchenko
Link: https://lore.kernel.org/r/20250923095229.2149740-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Kees Cook
---
 include/linux/string_choices.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h
index f3ba4f52ff26..6c4077be7742 100644
--- a/include/linux/string_choices.h
+++ b/include/linux/string_choices.h
@@ -17,6 +17,12 @@
 #include
+static inline const char *str_assert_deassert(bool v)
+{
+ return v ? "assert" : "deassert";
+}
+#define str_deassert_assert(v) str_assert_deassert(!(v))
+
 static inline const char *str_enable_disable(bool v)
 {
 return v ? "enable" : "disable";
-- cgit v1.2.3


From 3c1ea5c5019ff197aca7e886a3a240c38f6c6f0d Mon Sep 17 00:00:00 2001
From: Vlastimil Babka
Date: Wed, 3 Sep 2025 14:59:47 +0200
Subject: slab: sheaf prefilling for guaranteed allocations

Add functions for efficient guaranteed allocations, e.g. in a critical
section that cannot sleep, when the exact number of allocations is not
known beforehand, but an upper limit can be calculated.

kmem_cache_prefill_sheaf() returns a sheaf containing at least the
given number of objects.

kmem_cache_alloc_from_sheaf() will allocate an object from the sheaf
and is guaranteed not to fail until depleted.

kmem_cache_return_sheaf() is for giving the sheaf back to the slab
allocator after the critical section. This will also attempt to refill
it to cache's sheaf capacity for better efficiency of sheaves handling,
but it's not strictly necessary to succeed.

kmem_cache_refill_sheaf() can be used to refill a previously obtained
sheaf to requested size.
If the current size is sufficient, it does nothing. If the requested size exceeds cache's sheaf_capacity and the sheaf's current capacity, the sheaf will be replaced with a new one, hence the indirect pointer parameter. kmem_cache_sheaf_size() can be used to query the current size. The implementation supports requesting sizes that exceed cache's sheaf_capacity, but it is not efficient - such "oversize" sheaves are allocated fresh in kmem_cache_prefill_sheaf() and flushed and freed immediately by kmem_cache_return_sheaf(). kmem_cache_refill_sheaf() might be especially ineffective when replacing a sheaf with a new one of a larger capacity. It is therefore better to size cache's sheaf_capacity accordingly to make oversize sheaves exceptional. CONFIG_SLUB_STATS counters are added for sheaf prefill and return operations. A prefill or return is considered _fast when it is able to grab or return a percpu spare sheaf (even if the sheaf needs a refill to satisfy the request, as those should amortize over time), and _slow otherwise (when the barn or even sheaf allocation/freeing has to be involved). sheaf_prefill_oversize is provided to determine how many prefills were oversize (counter for oversize returns is not necessary as all oversize refills result in oversize returns). When slub_debug is enabled for a cache with sheaves, no percpu sheaves exist for it, but the prefill functionality is still provided simply by all prefilled sheaves becoming oversize. If percpu sheaves are not created for a cache due to not passing the sheaf_capacity argument on cache creation, the prefills also work through oversize sheaves, but there's a WARN_ON_ONCE() to indicate the omission. Reviewed-by: Suren Baghdasaryan Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 49acbcdc6696..680193356ac7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -829,6 +829,22 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *cachep, gfp_t gfp, + struct slab_sheaf *sheaf) __assume_slab_alignment __malloc; +#define kmem_cache_alloc_from_sheaf(...) \ + alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__)) + +unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf); + /* * These macros allow declaring a kmem_buckets * parameter alongside size, which * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call -- cgit v1.2.3 From 9b05890a25d9197e39fcf5b2298f0b911c323306 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:01 +0200 Subject: maple_tree: Prefilled sheaf conversion and testing Use prefilled sheaves instead of bulk allocations. This should speed up the allocations and the return path of unused allocations. Remove the push and pop of nodes from the maple state as this is now handled by the slab layer with sheaves. 
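For reference, the prefill API introduced above is used along these
lines. A hedged sketch, assuming a NULL return on prefill failure; the
cache pointer and the count of 16 are illustrative:

	struct slab_sheaf *sheaf;
	void *node;

	/* Outside the critical section: may sleep, may fail. */
	sheaf = kmem_cache_prefill_sheaf(cache, GFP_KERNEL, 16);
	if (!sheaf)
		return -ENOMEM;

	/* Inside the critical section: cannot fail until depleted. */
	node = kmem_cache_alloc_from_sheaf(cache, GFP_NOWAIT, sheaf);

	/* Afterwards: hand unused objects back to the allocator. */
	kmem_cache_return_sheaf(cache, GFP_KERNEL, sheaf);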
Testing has been removed as necessary since the features of the tree have been reduced. Signed-off-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- include/linux/maple_tree.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index bafe143b1f78..0e31b191e3be 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -442,7 +442,8 @@ struct ma_state { struct maple_enode *node; /* The node containing this entry */ unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ - struct maple_alloc *alloc; /* Allocated nodes for this operation */ + struct slab_sheaf *sheaf; /* Allocated nodes for this operation */ + unsigned long node_request; /* The number of nodes to allocate for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; @@ -490,7 +491,8 @@ struct ma_wr_state { .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ - .alloc = NULL, \ + .node_request = 0, \ + .sheaf = NULL, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } -- cgit v1.2.3 From 6bf377b06c08049d0f4042493df302285e45165e Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:02 +0200 Subject: maple_tree: Add single node allocation support to maple state The fast path through a write will require replacing a single node in the tree. Using a sheaf (32 nodes) is too heavy for the fast path, so special case the node store operation by just allocating one node in the maple state. Signed-off-by: Liam R. Howlett Signed-off-by: Vlastimil Babka --- include/linux/maple_tree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 0e31b191e3be..51a64ff23b88 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -443,6 +443,7 @@ struct ma_state { unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct slab_sheaf *sheaf; /* Allocated nodes for this operation */ + struct maple_node *alloc; /* A single allocated node for fast path writes */ unsigned long node_request; /* The number of nodes to allocate for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ @@ -491,8 +492,9 @@ struct ma_wr_state { .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ - .node_request = 0, \ .sheaf = NULL, \ + .alloc = NULL, \ + .node_request = 0, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } -- cgit v1.2.3 From 4957089a23f41f31f8e7e22802a8ef9f5789c191 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 8 Sep 2025 18:00:02 -0700 Subject: locking/local_lock: Introduce local_lock_is_locked(). Introduce local_lock_is_locked() that returns true when given local_lock is locked by current cpu (in !PREEMPT_RT) or by current task (in PREEMPT_RT). The goal is to detect a deadlock by the caller. 
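The caller-side pattern is roughly as follows (a sketch; the slab
fields mirror the kmalloc_nolock() patch further down, and preemption
or migration is assumed to be disabled as the header comments require):

	unsigned long flags;

	if (local_lock_is_locked(&s->cpu_slab->lock)) {
		/* Held by this CPU (or this task on PREEMPT_RT):
		 * re-entering would deadlock, so fall back, e.g. to a
		 * different kmalloc bucket. */
		return NULL;
	}
	local_lock_irqsave(&s->cpu_slab->lock, flags);
	/* ... manipulate the per-cpu cache ... */
	local_unlock_irqrestore(&s->cpu_slab->lock, flags);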
Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Alexei Starovoitov Signed-off-by: Vlastimil Babka --- include/linux/local_lock.h | 2 ++ include/linux/local_lock_internal.h | 7 +++++++ include/linux/rtmutex.h | 10 ++++++++++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 2ba846419524..0d91d060e3e9 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -66,6 +66,8 @@ */ #define local_trylock(lock) __local_trylock(this_cpu_ptr(lock)) +#define local_lock_is_locked(lock) __local_lock_is_locked(lock) + /** * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable * interrupts if acquired diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 949de37700db..a4dc479157b5 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -165,6 +165,9 @@ do { \ !!tl; \ }) +/* preemption or migration must be disabled before calling __local_lock_is_locked */ +#define __local_lock_is_locked(lock) READ_ONCE(this_cpu_ptr(lock)->acquired) + #define __local_lock_release(lock) \ do { \ local_trylock_t *tl; \ @@ -285,4 +288,8 @@ do { \ __local_trylock(lock); \ }) +/* migration must be disabled before calling __local_lock_is_locked */ +#define __local_lock_is_locked(__lock) \ + (rt_mutex_owner(&this_cpu_ptr(__lock)->lock) == current) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index fa9f1021541e..ede4c6bf6f22 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -44,6 +44,16 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) return READ_ONCE(lock->owner) != NULL; } +#ifdef CONFIG_RT_MUTEXES +#define RT_MUTEX_HAS_WAITERS 1UL + +static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) +{ + unsigned long owner = (unsigned long) READ_ONCE(lock->owner); + + return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); +} +#endif extern void rt_mutex_base_init(struct rt_mutex_base *rtb); /** -- cgit v1.2.3 From 99253de51f80acccc528a9c94e2f4d5f329071f1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 8 Sep 2025 18:00:03 -0700 Subject: mm: Allow GFP_ACCOUNT to be used in alloc_pages_nolock(). Change alloc_pages_nolock() to default to __GFP_COMP when allocating pages, since upcoming reentrant alloc_slab_page() needs __GFP_COMP. Also allow __GFP_ACCOUNT flag to be specified, since most of BPF infra needs __GFP_ACCOUNT except BPF streams. Reviewed-by: Vlastimil Babka Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 5ebf26fcdcfa..0ceb4e09306c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -354,7 +354,7 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) -struct page *alloc_pages_nolock_noprof(int nid, unsigned int order); +struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order); #define alloc_pages_nolock(...) 
alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__))
 extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
-- cgit v1.2.3


From 7612833192d56af86061de8ab51989b75daf5b0d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Mon, 8 Sep 2025 18:00:06 -0700
Subject: slab: Reuse first bit for OBJEXTS_ALLOC_FAIL

Since the combination of valid upper bits in slab->obj_exts with the
OBJEXTS_ALLOC_FAIL bit can never happen, use OBJEXTS_ALLOC_FAIL ==
(1ull << 0) as a magic sentinel instead of (1ull << 2) to free up bit 2.

Signed-off-by: Alexei Starovoitov
Acked-by: Shakeel Butt
Reviewed-by: Harry Yoo
Signed-off-by: Vlastimil Babka
---
 include/linux/memcontrol.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 785173aa0739..d254c0b96d0d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -341,17 +341,23 @@ enum page_memcg_data_flags {
 __NR_MEMCG_DATA_FLAGS = (1UL << 2),
 };
+#define __OBJEXTS_ALLOC_FAIL MEMCG_DATA_OBJEXTS
 #define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS
 #else /* CONFIG_MEMCG */
+#define __OBJEXTS_ALLOC_FAIL (1UL << 0)
 #define __FIRST_OBJEXT_FLAG (1UL << 0)
 #endif /* CONFIG_MEMCG */
 enum objext_flags {
- /* slabobj_ext vector failed to allocate */
- OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG,
+ /*
+ * Use bit 0 with zero other bits to signal that slabobj_ext vector
+ * failed to allocate. The same bit 0 with valid upper bits means
+ * MEMCG_DATA_OBJEXTS.
+ */
+ OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
 /* the next bit after the last actual flag */
 __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
 };
-- cgit v1.2.3


From af92793e52c3a99b828ed4bdd277fd3e11c18d08 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Mon, 8 Sep 2025 18:00:07 -0700
Subject: slab: Introduce kmalloc_nolock() and kfree_nolock().

kmalloc_nolock() relies on the ability of local_trylock_t to detect
the situation when a per-cpu kmem_cache is locked. In !PREEMPT_RT
local_(try)lock_irqsave(&s->cpu_slab->lock, flags) disables IRQs and
marks s->cpu_slab->lock as acquired.
local_lock_is_locked(&s->cpu_slab->lock) returns true when the slab
allocator is in the middle of manipulating the per-cpu cache of that
specific kmem_cache.

kmalloc_nolock() can be called from any context and can re-enter into
___slab_alloc():

  kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> NMI -> bpf ->
    kmalloc_nolock() -> ___slab_alloc(cache_B)
or
  kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> tracepoint/kprobe
    -> bpf -> kmalloc_nolock() -> ___slab_alloc(cache_B)

Hence the caller of ___slab_alloc() checks if &s->cpu_slab->lock can be
acquired without a deadlock before invoking the function. If that
specific per-cpu kmem_cache is busy, kmalloc_nolock() retries in a
different kmalloc bucket. The second attempt will likely succeed, since
this cpu locked a different kmem_cache.

Similarly, in PREEMPT_RT local_lock_is_locked() returns true when the
per-cpu rt_spin_lock is locked by the current _task_. In this case
re-entrance into the same kmalloc bucket is unsafe, and
kmalloc_nolock() tries a different bucket that is most likely not
locked by the current task. Though it may be locked by a different
task, it's safe to rt_spin_lock() and sleep on it.

Similar to alloc_pages_nolock(), kmalloc_nolock() returns NULL
immediately if called from hard irq or NMI in PREEMPT_RT.

kfree_nolock() defers freeing to irq_work when local_lock_is_locked()
and (in_nmi() or in PREEMPT_RT).
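From the caller's perspective the contract is simple. A hedged sketch
of the intended any-context usage, with size and gfp flags illustrative
(the prototypes are in the slab.h diff below):

	/* Safe from NMI, IRQ and task context alike; on PREEMPT_RT it
	 * returns NULL from hard irq or NMI, as noted above. */
	void *obj = kmalloc_nolock(size, __GFP_ACCOUNT, NUMA_NO_NODE);

	if (!obj)
		return -ENOMEM;	/* may fail where plain kmalloc() would sleep */
	/* ... use obj ... */
	kfree_nolock(obj);	/* only for kmalloc_nolock() allocations */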
SLUB_TINY config doesn't use local_lock_is_locked() and relies on spin_trylock_irqsave(&n->list_lock) to allocate, while kfree_nolock() always defers to irq_work. Note, kfree_nolock() must be called _only_ for objects allocated with kmalloc_nolock(). Debug checks (like kmemleak and kfence) were skipped on allocation, hence obj = kmalloc(); kfree_nolock(obj); will miss kmemleak/kfence book keeping and will cause false positives. large_kmalloc is not supported by either kmalloc_nolock() or kfree_nolock(). Signed-off-by: Alexei Starovoitov Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/kasan.h | 13 ++++++++----- include/linux/memcontrol.h | 2 ++ include/linux/slab.h | 4 ++++ 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 890011071f2b..acdc8cb0152e 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, } bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, - bool still_accessible); + bool still_accessible, bool no_quarantine); /** * kasan_slab_free - Poison, initialize, and quarantine a slab object. * @object: Object to be freed. @@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, * @Return true if KASAN took ownership of the object; false otherwise. */ static __always_inline bool kasan_slab_free(struct kmem_cache *s, - void *object, bool init, - bool still_accessible) + void *object, bool init, + bool still_accessible, + bool no_quarantine) { if (kasan_enabled()) - return __kasan_slab_free(s, object, init, still_accessible); + return __kasan_slab_free(s, object, init, still_accessible, + no_quarantine); return false; } @@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) } static inline bool kasan_slab_free(struct kmem_cache *s, void *object, - bool init, bool still_accessible) + bool init, bool still_accessible, + bool no_quarantine) { return false; } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d254c0b96d0d..82563236f35c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -358,6 +358,8 @@ enum objext_flags { * MEMCG_DATA_OBJEXTS. */ OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL, + /* slabobj_ext vector allocated with kmalloc_nolock() */ + OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG, /* the next bit after the last actual flag */ __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), }; diff --git a/include/linux/slab.h b/include/linux/slab.h index 680193356ac7..561597dd2164 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -501,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size, #define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) void kfree(const void *objp); +void kfree_nolock(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); @@ -957,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f } #define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) +void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node); +#define kmalloc_nolock(...) 
alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__)) + #define kmem_buckets_alloc(_b, _size, _flags) \ alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) -- cgit v1.2.3 From 9a0abc39450a3123fd52533a662fbd37e0d1508c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 17:47:14 +0200 Subject: PM: runtime: Add auto-cleanup macros for "resume and get" operations It is generally useful to be able to automatically drop a device's runtime PM usage counter incremented by runtime PM operations that resume a device and bump up its usage counter [1]. To that end, add guard definition macros allowing pm_runtime_put() and pm_runtime_put_autosuspend() to be used for the auto-cleanup in those cases. Simply put, a piece of code like below: pm_runtime_get_sync(dev); ..... pm_runtime_put(dev); return 0; can be transformed with guard() like: guard(pm_runtime_active)(dev); ..... return 0; (see the pm_runtime_put() call is gone). However, it is better to do proper error handling in the majority of cases, so doing something like this instead of the above is recommended: ACQUIRE(pm_runtime_active_try, pm)(dev); if (ACQUIRE_ERR(pm_runtime_active_try, &pm)) return -ENXIO; ..... return 0; In all of the cases in which runtime PM is known to be enabled for the given device or the device can be regarded as operational (and so it can be accessed) with runtime PM disabled, a piece of code like: ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ..... pm_runtime_put(dev); return 0; can be changed as follows: ACQUIRE(pm_runtime_active_try, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_try, &pm); if (ret < 0) return ret; ..... return 0; (again, see the pm_runtime_put() call is gone). Still, if the device cannot be accessed unless runtime PM has been enabled for it, the pm_runtime_active_try_enabled guard variant needs to be used, that is (in the context of the example above): ACQUIRE(pm_runtime_active_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; When the original code calls pm_runtime_put_autosuspend(), use one of the "auto" guard variants, pm_runtime_active_auto/_try/_enabled, so for example, a piece of code like: ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ..... pm_runtime_put_autosuspend(dev); return 0; will become: ACQUIRE(pm_runtime_active_auto_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_auto_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; Note that the cases in which the return value of pm_runtime_get_sync() is checked can also be handled with the help of the new guard macros. For example, a piece of code like: ret = pm_runtime_get_sync(dev); if (ret < 0) { pm_runtime_put(dev); return ret; } ..... pm_runtime_put(dev); return 0; can be rewritten as: ACQUIRE(pm_runtime_active_auto_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_auto_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; or pm_runtime_get_active_try can be used if transparent handling of disabled runtime PM is desirable. Link: https://lore.kernel.org/linux-pm/878qimv24u.wl-tiwai@suse.de/ [1] Link: https://lore.kernel.org/linux-pm/20250926150613.000073a4@huawei.com/ Signed-off-by: Rafael J. Wysocki Acked-by: Dan Williams Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/2238241.irdbgypaU6@rafael.j.wysocki [ rjw: Fixed leftovers from the previous version in the changelog ] Reviewed-by: Jonathan Cameron Reviewed-by: Dhruva Gole Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm_runtime.h | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d1ff76e0e2d0..e5426bdd0c9f 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -21,6 +21,7 @@ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ +#define RPM_TRANSPARENT 0x10 /* Succeed if runtime PM is disabled */ /* * Use this for defining a set of PM operations to be used in all situations @@ -512,6 +513,19 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +static inline int pm_runtime_get_active(struct device *dev, int rpmflags) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT | rpmflags); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. @@ -522,15 +536,7 @@ static inline int pm_runtime_get_sync(struct device *dev) */ static inline int pm_runtime_resume_and_get(struct device *dev) { - int ret; - - ret = __pm_runtime_resume(dev, RPM_GET_PUT); - if (ret < 0) { - pm_runtime_put_noidle(dev); - return ret; - } - - return 0; + return pm_runtime_get_active(dev, 0); } /** @@ -610,6 +616,26 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) return __pm_runtime_put_autosuspend(dev); } +DEFINE_GUARD(pm_runtime_active, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put(_T)); +DEFINE_GUARD(pm_runtime_active_auto, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put_autosuspend(_T)); +/* + * Use the following guards with ACQUIRE()/ACQUIRE_ERR(). + * + * The difference between the "_try" and "_try_enabled" variants is that the + * former do not produce an error when runtime PM is disabled for the given + * device. + */ +DEFINE_GUARD_COND(pm_runtime_active, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, + pm_runtime_resume_and_get(_T)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, + pm_runtime_resume_and_get(_T)) + /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. -- cgit v1.2.3 From d5e58ce1fb0f13a9a0845851f267ede3551cd9fe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 18:26:40 +0200 Subject: PM: runtime: Drop DEFINE_FREE() for pm_runtime_put() The DEFINE_FREE() for pm_runtime_put has been superseded by recently introduced runtime PM auto-cleanup macros and its only user has been converted to using one of the new macros, so drop it. Signed-off-by: Rafael J. 
Wysocki
Reviewed-by: Dhruva Gole
Reviewed-by: Takashi Iwai
Reviewed-by: Jonathan Cameron
---
 include/linux/pm_runtime.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index e5426bdd0c9f..edb8aed5ef62 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -563,8 +563,6 @@ static inline int pm_runtime_put(struct device *dev)
 return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
 }
-DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T))
-
 /**
 * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
 * @dev: Target device.
-- cgit v1.2.3


From c39d6d4d933381714b6e5d735545256558ec6c05 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Sat, 27 Sep 2025 08:29:35 -0400
Subject: ptr_ring: __ptr_ring_zero_tail micro optimization

__ptr_ring_zero_tail currently does the - 1 operation twice:
- during initialization of head
- at each loop iteration

Let's just do it in one place; all we need to do is adjust the loop
condition.

This is better:
- a slightly clearer logic with less duplication
- uses prefix -- we don't need to save the old value
- one less - 1 operation - for example, when the ring is empty we now
  don't do the - 1 at all, while the existing code does it once

Text size shrinks from 15081 to 15050 bytes.

Signed-off-by: Michael S. Tsirkin
Reviewed-by: Simon Horman
Link: https://patch.msgid.link/bcd630c7edc628e20d4f8e037341f26c90ab4365.1758976026.git.mst@redhat.com
Signed-off-by: Jakub Kicinski
---
 include/linux/ptr_ring.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index a736b16859a6..534531807d95 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -248,15 +248,15 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
 */
 static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head)
 {
- int head = consumer_head - 1;
+ int head = consumer_head;
 /* Zero out entries in the reverse order: this way we touch the
 * cache line that producer might currently be reading the last;
 * producer won't make progress and touch other cache lines
 * besides the first one until we write out all entries.
 */
- while (likely(head >= r->consumer_tail))
- r->queue[head--] = NULL;
+ while (likely(head > r->consumer_tail))
+ r->queue[--head] = NULL;
 r->consumer_tail = consumer_head;
 }
-- cgit v1.2.3


From e28d5a68b6519ec6b2118a3f604295b5534eeb51 Mon Sep 17 00:00:00 2001
From: Ivan Vecera
Date: Sat, 27 Sep 2025 10:49:11 +0200
Subject: dpll: add phase_offset_avg_factor_get/set callback ops

Add new callback operations for a dpll device:
- phase_offset_avg_factor_get(...) - to obtain the current phase offset
  averaging factor from the dpll device,
- phase_offset_avg_factor_set(...) - to set the phase offset averaging
  factor

Obtain the factor value using the get callback and provide it to the
user if the device driver implements this callback. Execute the set
callback upon user request, if the driver implements it.
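A driver-side sketch of wiring up the new callbacks; the foo_* context
and register helpers are assumptions, while the signatures match the
header diff below:

	static int foo_pofs_avg_factor_get(const struct dpll_device *dpll,
					   void *dpll_priv, u32 *factor,
					   struct netlink_ext_ack *extack)
	{
		struct foo_dpll *fd = dpll_priv;

		*factor = foo_read_avg_factor(fd);	/* assumed helper */
		return 0;
	}

	static int foo_pofs_avg_factor_set(const struct dpll_device *dpll,
					   void *dpll_priv, u32 factor,
					   struct netlink_ext_ack *extack)
	{
		struct foo_dpll *fd = dpll_priv;

		return foo_write_avg_factor(fd, factor);	/* assumed helper */
	}

	static const struct dpll_device_ops foo_dpll_ops = {
		.phase_offset_avg_factor_get = foo_pofs_avg_factor_get,
		.phase_offset_avg_factor_set = foo_pofs_avg_factor_set,
	};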
Signed-off-by: Ivan Vecera v2: * do not require 'set' callback to retrieve current value * always call 'set' callback regardless of current value Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250927084912.2343597-3-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/dpll.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index fa1e76920d0e..25be745bf41f 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -38,6 +38,12 @@ struct dpll_device_ops { void *dpll_priv, enum dpll_feature_state *state, struct netlink_ext_ack *extack); + int (*phase_offset_avg_factor_set)(const struct dpll_device *dpll, + void *dpll_priv, u32 factor, + struct netlink_ext_ack *extack); + int (*phase_offset_avg_factor_get)(const struct dpll_device *dpll, + void *dpll_priv, u32 *factor, + struct netlink_ext_ack *extack); }; struct dpll_pin_ops { -- cgit v1.2.3 From 7bd80ed89d72285515db673803b021469ba71ee8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 24 Sep 2025 14:02:41 +0200 Subject: Documentation: net: add flow control guide and document ethtool API Introduce a new document, flow_control.rst, to provide a comprehensive guide on Ethernet Flow Control in Linux. The guide explains how flow control works, how autonegotiation resolves pause capabilities, and how to configure it using ethtool and Netlink. In parallel, document the pause and pause-stat attributes in the ethtool.yaml netlink spec. This enables the ynl tool to generate kernel-doc comments for the corresponding enums in the UAPI header, making the C interface self-documenting. Finally, replace the legacy flow control section in phy.rst with a reference to the new document and add pointers in the relevant C source files. Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20250924120241.724850-1-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c2d8b4ec62eb..eeed1ea50369 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -953,9 +953,48 @@ struct kernel_ethtool_ts_info { * @get_pause_stats: Report pause frame statistics. Drivers must not zero * statistics which they don't report. The stats structure is initialized * to ETHTOOL_STAT_NOT_SET indicating driver does not report statistics. - * @get_pauseparam: Report pause parameters - * @set_pauseparam: Set pause parameters. Returns a negative error code - * or zero. + * + * @get_pauseparam: Report the configured policy for link-wide PAUSE + * (IEEE 802.3 Annex 31B). Drivers must fill struct ethtool_pauseparam + * such that: + * @autoneg: + * This refers to **Pause Autoneg** (IEEE 802.3 Annex 31B) only + * and is independent of generic link autonegotiation configured + * via ethtool -s. + * true -> the device follows the negotiated result of pause + * autonegotiation (Pause/Asym); + * false -> the device uses a forced MAC state independent of + * negotiation. + * @rx_pause/@tx_pause: + * represent the desired policy (preferred configuration). + * In autoneg mode they describe what is to be advertised; + * in forced mode they describe the MAC state to apply. + * + * Drivers (and/or frameworks) should persist this policy across link + * changes and reapply appropriate MAC programming when link parameters + * change. 
+ * + * @set_pauseparam: Apply a policy for link-wide PAUSE (IEEE 802.3 Annex 31B). + * If @autoneg is true: + * Arrange for pause advertisement (Pause/Asym) based on + * @rx_pause/@tx_pause and program the MAC to follow the + * negotiated result (which may be symmetric, asymmetric, or off + * depending on the link partner). + * If @autoneg is false: + * Do not rely on autonegotiation; force the MAC RX/TX pause + * state directly per @rx_pause/@tx_pause. + * + * Implementations that integrate with PHYLIB/PHYLINK should cooperate + * with those frameworks for advertisement and resolution; MAC drivers are + * still responsible for applying the required MAC state. + * + * Return: 0 on success or a negative errno. Return -EOPNOTSUPP if + * link-wide PAUSE is unsupported. If only symmetric pause is supported, + * reject unsupported asymmetric requests with -EINVAL (or document any + * coercion policy). + * + * See also: Documentation/networking/flow_control.rst + * * @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects * @set_phys_id: Identify the physical devices, e.g. by flashing an LED -- cgit v1.2.3 From e211c463b748c4e2e8364c10bc216ca775fcc943 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Sep 2025 21:52:30 +0200 Subject: net: phy: stop exporting phy_driver_unregister After 42e2a9e11a1d ("net: phy: dp83640: improve phydev and driver removal handling") we can also stop exporting phy_driver_unregister(). Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/2bab950e-4b70-4030-b997-03f48379586f@gmail.com Signed-off-by: Paolo Abeni --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b377dfaa6801..7a54a8b4d277 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2030,7 +2030,6 @@ static inline int phy_read_status(struct phy_device *phydev) return genphy_read_status(phydev); } -void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); -- cgit v1.2.3 From 9c94ae6bb0b2895024b6e29fcc1cbec968b4776a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:32 +0000 Subject: net: make softnet_data.defer_count an atomic This is preparation work to remove the softnet_data.defer_lock, as it is contended on hosts with a large number of cores. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-2-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b85454116f6..27e3fa69253f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3538,7 +3538,7 @@ struct softnet_data { /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; - int defer_count; + atomic_t defer_count; int defer_ipi_scheduled; struct sk_buff *defer_list; call_single_data_t defer_csd; -- cgit v1.2.3 From 844c9db7f7f5fe1b0b53ed9f1c2bc7313b3021c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:33 +0000 Subject: net: use llist for sd->defer_list Get rid of sd->defer_lock and adopt llist operations.
We optimize skb_attempt_defer_free() for the common case, where the packet is queued. Otherwise, sd->defer_count keeps increasing until skb_defer_free_flush() clears it. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-3-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 27e3fa69253f..5c9aa16933d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3537,10 +3537,10 @@ struct softnet_data { struct numa_drop_counters drop_counters; /* Another possibly contended cache line */ - spinlock_t defer_lock ____cacheline_aligned_in_smp; - atomic_t defer_count; - int defer_ipi_scheduled; - struct sk_buff *defer_list; + struct llist_head defer_list ____cacheline_aligned_in_smp; + atomic_long_t defer_count; + + int defer_ipi_scheduled ____cacheline_aligned_in_smp; call_single_data_t defer_csd; }; -- cgit v1.2.3 From 5628f3fe3b16114e8424bbfcf0594caef8958a06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:34 +0000 Subject: net: add NUMA awareness to skb_attempt_defer_free() Instead of sharing sd->defer_list & sd->defer_count with many cpus, add one pair for each NUMA node. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-4-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5c9aa16933d1..d1a687444b27 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3536,10 +3536,6 @@ struct softnet_data { struct numa_drop_counters drop_counters; - /* Another possibly contended cache line */ - struct llist_head defer_list ____cacheline_aligned_in_smp; - atomic_long_t defer_count; - int defer_ipi_scheduled ____cacheline_aligned_in_smp; call_single_data_t defer_csd; }; -- cgit v1.2.3 From 20c48920583675e67b3824f147726e0fbda735ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 18 Sep 2025 17:33:00 -0700 Subject: KVM: Export KVM-internal symbols for sub-modules only Rework the vast majority of KVM's exports to expose symbols only to KVM submodules, i.e. to x86's kvm-{amd,intel}.ko and PPC's kvm-{pr,hv}.ko. With few exceptions, KVM's exported APIs are intended (and safe) for KVM- internal usage only. Keep kvm_get_kvm(), kvm_get_kvm_safe(), and kvm_put_kvm() as normal exports, as they are needed by VFIO, and are generally safe for external usage (though ideally even the get/put APIs would be KVM-internal, and VFIO would pin a VM by grabbing a reference to its associated file). Implement a framework in kvm_types.h in anticipation of providing a macro to restrict KVM-specific kernel exports, i.e. to provide symbol exports for KVM if and only if KVM is built as one or more modules.
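As an illustration only (the symbol below is an example export site, not a hunk from this patch), converting an export looks like:

-EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_yield_to);

When KVM_SUB_MODULES is defined (e.g. to kvm-intel,kvm-amd on x86), the macro expands to EXPORT_SYMBOL_FOR_MODULES() restricted to those modules; when it is not defined, the symbol is not exported at all.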
Link: https://lore.kernel.org/r/20250919003303.1355064-3-seanjc@google.com Cc: Nathan Chancellor Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_types.h | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 827ecc0b7e10..490464c205b4 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -3,6 +3,23 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +#include +#include +#include +#include + +#ifdef KVM_SUB_MODULES +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) \ + EXPORT_SYMBOL_FOR_MODULES(symbol, __stringify(KVM_SUB_MODULES)) +#else +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) +#endif + +#ifndef __ASSEMBLER__ + +#include +#include + struct kvm; struct kvm_async_pf; struct kvm_device_ops; @@ -19,13 +36,6 @@ struct kvm_memslots; enum kvm_mr_change; -#include -#include -#include -#include - -#include - /* * Address types: * @@ -116,5 +126,6 @@ struct kvm_vcpu_stat_generic { }; #define KVM_STATS_NAME_SIZE 48 +#endif /* !__ASSEMBLER__ */ #endif /* __KVM_TYPES_H__ */ -- cgit v1.2.3 From 25ba2b84c38f624151a3ba36e56d41c39b9223ad Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 19 Sep 2025 10:36:26 -0400 Subject: nfs/localio: avoid issuing misaligned IO using O_DIRECT Add nfsd_file_dio_alignment and use it to avoid issuing misaligned IO using O_DIRECT. Any misaligned DIO falls back to using buffered IO. Because misaligned DIO is now handled safely, remove the nfs modparam 'localio_O_DIRECT_semantics' that was added to require users to opt in to the requirement that all O_DIRECT be properly DIO-aligned. Also, introduce nfs_iov_iter_aligned_bvec() which is a variant of iov_iter_aligned_bvec() that also verifies the offset associated with an iov_iter is DIO-aligned. NOTE: in a parallel effort, iov_iter_aligned_bvec() is being removed along with iov_iter_is_aligned(). Lastly, add a pr_info_ratelimited if the underlying filesystem returns -EINVAL because it was made to try O_DIRECT for IO that is not DIO-aligned (this shouldn't happen, so it's best to be louder if it does). Fixes: 3feec68563d ("nfs/localio: add direct IO enablement with sync and async IO support") Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 5c7c92659e73..7ca2715edccc 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -65,6 +65,8 @@ struct nfsd_localio_operations { struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); + void (*nfsd_file_dio_alignment)(struct nfsd_file *, + u32 *, u32 *, u32 *); } ____cacheline_aligned; extern void nfsd_localio_ops_init(void); -- cgit v1.2.3 From 6d0386ea99875313fdfd074eb74013b6e3b48a76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:53 -0700 Subject: entry/kvm: KVM: Move KVM details related to signal/-EINTR into KVM proper Move KVM's morphing of pending signals into userspace exits into KVM proper, and drop the @vcpu param from xfer_to_guest_mode_handle_work(). How KVM responds to -EINTR is a detail that really belongs in KVM itself, and invoking kvm_handle_signal_exit() from kernel code creates an inverted module dependency. E.g.
attempting to move kvm_handle_signal_exit() into kvm_main.c would generate a linker error when building kvm.ko as a module. Dropping the KVM details will also allow converting the KVM "entry" code into a more generic virtualization framework so that it can be used when running as a Hyper-V root partition. Lastly, eliminating usage of "struct kvm_vcpu" outside of KVM is nice to have for KVM x86 developers, as keeping the details of kvm_vcpu purely within KVM allows changing the layout of the structure without having to boot into a new kernel, e.g. allows rebuilding and reloading kvm.ko with a modified kvm_vcpu structure as part of debug/development. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Signed-off-by: Wei Liu --- include/linux/entry-kvm.h | 11 +++-------- include/linux/kvm_host.h | 13 ++++++++++++- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 16149f6625e4..3644de7e6019 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -21,8 +21,6 @@ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ ARCH_XFER_TO_GUEST_MODE_WORK) -struct kvm_vcpu; - /** * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest * mode work handling function. @@ -32,12 +30,10 @@ struct kvm_vcpu; * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be * replaced by architecture specific code. */ -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work); +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); #ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work) +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; } @@ -46,11 +42,10 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, /** * xfer_to_guest_mode_handle_work - Check and handle pending work which needs * to be handled before going to guest mode - * @vcpu: Pointer to current's VCPU data * * Returns: 0 or an error code */ -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); +int xfer_to_guest_mode_handle_work(void); /** * xfer_to_guest_mode_prepare - Perform last minute preparation work that diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..598b9473e46d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H - +#include #include #include #include @@ -2450,6 +2450,17 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } + +static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +{ + int r = xfer_to_guest_mode_handle_work(); + + if (r) { + WARN_ON_ONCE(r != -EINTR); + kvm_handle_signal_exit(vcpu); + } + return r; +} #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ /* -- cgit v1.2.3 From 9be7e1e320ff2e7db4b23c8ec5f599bbfac94ede Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:54 -0700 Subject: entry: Rename "kvm" entry code assets to "virt" to genericize APIs Rename the "kvm" entry code files and Kconfigs to use generic "virt" nomenclature so that the code can be reused by other hypervisors (or rather, their root/dom0 partition drivers), without incorrectly suggesting the code somehow relies on and/or involves KVM.
No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Reviewed-by: Joel Fernandes Signed-off-by: Wei Liu --- include/linux/entry-kvm.h | 95 ---------------------------------------------- include/linux/entry-virt.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 6 +-- include/linux/rcupdate.h | 2 +- 4 files changed, 99 insertions(+), 99 deletions(-) delete mode 100644 include/linux/entry-kvm.h create mode 100644 include/linux/entry-virt.h (limited to 'include/linux') diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h deleted file mode 100644 index 3644de7e6019..000000000000 --- a/include/linux/entry-kvm.h +++ /dev/null @@ -1,95 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ENTRYKVM_H -#define __LINUX_ENTRYKVM_H - -#include -#include -#include -#include -#include -#include - -/* Transfer to guest mode work */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK - -#ifndef ARCH_XFER_TO_GUEST_MODE_WORK -# define ARCH_XFER_TO_GUEST_MODE_WORK (0) -#endif - -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ - _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ - ARCH_XFER_TO_GUEST_MODE_WORK) - -/** - * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest - * mode work handling function. - * @vcpu: Pointer to current's VCPU data - * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() - * - * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be - * replaced by architecture specific code. - */ -static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); - -#ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) -{ - return 0; -} -#endif - -/** - * xfer_to_guest_mode_handle_work - Check and handle pending work which needs - * to be handled before going to guest mode - * - * Returns: 0 or an error code - */ -int xfer_to_guest_mode_handle_work(void); - -/** - * xfer_to_guest_mode_prepare - Perform last minute preparation work that - * need to be handled while IRQs are disabled - * upon entering to guest. - * - * Has to be invoked with interrupts disabled before the last call - * to xfer_to_guest_mode_work_pending(). - */ -static inline void xfer_to_guest_mode_prepare(void) -{ - lockdep_assert_irqs_disabled(); - tick_nohz_user_enter_prepare(); -} - -/** - * __xfer_to_guest_mode_work_pending - Check if work is pending - * - * Returns: True if work pending, False otherwise. - * - * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from - * interrupt enabled code for racy quick checks with care. - */ -static inline bool __xfer_to_guest_mode_work_pending(void) -{ - unsigned long ti_work = read_thread_flags(); - - return !!(ti_work & XFER_TO_GUEST_MODE_WORK); -} - -/** - * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be - * handled before returning to guest mode - * - * Returns: True if work pending, False otherwise. - * - * Has to be invoked with interrupts disabled before the transition to - * guest mode. 
- */ -static inline bool xfer_to_guest_mode_work_pending(void) -{ - lockdep_assert_irqs_disabled(); - return __xfer_to_guest_mode_work_pending(); -} -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ - -#endif diff --git a/include/linux/entry-virt.h b/include/linux/entry-virt.h new file mode 100644 index 000000000000..42c89e3e5ca7 --- /dev/null +++ b/include/linux/entry-virt.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_ENTRYVIRT_H +#define __LINUX_ENTRYVIRT_H + +#include +#include +#include +#include +#include +#include + +/* Transfer to guest mode work */ +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK + +#ifndef ARCH_XFER_TO_GUEST_MODE_WORK +# define ARCH_XFER_TO_GUEST_MODE_WORK (0) +#endif + +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ + _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ + ARCH_XFER_TO_GUEST_MODE_WORK) + +/** + * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest + * mode work handling function. + * @vcpu: Pointer to current's VCPU data + * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() + * + * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be + * replaced by architecture specific code. + */ +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); + +#ifndef arch_xfer_to_guest_mode_work +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) +{ + return 0; +} +#endif + +/** + * xfer_to_guest_mode_handle_work - Check and handle pending work which needs + * to be handled before going to guest mode + * + * Returns: 0 or an error code + */ +int xfer_to_guest_mode_handle_work(void); + +/** + * xfer_to_guest_mode_prepare - Perform last minute preparation work that + * need to be handled while IRQs are disabled + * upon entering to guest. + * + * Has to be invoked with interrupts disabled before the last call + * to xfer_to_guest_mode_work_pending(). + */ +static inline void xfer_to_guest_mode_prepare(void) +{ + lockdep_assert_irqs_disabled(); + tick_nohz_user_enter_prepare(); +} + +/** + * __xfer_to_guest_mode_work_pending - Check if work is pending + * + * Returns: True if work pending, False otherwise. + * + * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from + * interrupt enabled code for racy quick checks with care. + */ +static inline bool __xfer_to_guest_mode_work_pending(void) +{ + unsigned long ti_work = read_thread_flags(); + + return !!(ti_work & XFER_TO_GUEST_MODE_WORK); +} + +/** + * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be + * handled before returning to guest mode + * + * Returns: True if work pending, False otherwise. + * + * Has to be invoked with interrupts disabled before the transition to + * guest mode. 
+ */ +static inline bool xfer_to_guest_mode_work_pending(void) +{ + lockdep_assert_irqs_disabled(); + return __xfer_to_guest_mode_work_pending(); +} +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ + +#endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 598b9473e46d..70ac2267d5d0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H -#include +#include #include #include #include @@ -2444,7 +2444,7 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; @@ -2461,7 +2461,7 @@ static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) } return r; } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..1e1f3aa375d9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -129,7 +129,7 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static __always_inline void rcu_irq_work_resched(void) { } -- cgit v1.2.3 From 1a98f5699bd57c9b3f66ec54cc38571d5e42ffb1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Sep 2025 15:45:06 +0200 Subject: Revert "Documentation: net: add flow control guide and document ethtool API" This reverts commit 7bd80ed89d72285515db673803b021469ba71ee8. I should not have merged it to begin with due to pending review and changes to be addressed. Link: https://patch.msgid.link/c6f3af12df9b7998920a02027fc8893ce82afc4c.1759239721.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 45 +++------------------------------------------ 1 file changed, 3 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index eeed1ea50369..c2d8b4ec62eb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -953,48 +953,9 @@ struct kernel_ethtool_ts_info { * @get_pause_stats: Report pause frame statistics. Drivers must not zero * statistics which they don't report. The stats structure is initialized * to ETHTOOL_STAT_NOT_SET indicating driver does not report statistics. - * - * @get_pauseparam: Report the configured policy for link-wide PAUSE - * (IEEE 802.3 Annex 31B). Drivers must fill struct ethtool_pauseparam - * such that: - * @autoneg: - * This refers to **Pause Autoneg** (IEEE 802.3 Annex 31B) only - * and is independent of generic link autonegotiation configured - * via ethtool -s. - * true -> the device follows the negotiated result of pause - * autonegotiation (Pause/Asym); - * false -> the device uses a forced MAC state independent of - * negotiation. - * @rx_pause/@tx_pause: - * represent the desired policy (preferred configuration). - * In autoneg mode they describe what is to be advertised; - * in forced mode they describe the MAC state to apply. 
- * - * Drivers (and/or frameworks) should persist this policy across link - * changes and reapply appropriate MAC programming when link parameters - * change. - * - * @set_pauseparam: Apply a policy for link-wide PAUSE (IEEE 802.3 Annex 31B). - * If @autoneg is true: - * Arrange for pause advertisement (Pause/Asym) based on - * @rx_pause/@tx_pause and program the MAC to follow the - * negotiated result (which may be symmetric, asymmetric, or off - * depending on the link partner). - * If @autoneg is false: - * Do not rely on autonegotiation; force the MAC RX/TX pause - * state directly per @rx_pause/@tx_pause. - * - * Implementations that integrate with PHYLIB/PHYLINK should cooperate - * with those frameworks for advertisement and resolution; MAC drivers are - * still responsible for applying the required MAC state. - * - * Return: 0 on success or a negative errno. Return -EOPNOTSUPP if - * link-wide PAUSE is unsupported. If only symmetric pause is supported, - * reject unsupported asymmetric requests with -EINVAL (or document any - * coercion policy). - * - * See also: Documentation/networking/flow_control.rst - * + * @get_pauseparam: Report pause parameters + * @set_pauseparam: Set pause parameters. Returns a negative error code + * or zero. * @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects * @set_phys_id: Identify the physical devices, e.g. by flashing an LED -- cgit v1.2.3 From 5e1c88679174e4bfe5d152060b06d370bd85de80 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 4 Aug 2025 15:07:23 +0200 Subject: mfd: qnap-mcu: Include linux/types.h in qnap-mcu.h shared header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relying on other components to include those basic types is unreliable and may cause compile errors like: ../include/linux/mfd/qnap-mcu.h:13:9: error: unknown type name ‘u32’ 13 | u32 baud_rate; | ^~~ ../include/linux/mfd/qnap-mcu.h:17:9: error: unknown type name ‘bool’ 17 | bool usb_led; | ^~~~ So make sure, the types used in the header are available. Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250804130726.3180806-2-heiko@sntech.de Signed-off-by: Lee Jones --- include/linux/mfd/qnap-mcu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/qnap-mcu.h b/include/linux/mfd/qnap-mcu.h index 8d48c212fd44..42bf523f9a5b 100644 --- a/include/linux/mfd/qnap-mcu.h +++ b/include/linux/mfd/qnap-mcu.h @@ -7,6 +7,8 @@ #ifndef _LINUX_QNAP_MCU_H_ #define _LINUX_QNAP_MCU_H_ +#include + struct qnap_mcu; struct qnap_mcu_variant { -- cgit v1.2.3 From 9c5ad8374b1fe0c8c9eaabc1146d96a543858c4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Aug 2025 17:17:56 +0200 Subject: mfd: arizona: Make legacy gpiolib interface optional The only machine that still uses the old gpio number based interface is the wlf_cragg_6410 board file. In order to remove the dependency on the interfaces, add #ifdef blocks here. 
Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250808151822.536879-13-arnd@kernel.org Signed-off-by: Lee Jones --- include/linux/mfd/arizona/pdata.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 2d13bbea4f3a..f72e6d4b14a7 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -117,8 +117,10 @@ struct arizona_pdata { /** Check for line output with HPDET method */ bool hpdet_acc_id_line; +#ifdef CONFIG_GPIOLIB_LEGACY /** GPIO used for mic isolation with HPDET */ int hpdet_id_gpio; +#endif /** Channel to use for headphone detection */ unsigned int hpdet_channel; @@ -129,8 +131,10 @@ struct arizona_pdata { /** Extra debounce timeout used during initial mic detection (ms) */ unsigned int micd_detect_debounce; +#ifdef CONFIG_GPIOLIB_LEGACY /** GPIO for mic detection polarity */ int micd_pol_gpio; +#endif /** Mic detect ramp rate */ unsigned int micd_bias_start_time; @@ -184,8 +188,10 @@ struct arizona_pdata { /** Haptic actuator type */ unsigned int hap_act; +#ifdef CONFIG_GPIOLIB_LEGACY /** GPIO for primary IRQ (used for edge triggered emulation) */ int irq_gpio; +#endif /** General purpose switch control */ unsigned int gpsw; -- cgit v1.2.3 From 719d02a25a24601c6fb8a7b1627e1abf015b6c2a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 21 Aug 2025 20:23:34 +0200 Subject: mfd: bd71828, bd71815: Prepare for power-supply support Add core support for ROHM BD718(15/28/78) PMIC's charger blocks. Signed-off-by: Matti Vaittinen Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20250821-bd71828-charger-v3-1-cc74ac4e0fb9@kemnade.info Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd71828.h | 63 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-bd71828.h b/include/linux/mfd/rohm-bd71828.h index ce786c96404a..73a71ef69152 100644 --- a/include/linux/mfd/rohm-bd71828.h +++ b/include/linux/mfd/rohm-bd71828.h @@ -189,6 +189,69 @@ enum { /* Charger/Battey */ #define BD71828_REG_CHG_STATE 0x65 #define BD71828_REG_CHG_FULL 0xd2 +#define BD71828_REG_CHG_EN 0x6F +#define BD71828_REG_DCIN_STAT 0x68 +#define BD71828_MASK_DCIN_DET 0x01 +#define BD71828_REG_VDCIN_U 0x9c +#define BD71828_MASK_CHG_EN 0x01 +#define BD71828_CHG_MASK_DCIN_U 0x0f +#define BD71828_REG_BAT_STAT 0x67 +#define BD71828_REG_BAT_TEMP 0x6c +#define BD71828_MASK_BAT_TEMP 0x07 +#define BD71828_BAT_TEMP_OPEN 0x07 +#define BD71828_MASK_BAT_DET 0x20 +#define BD71828_MASK_BAT_DET_DONE 0x10 +#define BD71828_REG_CHG_STATE 0x65 +#define BD71828_REG_VBAT_U 0x8c +#define BD71828_MASK_VBAT_U 0x0f +#define BD71828_REG_VBAT_REX_AVG_U 0x92 + +#define BD71828_REG_OCV_PWRON_U 0x8A + +#define BD71828_REG_VBAT_MIN_AVG_U 0x8e +#define BD71828_REG_VBAT_MIN_AVG_L 0x8f + +#define BD71828_REG_CC_CNT3 0xb5 +#define BD71828_REG_CC_CNT2 0xb6 +#define BD71828_REG_CC_CNT1 0xb7 +#define BD71828_REG_CC_CNT0 0xb8 +#define BD71828_REG_CC_CURCD_AVG_U 0xb2 +#define BD71828_MASK_CC_CURCD_AVG_U 0x3f +#define BD71828_MASK_CC_CUR_DIR 0x80 +#define BD71828_REG_VM_BTMP_U 0xa1 +#define BD71828_REG_VM_BTMP_L 0xa2 +#define BD71828_MASK_VM_BTMP_U 0x0f +#define BD71828_REG_COULOMB_CTRL 0xc4 +#define BD71828_REG_COULOMB_CTRL2 0xd2 +#define BD71828_MASK_REX_CC_CLR 0x01 +#define BD71828_MASK_FULL_CC_CLR 0x10 +#define BD71828_REG_CC_CNT_FULL3 0xbd +#define BD71828_REG_CC_CNT_CHG3 0xc1 + +#define BD71828_REG_VBAT_INITIAL1_U 
0x86 +#define BD71828_REG_VBAT_INITIAL1_L 0x87 + +#define BD71828_REG_VBAT_INITIAL2_U 0x88 +#define BD71828_REG_VBAT_INITIAL2_L 0x89 + +#define BD71828_REG_IBAT_U 0xb0 +#define BD71828_REG_IBAT_L 0xb1 + +#define BD71828_REG_IBAT_AVG_U 0xb2 +#define BD71828_REG_IBAT_AVG_L 0xb3 + +#define BD71828_REG_VSYS_AVG_U 0x96 +#define BD71828_REG_VSYS_AVG_L 0x97 +#define BD71828_REG_VSYS_MIN_AVG_U 0x98 +#define BD71828_REG_VSYS_MIN_AVG_L 0x99 +#define BD71828_REG_CHG_SET1 0x75 +#define BD71828_REG_ALM_VBAT_LIMIT_U 0xaa +#define BD71828_REG_BATCAP_MON_LIMIT_U 0xcc +#define BD71828_REG_CONF 0x64 + +#define BD71828_REG_DCIN_CLPS 0x71 + +#define BD71828_REG_MEAS_CLEAR 0xaf /* LEDs */ #define BD71828_REG_LED_CTRL 0x4A -- cgit v1.2.3 From 7d096cb3e16f09d9762ae8cef897cdbc13a40029 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:33 +0800 Subject: virtio_ring: constify virtqueue pointer for DMA helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch constifies the virtqueue pointer for DMA helpers. Reviewed-by: Christoph Hellwig Reviewed-by: Xuan Zhuo Reviewed-by: Eugenio Pérez Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index db31fc6f4f1f..eab71a440fba 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -262,18 +262,18 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) -dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, +dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); +int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); -bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); -void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, +bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From b41cb3bcf67fcb7b8297e5acc5bb3309c96c2ff2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:35 +0800 Subject: virtio: rename dma helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following patch will introduce a virtio mapping function to avoid abusing the DMA API for devices that don't do DMA. To ease the introduction, this patch renames "dma" to "map" in the current DMA mapping helpers.
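At call sites the conversion is mechanical; a hypothetical driver hunk (for illustration, not taken from this patch) would read:

-	addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
-	if (virtqueue_dma_mapping_error(vq, addr))
+	addr = virtqueue_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
+	if (virtqueue_map_mapping_error(vq, addr))
		return -ENOMEM;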
Reviewed-by: Christoph Hellwig Reviewed-by: Xuan Zhuo Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-4-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index eab71a440fba..576e08bd7697 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -262,18 +262,18 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) -dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, +dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); +int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); -bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr); -void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, +bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From b16060c5c7d56455da3c3c50b4a20a83c2a30810 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:36 +0800 Subject: virtio: introduce virtio_map container union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following patch will introduce the mapping operations for virtio devices. In order to achieve this, besides the dma device, the virtio core needs to support transport- or device-specific mapping metadata as well. So this patch introduces a union container of a dma device. The idea is to allow the transport layer to pass device-specific mapping metadata which will be used as a parameter for the virtio mapping operations. For transports or devices that use DMA, the dma device is still being used. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-5-jasowang@redhat.com> Signed-off-by: Michael S.
Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 5 +++++ include/linux/virtio_ring.h | 7 ++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 576e08bd7697..b4ba1a99e5ab 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,6 +41,11 @@ struct virtqueue { void *priv; }; +union virtio_map { + /* Device that performs DMA */ + struct device *dma_dev; +}; + int virtqueue_add_outbuf(struct virtqueue *vq, struct scatterlist sg[], unsigned int num, void *data, diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 9b33df741b63..c97a12c1cda3 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -3,6 +3,7 @@ #define _LINUX_VIRTIO_RING_H #include +#include #include #include @@ -79,9 +80,9 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, /* * Creates a virtqueue and allocates the descriptor ring with per - * virtqueue DMA device. + * virtqueue mapping operations. */ -struct virtqueue *vring_create_virtqueue_dma(unsigned int index, +struct virtqueue *vring_create_virtqueue_map(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, @@ -91,7 +92,7 @@ struct virtqueue *vring_create_virtqueue_dma(unsigned int index, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name, - struct device *dma_dev); + union virtio_map map); /* * Creates a virtqueue with a standard layout but a caller-allocated -- cgit v1.2.3 From bee8c7c24b737338216dc0f87d6c47a4abaf609a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:38 +0800 Subject: virtio: introduce map ops in virtio core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces map operations for virtio devices. Virtio used to use the DMA API, which is not necessarily appropriate since some devices don't do DMA. Instead of using tricks and abusing the DMA API, let's simply abstract the current mapping logic into virtio-specific mapping operations. Devices or transports that don't do DMA can implement their own mapping logic without the need to trick the DMA core. In this case the mapping metadata is opaque to the virtio core and will be passed back to the transport or device specific map operations. For other devices, the DMA API will still be used, so the map token will still be the DMA device, to minimize the changeset and performance impact. The mapping operations are abstracted as an independent structure instead of reusing virtio_config_ops. This allows the transport to simply reuse the structure for lower layers like vDPA. A set of new mapping helpers is introduced for devices that want to do the mapping by themselves. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-7-jasowang@redhat.com> Signed-off-by: Michael S.
Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 25 +++++++++++++++ include/linux/virtio_config.h | 72 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b4ba1a99e5ab..3386a4a8d06b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -166,9 +166,11 @@ struct virtio_device { struct virtio_device_id id; const struct virtio_config_ops *config; const struct vringh_config_ops *vringh_config; + const struct virtio_map_ops *map; struct list_head vqs; VIRTIO_DECLARE_FEATURES(features); void *priv; + union virtio_map vmap; #ifdef CONFIG_VIRTIO_DEBUG struct dentry *debugfs_dir; u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS]; @@ -267,6 +269,29 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, dma_addr_t *dma_handle, + gfp_t gfp); + +void virtqueue_map_free_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, void *vaddr, + dma_addr_t dma_handle); + +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + unsigned long attrs); + +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, + dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7427b79d6f3d..16001e9f9b39 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -139,6 +139,78 @@ struct virtio_config_ops { int (*enable_vq_after_reset)(struct virtqueue *vq); }; +/** + * struct virtio_map_ops - operations for mapping buffer for a virtio device + * Note: For transport that has its own mapping logic it must + * implements all of the operations + * @map_page: map a buffer to the device + * map: metadata for performing mapping + * page: the page that will be mapped by the device + * offset: the offset in the page for a buffer + * size: the buffer size + * dir: mapping direction + * attrs: mapping attributes + * Returns: the mapped address + * @unmap_page: unmap a buffer from the device + * map: device specific mapping map + * map_handle: the mapped address + * size: the buffer size + * dir: mapping direction + * attrs: unmapping attributes + * @sync_single_for_cpu: sync a single buffer from device to cpu + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @sync_single_for_device: sync a single buffer from cpu to device + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @alloc: alloc a coherent buffer mapping + * map: metadata for performing mapping + * size: the size of the buffer + * map_handle: the mapping address to sync + * gfp: allocation flag (GFP_XXX) + * Returns: virtual address of the allocated buffer + * @free: free a coherent buffer mapping + * map: 
metadata for performing mapping + * size: the size of the buffer + * vaddr: virtual address of the buffer + * map_handle: the mapping address to sync + * attrs: unmapping attributes + * @need_sync: if the buffer needs synchronization + * map: metadata for performing mapping + * map_handle: the mapped address + * Returns: whether the buffer needs synchronization + * @mapping_error: if the mapping address is error + * map: metadata for performing mapping + * map_handle: the mapped address + * @max_mapping_size: get the maximum buffer size that can be mapped + * map: metadata for performing mapping + * Returns: the maximum buffer size that can be mapped + */ +struct virtio_map_ops { + dma_addr_t (*map_page)(union virtio_map map, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_page)(union virtio_map map, dma_addr_t map_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*sync_single_for_cpu)(union virtio_map map, dma_addr_t map_handle, + size_t size, enum dma_data_direction dir); + void (*sync_single_for_device)(union virtio_map map, + dma_addr_t map_handle, size_t size, + enum dma_data_direction dir); + void *(*alloc)(union virtio_map map, size_t size, + dma_addr_t *map_handle, gfp_t gfp); + void (*free)(union virtio_map map, size_t size, void *vaddr, + dma_addr_t map_handle, unsigned long attrs); + bool (*need_sync)(union virtio_map map, dma_addr_t map_handle); + int (*mapping_error)(union virtio_map map, dma_addr_t map_handle); + size_t (*max_mapping_size)(union virtio_map map); +}; + /* If driver didn't advertise the feature, it will never appear. */ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, unsigned int fbit); -- cgit v1.2.3 From 58aca3dbc7d8891a016cb17d488af3002812793b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:39 +0800 Subject: vdpa: support virtio_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtio core switches from the DMA device to virtio_map; let's do that as well for vDPA. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-8-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/vdpa.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2e7a30fe6b92..ae0451945851 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +71,7 @@ struct vdpa_mgmt_dev; /** * struct vdpa_device - representation of a vDPA device * @dev: underlying device - * @dma_dev: the actual device that is performing DMA + * @vmap: the metadata passed to upper layer to be used for mapping * @driver_override: driver name to force a match; do not set directly, * because core frees it; use driver_set_override() to * set or clear it.
@@ -87,7 +88,7 @@ struct vdpa_mgmt_dev; */ struct vdpa_device { struct device dev; - struct device *dma_dev; + union virtio_map vmap; const char *driver_override; const struct vdpa_config_ops *config; struct rw_semaphore cf_lock; /* Protects get/set config */ @@ -352,11 +353,11 @@ struct vdpa_map_file { * @vdev: vdpa device * @asid: address space identifier * Returns integer: success (0) or error (< 0) - * @get_vq_dma_dev: Get the dma device for a specific + * @get_vq_map: Get the map metadata for a specific * virtqueue (optional) * @vdev: vdpa device * @idx: virtqueue index - * Returns pointer to structure device or error (NULL) + * Returns map token union error (NULL) * @bind_mm: Bind the device to a specific address space * so the vDPA framework can use VA when this * callback is implemented. (optional) @@ -436,7 +437,7 @@ struct vdpa_config_ops { int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); - struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); + union virtio_map (*get_vq_map)(struct vdpa_device *vdev, u16 idx); int (*bind_mm)(struct vdpa_device *vdev, struct mm_struct *mm); void (*unbind_mm)(struct vdpa_device *vdev); @@ -520,9 +521,9 @@ static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data) dev_set_drvdata(&vdev->dev, data); } -static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) +static inline union virtio_map vdpa_get_map(struct vdpa_device *vdev) { - return vdev->dma_dev; + return vdev->vmap; } static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) -- cgit v1.2.3 From 0d16cc439f36355d04b17ac45c3001d90969aa44 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 24 Sep 2025 15:00:44 +0800 Subject: vdpa: introduce map ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtio core allows the transport to provide device or transport specific mapping functions. This patch adds this support to vDPA. We can simply do this by allowing the vDPA parent to register a virtio_map_ops. Reviewed-by: Christoph Hellwig Signed-off-by: Jason Wang Message-Id: <20250924070045.10361-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Eugenio Pérez --- include/linux/vdpa.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index ae0451945851..4cf21d6e9cfd 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -76,6 +76,7 @@ struct vdpa_mgmt_dev; * because core frees it; use driver_set_override() to * set or clear it. * @config: the configuration ops for this device. + * @map: the map ops for this device * @cf_lock: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? 
for legacy guests @@ -91,6 +92,7 @@ struct vdpa_device { union virtio_map vmap; const char *driver_override; const struct vdpa_config_ops *config; + const struct virtio_map_ops *map; struct rw_semaphore cf_lock; /* Protects get/set config */ unsigned int index; bool features_valid; @@ -447,6 +449,7 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + const struct virtio_map_ops *map, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va); @@ -458,6 +461,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * @member: the name of struct vdpa_device within the @dev_struct * @parent: the parent device * @config: the bus operations that is supported by this device + * @map: the map operations that is supported by this device * @ngroups: the number of virtqueue groups supported by this device * @nas: the number of address spaces * @name: name of the vdpa device @@ -465,10 +469,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * * Return allocated data structure or ERR_PTR upon error */ -#define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, nas, \ - name, use_va) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, map, \ + ngroups, nas, name, use_va) \ container_of((__vdpa_alloc_device( \ - parent, config, ngroups, nas, \ + parent, config, map, ngroups, nas, \ (sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member))), name, use_va)), \ -- cgit v1.2.3 From 1c14b0e4ba988381e362ad8a9651eff0b21bd47f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 24 Sep 2025 15:00:45 +0800 Subject: vduse: switch to use virtio map API instead of DMA API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lacking support for device-specific mapping in virtio, VDUSE had to trick the DMA API in order to make the virtio-vdpa transport work. This was done by advertising the vDPA device as a DMA device with VDUSE-specific dma_ops even though it doesn't do DMA at all. This is fixed by this patch: thanks to the new mapping operations supported by virtio and vDPA, VDUSE can simply switch to advertising its specific mapping operations to virtio via virtio-vdpa. The DMA API is then no longer needed for VDUSE, and the iova domain can be used as the mapping token instead. Signed-off-by: Jason Wang Message-Id: <20250924070045.10361-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 3386a4a8d06b..96c66126c074 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,9 +41,13 @@ struct virtqueue { void *priv; }; +struct vduse_iova_domain; + union virtio_map { /* Device that performs DMA */ struct device *dma_dev; + /* VDUSE specific mapping data */ + struct vduse_iova_domain *iova_domain; }; int virtqueue_add_outbuf(struct virtqueue *vq, -- cgit v1.2.3 From f97aef092e199c10a3da96ae79b571edd5362faa Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Fri, 26 Sep 2025 12:12:37 +0200 Subject: cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency Commit a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") caused platforms where cpuinfo.transition_latency is CPUFREQ_ETERNAL to get a very large transition latency whereas previously it had been capped at 10 ms (and later at 2 ms). This led to a user-observable regression between 6.6 and 6.12 as described by Shawn: "The dbs sampling_rate was 10000 us on 6.6 and suddently becomes 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms because the default transition delay was dropped [...]. It slows down dbs governor's reacting to CPU loading change dramatically. Also, as transition_delay_us is used by schedutil governor as rate_limit_us, it shows a negative impact on device idle power consumption, because the device gets slightly less time in the lowest OPP." Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as cpuinfo.transition_latency was that it would be capped by the core, but they may as well return a default transition latency value instead of CPUFREQ_ETERNAL and the core need not do anything with it. Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make all of the drivers in question use it instead of CPUFREQ_ETERNAL. Also update the related Rust binding. Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/ Reported-by: Shawn Guo Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Jie Zhan Acked-by: Viresh Kumar Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2264949.irdbgypaU6@rafael.j.wysocki [ rjw: Fix typo in new symbol name, drop redundant type cast from Rust binding ] Tested-by: Shawn Guo # with cpufreq-dt driver Reviewed-by: Qais Yousef Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 40966512ea18..bc8c083bc16a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -32,6 +32,9 @@ */ #define CPUFREQ_ETERNAL (-1) + +#define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC + #define CPUFREQ_NAME_LEN 16 /* Print length for names. Extra 1 space for accommodating '\n' in prints */ #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) -- cgit v1.2.3 From 950c6451a5c38d375993c3b9da427e2e69b01c30 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 12:31:47 +0200 Subject: cpufreq: Drop unused symbol CPUFREQ_ETERNAL Drop CPUFREQ_ETERNAL that has no users any more along with all references to it in the documentation. No functional impact. Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Jie Zhan Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Reviewed-by: Qais Yousef --- include/linux/cpufreq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bc8c083bc16a..0465d1e6f72a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -26,13 +26,8 @@ *********************************************************************/ /* * Frequency values here are CPU kHz - * - * Maximum transition latency is in nanoseconds - if it's unknown, - * CPUFREQ_ETERNAL shall be used. 
*/ -#define CPUFREQ_ETERNAL (-1) - #define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC #define CPUFREQ_NAME_LEN 16 -- cgit v1.2.3 From 4e66293bb141df33d5eb1f922e16fe05913bf296 Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Wed, 1 Oct 2025 14:27:16 +0530 Subject: of: doc: Fix typo in doc comments. synthetized => synthesized definied => defined sucess => success Signed-off-by: Bhanu Seshu Kumar Valluri Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 5e2c6ed9370a..121a288ca92d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1134,7 +1134,7 @@ static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, * Search for a property in a device node and count the number of u8 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u8 and -ENODATA if the * property does not have a value. */ @@ -1153,7 +1153,7 @@ static inline int of_property_count_u8_elems(const struct device_node *np, * Search for a property in a device node and count the number of u16 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u16 and -ENODATA if the * property does not have a value. */ @@ -1172,7 +1172,7 @@ static inline int of_property_count_u16_elems(const struct device_node *np, * Search for a property in a device node and count the number of u32 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u32 and -ENODATA if the * property does not have a value. */ @@ -1191,7 +1191,7 @@ static inline int of_property_count_u32_elems(const struct device_node *np, * Search for a property in a device node and count the number of u64 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u64 and -ENODATA if the * property does not have a value. */ -- cgit v1.2.3 From 510d76646a6a7beaa49fc0da7282e285a3dfce97 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 23 Sep 2025 19:21:36 +0800 Subject: block: Update a comment of disk statistics From commit 074a7aca7afa ("block: move stats from disk to part0"), we know that: * {disk|all}_stat_*() are gone. * disk_stat_lock/unlock() are renamed to part_stat_lock/unlock(). Therefore, outdated comments should be updated accordingly.
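The corrected comment describes the usage pattern below (a sketch of the locking discipline modelled on the block layer's accounting code, not a hunk from this patch):

	part_stat_lock();
	part_stat_inc(part, ios[STAT_READ]);
	part_stat_add(part, sectors[STAT_READ], nr_sectors);
	part_stat_unlock();

	/* part_stat_read() needs no such protection and can run any time. */
	nr_reads = part_stat_read(part, ios[STAT_READ]);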
Fixes: 074a7aca7afa ("block: move stats from disk to part0") Signed-off-by: Tang Yizhou Signed-off-by: Jens Axboe --- include/linux/part_stat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index eeeff2a04529..729415e91215 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -17,8 +17,8 @@ struct disk_stats { /* * Macros to operate on percpu disk statistics: * - * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should - * be called between disk_stat_lock() and disk_stat_unlock(). + * part_stat_{add|sub|inc|dec}() modify the stat counters and should + * be called between part_stat_lock() and part_stat_unlock(). * * part_stat_read() can be called at any time. */ -- cgit v1.2.3 From 93a4b36ef3cf4ce5e6a7e7a7686181de76e246a1 Mon Sep 17 00:00:00 2001 From: Nirbhay Sharma Date: Fri, 3 Oct 2025 17:15:55 +0530 Subject: cgroup: Fix seqcount lockdep assertion in cgroup freezer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit afa3701c0e45 ("cgroup: cgroup.stat.local time accounting") introduced a seqcount to track freeze timing but initialized it as a plain seqcount_t using seqcount_init(). However, the write-side critical section in cgroup_do_freeze() holds the css_set_lock spinlock while calling write_seqcount_begin(). On PREEMPT_RT kernels, spinlocks do not disable preemption, causing the lockdep assertion for a plain seqcount_t, which checks for preemption being disabled, to fail. This triggers the following warning: WARNING: CPU: 0 PID: 9692 at include/linux/seqlock.h:221 Fix this by changing the type to seqcount_spinlock_t and initializing it with seqcount_spinlock_init() to associate css_set_lock with the seqcount. This allows lockdep to correctly validate that the spinlock is held during write operations, resolving the assertion failure on all kernel configurations. Reported-by: syzbot+27a2519eb4dad86d0156@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=27a2519eb4dad86d0156 Fixes: afa3701c0e45 ("cgroup: cgroup.stat.local time accounting") Signed-off-by: Nirbhay Sharma Link: https://lore.kernel.org/r/20251002165510.KtY3IT--@linutronix.de/ Acked-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93318fce31f3..b760a3c470a5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -452,7 +452,7 @@ struct cgroup_freezer_state { int nr_frozen_tasks; /* Freeze time data consistency protection */ - seqcount_t freeze_seq; + seqcount_spinlock_t freeze_seq; /* * Most recent time the cgroup was requested to freeze. -- cgit v1.2.3 From 1b54b0756f051c11f5a5d0fbc1581e0b9a18e2bc Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Wed, 1 Oct 2025 16:27:15 +0530 Subject: net: doc: Fix typos in docs Fix typos in doc comments. 
Signed-off-by: Bhanu Seshu Kumar Valluri Link: https://patch.msgid.link/20251001105715.50462-1-bhanuseshukumar@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7a54a8b4d277..3c7634482356 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -297,7 +297,7 @@ static inline const char *phy_modes(phy_interface_t interface) * * Description: maps RGMII supported link speeds into the clock rates. * This can also be used for MII, GMII, and RMII interface modes as the - * clock rates are indentical, but the caller must be aware that errors + * clock rates are identical, but the caller must be aware that errors * for unsupported clock rates will not be signalled. * * Returns: clock rate or negative errno @@ -519,7 +519,7 @@ enum phy_state { * struct phy_c45_device_ids - 802.3-c45 Device Identifiers * @devices_in_package: IEEE 802.3 devices in package register value. * @mmds_present: bit vector of MMDs present. - * @device_ids: The device identifer for each present device. + * @device_ids: The device identifier for each present device. */ struct phy_c45_device_ids { u32 devices_in_package; -- cgit v1.2.3 From 384b52ce32110db974d3b61d463af48347eb73fb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Oct 2025 19:34:29 +0200 Subject: PM: runtime: Introduce one more usage counter guard Follow previous commit 9a0abc39450a ("PM: runtime: Add auto-cleanup macros for "resume and get" operations") and define a runtime PM usage counter guard in which pm_runtime_get_noresume() and pm_runtime_put_noidle() will be used for incrementing and decrementing it, respectively. Signed-off-by: Rafael J. Wysocki Reviewed-by: Jonathan Cameron --- include/linux/pm_runtime.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index edb8aed5ef62..a3f44f6c2da1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -614,6 +614,9 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) return __pm_runtime_put_autosuspend(dev); } +DEFINE_GUARD(pm_runtime_noresume, struct device *, + pm_runtime_get_noresume(_T), pm_runtime_put_noidle(_T)); + DEFINE_GUARD(pm_runtime_active, struct device *, pm_runtime_get_sync(_T), pm_runtime_put(_T)); DEFINE_GUARD(pm_runtime_active_auto, struct device *, -- cgit v1.2.3 From b8179af120943e2fc099ea87caa234039a709a66 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 29 Jul 2025 08:46:35 +0200 Subject: mm/memory_hotplug: activate node before adding new memory blocks The sysfs attributes for memory blocks require the node ID to be set and initialized, so move the node activation before adding new memory blocks. This also has the nice side effect that the BUG_ON() can be converted into a WARN_ON() as we now can handle registration errors. 
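A rough sketch of the resulting ordering in add_memory_resource() (locals and error labels assumed, heavily simplified):

	ret = __try_online_node(nid, false);	/* activate the node first */
	if (ret < 0)
		goto error;

	/* nid is now valid for the sysfs attributes of the new blocks */
	ret = create_memory_block_devices(start, size, nid, altmap, group);
	if (ret)		/* handled now, so WARN_ON() instead of BUG_ON() */
		goto error;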
Link: https://lkml.kernel.org/r/20250729064637.51662-3-hare@kernel.org Fixes: b9ff036082cd ("mm/memory_hotplug.c: make add_memory_resource use __try_online_node") Signed-off-by: Hannes Reinecke Acked-by: David Hildenbrand Acked-by: Oscar Salvador Reviewed-by: Donet Tom Signed-off-by: Andrew Morton --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d..4a29153e372e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -159,7 +159,7 @@ static inline unsigned long memory_block_advised_max_size(void) extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - struct vmem_altmap *altmap, + int nid, struct vmem_altmap *altmap, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); -- cgit v1.2.3 From 0a947c14e48cbf9de222836170282e0167a9e096 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 29 Jul 2025 08:46:36 +0200 Subject: drivers/base: move memory_block_add_nid() into the caller Now the node id only needs to be set for early memory, so move memory_block_add_nid() into the caller and rename it into memory_block_add_nid_early(). This allows us to further simplify the code by dropping the 'context' argument to do_register_memory_block_under_node(). Link: https://lkml.kernel.org/r/20250729064637.51662-4-hare@kernel.org Suggested-by: David Hildenbrand Signed-off-by: Hannes Reinecke Acked-by: David Hildenbrand Acked-by: Oscar Salvador Reviewed-by: Donet Tom Signed-off-by: Andrew Morton --- include/linux/memory.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 4a29153e372e..43d378038ce2 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -202,8 +202,7 @@ static inline unsigned long phys_to_block_id(unsigned long phys) } #ifdef CONFIG_NUMA -void memory_block_add_nid(struct memory_block *mem, int nid, - enum meminit_context context); +void memory_block_add_nid_early(struct memory_block *mem, int nid); #endif /* CONFIG_NUMA */ int memory_block_advise_max_size(unsigned long size); unsigned long memory_block_advised_max_size(void); -- cgit v1.2.3 From b71a6e2a1b710ea31c363a72c75f510ef35d8e69 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 2 Oct 2025 17:12:35 +0900 Subject: i2c: rename wait_for_completion callback to wait_for_completion_cb Functionally no change. Remove the ambiguity of 'wait_for_completion'. It helps development of the DEPT dependency tracker, but seems favorable in any case. 
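For illustration, how an affected board file looks after the rename (the driver and callback names are made up for the example):

	static struct i2c_algo_pca_data foo_pca_data = {
		.write_byte		= foo_write_byte,
		.read_byte		= foo_read_byte,
		.wait_for_completion_cb	= foo_wait,	/* was .wait_for_completion */
		.reset_chip		= foo_reset,
	};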
Signed-off-by: Byungchul Park [wsa: reworded commit message] Signed-off-by: Wolfram Sang --- include/linux/i2c-algo-pca.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h index 7c522fdd9ea7..e305bf32e40a 100644 --- a/include/linux/i2c-algo-pca.h +++ b/include/linux/i2c-algo-pca.h @@ -71,7 +71,7 @@ struct i2c_algo_pca_data { void *data; /* private low level data */ void (*write_byte) (void *data, int reg, int val); int (*read_byte) (void *data, int reg); - int (*wait_for_completion) (void *data); + int (*wait_for_completion_cb) (void *data); void (*reset_chip) (void *data); /* For PCA9564, use one of the predefined frequencies: * 330000, 288000, 217000, 146000, 88000, 59000, 44000, 36000 -- cgit v1.2.3 From 48b77733d0dbaf8cd0a122712072f92b2d95d894 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Oct 2025 15:19:07 +0200 Subject: expfs: Fix exportfs_can_encode_fh() for EXPORT_FH_FID After commit 5402c4d4d200 ("exportfs: require ->fh_to_parent() to encode connectable file handles") we will fail to create non-decodable file handles for filesystems without export operations. Fix it. Fixes: 5402c4d4d200 ("exportfs: require ->fh_to_parent() to encode connectable file handles") Reviewed-by: Christian Brauner Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/exportfs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d0cf10d5e0f7..f0cf2714ec52 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -320,9 +320,6 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { - if (!nop) - return false; - /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -330,6 +327,10 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, if (fh_flags & EXPORT_FH_FID) return exportfs_can_encode_fid(nop); + /* Normal file handles cannot be created without export ops */ + if (!nop) + return false; + /* * If a connectable file handle was requested, we need to make sure that * filesystem can also decode connected file handles. -- cgit v1.2.3 From 929bf010e0599ddef6b640cd314f1de65dd1ca3e Mon Sep 17 00:00:00 2001 From: Li Zhe Date: Thu, 14 Aug 2025 14:47:10 +0800 Subject: mm: introduce num_pages_contiguous() Let's add a simple helper for determining the number of contiguous pages that represent contiguous PFNs. In an ideal world, this helper would be simpler or not even required. Unfortunately, on some configs we still have to maintain (SPARSEMEM without VMEMMAP), the memmap is allocated per memory section, and we might run into weird corner cases of false positives when blindly testing for contiguous pages only. One example of such false positives would be a memory section-sized hole that does not have a memmap. The surrounding memory sections might get "struct pages" that are contiguous, but the PFNs are actually not. This helper will, for example, be useful for determining contiguous PFNs in a GUP result, to batch further operations across returned "struct page"s. VFIO will utilize this interface to accelerate the VFIO DMA map process. Implementation based on Linus' suggestions to avoid new usage of nth_page() where avoidable. 
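A sketch of the intended caller pattern, per the commit's description of batching a GUP result (the mapping helper is hypothetical; only num_pages_contiguous() comes from this patch):

	size_t i = 0;

	while (i < nr_pages) {
		size_t n = num_pages_contiguous(&pages[i], nr_pages - i);

		/* pages[i] .. pages[i + n - 1] have contiguous PFNs */
		map_phys_range(page_to_phys(pages[i]), n * PAGE_SIZE);
		i += n;
	}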
Suggested-by: Linus Torvalds Suggested-by: Jason Gunthorpe Signed-off-by: Li Zhe Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20250814064714.56485-2-lizhe.67@bytedance.com Signed-off-by: Alex Williamson --- include/linux/mm.h | 7 ++++++- include/linux/mm_inline.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 06978b4dbeb8..f092ce3530bb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1833,7 +1833,12 @@ static inline unsigned long memdesc_section(memdesc_flags_t mdf) { return (mdf.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -#endif +#else /* !SECTION_IN_PAGE_FLAGS */ +static inline unsigned long memdesc_section(memdesc_flags_t mdf) +{ + return 0; +} +#endif /* SECTION_IN_PAGE_FLAGS */ /** * folio_pfn - Return the Page Frame Number of a folio. diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index d6c1011b38f2..f6a2b2d20016 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -617,4 +617,40 @@ static inline bool vma_has_recency(const struct vm_area_struct *vma) return true; } +/** + * num_pages_contiguous() - determine the number of contiguous pages + * that represent contiguous PFNs + * @pages: an array of page pointers + * @nr_pages: length of the array, at least 1 + * + * Determine the number of contiguous pages that represent contiguous PFNs + * in @pages, starting from the first page. + * + * In some kernel configs contiguous PFNs will not have contiguous struct + * pages. In these configurations num_pages_contiguous() will return a num + * smaller than ideal number. The caller should continue to check for pfn + * contiguity after each call to num_pages_contiguous(). + * + * Returns the number of contiguous pages. + */ +static inline size_t num_pages_contiguous(struct page **pages, size_t nr_pages) +{ + struct page *cur_page = pages[0]; + unsigned long section = memdesc_section(cur_page->flags); + size_t i; + + for (i = 1; i < nr_pages; i++) { + if (++cur_page != pages[i]) + break; + /* + * In unproblematic kernel configs, page_to_section() == 0 and + * the whole check will get optimized out. + */ + if (memdesc_section(cur_page->flags) != section) + break; + } + + return i; +} + #endif -- cgit v1.2.3 From 95920c2ed02bde551ab654e9749c2ca7bc3100e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 30 Sep 2025 13:43:29 +0200 Subject: page_pool: Fix PP_MAGIC_MASK to avoid crashing on some 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helge reported that the introduction of PP_MAGIC_MASK led to crashes on boot on his 32-bit parisc machine. The cause of this is the mask is set too wide, so page_pool_page_is_pp() incurs false positives, which crash the machine. Just disabling the check in page_pool_is_pp() will lead to the page_pool code itself malfunctioning; so instead of doing this, this patch changes the define for PP_DMA_INDEX_BITS to avoid mistaking arbitrary kernel pointers for page_pool-tagged pages. The fix relies on the kernel pointers that alias with the pp_magic field always being above PAGE_OFFSET. With this assumption, we can use the lowest bit of the value of PAGE_OFFSET as the upper bound of the PP_DMA_INDEX_MASK, which should avoid the false positives.
Because we cannot rely on PAGE_OFFSET always being a compile-time constant, nor on it always being >0, we fall back to disabling the dma_index storage when there are not enough bits available. This leaves us in the situation we were in before the patch in the Fixes tag, but only on a subset of architecture configurations. This seems to be the best we can do until the transition to page types is complete for page_pool pages. v2: - Make sure there's at least 8 bits available and that the PAGE_OFFSET bit calculation doesn't wrap Link: https://lore.kernel.org/all/aMNJMFa5fDalFmtn@p100/ Fixes: ee62ce7a1d90 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool") Cc: stable@vger.kernel.org # 6.15+ Tested-by: Helge Deller Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Mina Almasry Tested-by: Helge Deller Link: https://patch.msgid.link/20250930114331.675412-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ae97a0b8ec7..0905eb6b55ec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4159,14 +4159,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); * since this value becomes part of PP_SIGNATURE; meaning we can just use the * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is - * 0, we make sure that we leave the two topmost bits empty, as that guarantees - * we won't mistake a valid kernel pointer for a value we set, regardless of the - * VMSPLIT setting. + * 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is + * known at compile-time. * - * Altogether, this means that the number of bits available is constrained by - * the size of an unsigned long (at the upper end, subtracting two bits per the - * above), and the definition of PP_SIGNATURE (with or without - * POISON_POINTER_DELTA). + * If the value of PAGE_OFFSET is not known at compile time, or if it is too + * small to leave at least 8 bits available above PP_SIGNATURE, we define the + * number of bits to be 0, which turns off the DMA index tracking altogether + * (see page_pool_register_dma_index()). */ #define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) #if POISON_POINTER_DELTA > 0 @@ -4175,8 +4174,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); */ #define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) #else -/* Always leave out the topmost two; see above. */ -#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +/* Use the lowest bit of PAGE_OFFSET if there's at least 8 bits available; see above */ +#define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8)) +#define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \ + PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \ + !(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ?
\ + MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0) + #endif #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ -- cgit v1.2.3 From 1ed06c83506ecaaf1836ddeb7c65772ff86d8d53 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:25 +0200 Subject: block: remove bio_iov_iter_get_pages Switch the only caller to bio_iov_iter_get_pages_aligned(), and explain why it does not have any alignment requirements. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a64a30131031..b01dae9506de 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -449,11 +449,6 @@ int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask); -static inline int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) -{ - return bio_iov_iter_get_pages_aligned(bio, iter, 0); -} - void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); -- cgit v1.2.3 From 82dd5d763c9b718e2d655b9565e0a06a91bb83dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:26 +0200 Subject: block: rename bio_iov_iter_get_pages_aligned to bio_iov_iter_get_pages Now that the bio_iov_iter_get_pages name is free again, use it instead of the more complicated one. Also drop the unused export. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blkdev.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b01dae9506de..16c1c85613b7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -446,7 +446,7 @@ int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); -int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 066e5309bd45..c97e8b0e67b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1876,7 +1876,7 @@ static inline int bio_split_rw_at(struct bio *bio, static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, struct iov_iter *iter, struct block_device *bdev) { - return bio_iov_iter_get_pages_aligned(bio, iter, + return bio_iov_iter_get_pages(bio, iter, bdev_logical_block_size(bdev) - 1); } -- cgit v1.2.3 From 506aa235f6e0baa00bf792df82a5e9f618b7a5d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:28 +0200 Subject: block: move bio_iov_iter_get_bdev_pages to block/fops.c Keep bio_iov_iter_get_bdev_pages local to the callers, as blindly looking at the bdev logical block size is often not the best idea unless operating directly on a block device. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c97e8b0e67b6..d4db5039836d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1873,13 +1873,6 @@ static inline int bio_split_rw_at(struct bio *bio, return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); } -static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, - struct iov_iter *iter, struct block_device *bdev) -{ - return bio_iov_iter_get_pages(bio, iter, - bdev_logical_block_size(bdev) - 1); -} - #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 8375b76517cb52bac0903071feedc218c45d74d2 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 21 Sep 2025 08:44:56 +0300 Subject: kho: replace kho_preserve_phys() with kho_preserve_pages() to make it clear that KHO operates on pages rather than on a random physical address. kho_preserve_pages() will also be used in upcoming support for vmalloc preservation. Link: https://lkml.kernel.org/r/20250921054458.4043761-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Reviewed-by: Jason Gunthorpe Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Chris Li Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 559d13a3bc44..cec663b39861 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -18,6 +18,7 @@ enum kho_event { struct folio; struct notifier_block; +struct page; #define DECLARE_KHOSER_PTR(name, type) \ union { \ @@ -43,7 +44,7 @@ bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); -int kho_preserve_phys(phys_addr_t phys, size_t size); +int kho_preserve_pages(struct page *page, unsigned int nr_pages); struct folio *kho_restore_folio(phys_addr_t phys); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -71,7 +72,7 @@ static inline int kho_preserve_folio(struct folio *folio) return -EOPNOTSUPP; } -static inline int kho_preserve_phys(phys_addr_t phys, size_t size) +static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) { return -EOPNOTSUPP; } -- cgit v1.2.3 From a667300bd53f272a3055238bcefe108f88836270 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 21 Sep 2025 08:44:57 +0300 Subject: kho: add support for preserving vmalloc allocations A vmalloc allocation is preserved using a binary structure similar to the global KHO memory tracker. It's a linked list of pages where each page is an array of physical addresses of pages in the vmalloc area. kho_preserve_vmalloc() hands out the physical address of the head page to the caller. This address is used as the argument to kho_restore_vmalloc() to restore the mapping in the vmalloc address space and populate it with the preserved pages.
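For illustration, a minimal round-trip sketch of the new API (error handling elided; how the struct kho_vmalloc handle crosses kexec, e.g. via the FDT, is up to the caller):

	struct kho_vmalloc preservation;
	void *buf = vmalloc(len);

	err = kho_preserve_vmalloc(buf, &preservation);

	/* ... in the next kernel, with the same struct kho_vmalloc ... */
	void *restored = kho_restore_vmalloc(&preservation);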
[pasha.tatashin@soleen.com: free chunks using free_page() not kfree()] Link: https://lkml.kernel.org/r/mafs0a52idbeg.fsf@kernel.org [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20250921054458.4043761-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Chris Li Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index cec663b39861..25042c1d8d54 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -39,13 +39,24 @@ struct page; struct kho_serialization; +struct kho_vmalloc_chunk; +struct kho_vmalloc { + DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); + unsigned int total_pages; + unsigned short flags; + unsigned short order; +}; + #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); int kho_preserve_pages(struct page *page, unsigned int nr_pages); +int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); struct folio *kho_restore_folio(phys_addr_t phys); +struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); +void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -77,11 +88,28 @@ static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) return -EOPNOTSUPP; } +static inline int kho_preserve_vmalloc(void *ptr, + struct kho_vmalloc *preservation) +{ + return -EOPNOTSUPP; +} + static inline struct folio *kho_restore_folio(phys_addr_t phys) { return NULL; } +static inline struct page *kho_restore_pages(phys_addr_t phys, + unsigned int nr_pages) +{ + return NULL; +} + +static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) +{ + return NULL; +} + static inline int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt) { -- cgit v1.2.3 From fcc0669c5aa681994c507b50f1c706c969d99730 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 22 Sep 2025 15:02:03 -0700 Subject: memcg: skip cgroup_file_notify if spinning is not allowed Generally memcg charging is allowed from all the contexts including NMI where even spinning on a spinlock can cause locking issues. However one call chain was missed during the addition of memcg charging from any context support. That is try_charge_memcg() -> memcg_memory_event() -> cgroup_file_notify(). The possible function call tree under cgroup_file_notify() can acquire many different spin locks in spinning mode. Some of them are cgroup_file_kn_lock, kernfs_notify_lock, pool_workqueue's lock. So, let's just skip cgroup_file_notify() from memcg charging if the context does not allow spinning. An alternative approach was also explored where instead of skipping cgroup_file_notify(), we defer the memcg event processing to irq_work [1]. However, it adds complexity and it was decided to keep things simple until we need more memcg events with !allow_spinning requirement.
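A sketch of the charge-path call site this enables (the surrounding code is assumed; gfpflags_allow_spinning() is assumed to be the predicate used for this purpose):

	bool allow_spinning = gfpflags_allow_spinning(gfp_mask);

	/* ... on hitting the limit in try_charge_memcg() ... */
	__memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);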
Link: https://lore.kernel.org/all/5qi2llyzf7gklncflo6gxoozljbm4h3tpnuv4u4ej4ztysvi6f@x44v7nz2wdzd/ [1] Link: https://lkml.kernel.org/r/20250922220203.261714-1-shakeel.butt@linux.dev Fixes: 3ac4638a734a ("memcg: make memcg_rstat_updated nmi safe") Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Closes: https://lore.kernel.org/all/20250905061919.439648-1-yepeilin@google.com/ Cc: Alexei Starovoitov Cc: Johannes Weiner Cc: Kumar Kartikeya Dwivedi Cc: Muchun Song Cc: Peilin Ye Cc: Roman Gushchin Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 16fe0306e50e..873e510d6f8d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1001,22 +1001,28 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, count_memcg_events_mm(mm, idx, 1); } -static inline void memcg_memory_event(struct mem_cgroup *memcg, - enum memcg_memory_event event) +static inline void __memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event, + bool allow_spinning) { bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || event == MEMCG_SWAP_FAIL; + /* For now only MEMCG_MAX can happen with !allow_spinning context. */ + VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX); + atomic_long_inc(&memcg->memory_events_local[event]); - if (!swap_event) + if (!swap_event && allow_spinning) cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - if (swap_event) - cgroup_file_notify(&memcg->swap_events_file); - else - cgroup_file_notify(&memcg->events_file); + if (allow_spinning) { + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); + } if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; @@ -1026,6 +1032,12 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, !mem_cgroup_is_root(memcg)); } +static inline void memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event) +{ + __memcg_memory_event(memcg, event, true); +} + static inline void memcg_memory_event_mm(struct mm_struct *mm, enum memcg_memory_event event) { -- cgit v1.2.3 From f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 Mon Sep 17 00:00:00 2001 From: Jakub Acs Date: Wed, 1 Oct 2025 09:03:52 +0000 Subject: mm/ksm: fix flag-dropping behavior in ksm_madvise syzkaller discovered the following crash: (kernel BUG) [ 44.607039] ------------[ cut here ]------------ [ 44.607422] kernel BUG at mm/userfaultfd.c:2067! [ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI [ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none) [ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460 [ 44.617726] Call Trace: [ 44.617926] [ 44.619284] userfaultfd_release+0xef/0x1b0 [ 44.620976] __fput+0x3f9/0xb60 [ 44.621240] fput_close_sync+0x110/0x210 [ 44.622222] __x64_sys_close+0x8f/0x120 [ 44.622530] do_syscall_64+0x5b/0x2f0 [ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 44.623244] RIP: 0033:0x7f365bb3f227 Kernel panics because it detects UFFD inconsistency during userfaultfd_release_all(). 
Specifically, a VMA which has a valid pointer to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags. The inconsistency is caused in ksm_madvise(): when the user calls madvise() with MADV_UNMERGEABLE on a VMA that is registered for UFFD in MINOR mode, it accidentally clears all flags stored in the upper 32 bits of vma->vm_flags. Assuming an x86_64 kernel build, unsigned long is 64-bit and unsigned int and int are 32-bit wide. This setup causes the following mishap during the &= ~VM_MERGEABLE assignment. VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000. After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then promoted to unsigned long before the & operation. This promotion fills the upper 32 bits with leading 0s, as we're doing an unsigned conversion (and even for a signed conversion, this wouldn't help as the leading bit is 0). The & operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff instead of the intended 0xffff'ffff'7fff'ffff and hence accidentally clears the upper 32 bits of its value. Fix it by changing the `VM_MERGEABLE` constant to unsigned long, using the BIT() macro. Note: other VM_* flags are not affected: this only happens to the VM_MERGEABLE flag, as the other VM_* flags are all constants of type int and after the ~ operation, they end up with a leading 1 and are thus converted to unsigned long with leading 1s. Note 2: After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is no longer a kernel BUG, but a WARNING at the same place: [ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067 but the root-cause (flag-drop) remains the same. [akpm@linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel] Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/ Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") Signed-off-by: Jakub Acs Signed-off-by: Miguel Ojeda Acked-by: David Hildenbrand Acked-by: SeongJae Park Tested-by: Alice Ryhl Tested-by: Miguel Ojeda Cc: Xu Xin Cc: Chengming Zhou Cc: Peter Xu Cc: Axel Rasmussen Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 06978b4dbeb8..70a2a76007d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -323,7 +323,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ -#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ +#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ -- cgit v1.2.3 From 27c0a7b05d13a0dc54ed0b95fc12218210fdea1a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 12:02:27 -0700 Subject: libceph: Use HMAC-SHA256 library instead of crypto_shash Use the HMAC-SHA256 library functions instead of crypto_shash. This is simpler and faster.
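For comparison, the library-style pattern that replaces the shash tfm/descriptor dance (buffer names are assumed; the API shape follows the HMAC-SHA256 library and the hmac_key field in the diff below):

	static struct hmac_sha256_key key;
	u8 mac[SHA256_DIGEST_SIZE];

	hmac_sha256_preparekey(&key, raw_key, raw_key_len);
	hmac_sha256(&key, data, data_len, mac);	/* one-shot, no allocation */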
Signed-off-by: Eric Biggers Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 1717cc57cdac..4b49592a738f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -2,6 +2,7 @@ #ifndef __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H +#include #include #include #include @@ -412,7 +413,8 @@ struct ceph_connection_v2_info { struct ceph_msg_data_cursor in_cursor; struct ceph_msg_data_cursor out_cursor; - struct crypto_shash *hmac_tfm; /* post-auth signature */ + struct hmac_sha256_key hmac_key; /* post-auth signature */ + bool hmac_key_set; struct crypto_aead *gcm_tfm; /* on-wire encryption */ struct aead_request *gcm_req; struct crypto_wait gcm_wait; -- cgit v1.2.3 From 59699a5a7114f09f890e86c09a6b32afb5eaa64c Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:53 +0200 Subject: libceph: make ceph_con_get_out_msg() return the message pointer The caller in messenger_v1.c loads it anyway, so let's keep the pointer in the register instead of reloading it from memory. This eliminates a tiny bit of unnecessary overhead. Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 4b49592a738f..9ebcac2981fd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -550,7 +550,7 @@ void ceph_addr_set_port(struct ceph_entity_addr *addr, int p); void ceph_con_process_message(struct ceph_connection *con); int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); -void ceph_con_get_out_msg(struct ceph_connection *con); +struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); -- cgit v1.2.3 From 7399212dcf64d90a6ab239bdd98bd325d922fc7e Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:54 +0200 Subject: libceph: pass the message pointer instead of loading con->out_msg This pointer is in a register anyway, so let's use that instead of reloading from memory everywhere. 
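A sketch of the caller pattern this enables in messenger_v1.c (the helper name and its new signature are assumptions for illustration):

	struct ceph_msg *msg = ceph_con_get_out_msg(con);

	if (msg)
		prepare_write_message(con, msg);	/* no re-read of con->out_msg */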
[ idryomov: formatting ] Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9ebcac2981fd..6aa4c6478c9f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -555,7 +555,7 @@ struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); int ceph_con_v1_try_write(struct ceph_connection *con); -void ceph_con_v1_revoke(struct ceph_connection *con); +void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v1_revoke_incoming(struct ceph_connection *con); bool ceph_con_v1_opened(struct ceph_connection *con); void ceph_con_v1_reset_session(struct ceph_connection *con); @@ -564,7 +564,7 @@ void ceph_con_v1_reset_protocol(struct ceph_connection *con); /* messenger_v2.c */ int ceph_con_v2_try_read(struct ceph_connection *con); int ceph_con_v2_try_write(struct ceph_connection *con); -void ceph_con_v2_revoke(struct ceph_connection *con); +void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v2_revoke_incoming(struct ceph_connection *con); bool ceph_con_v2_opened(struct ceph_connection *con); void ceph_con_v2_reset_session(struct ceph_connection *con); -- cgit v1.2.3 From 207696b17f38e869e59889b44d395ab24bb678d3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Thu, 18 Sep 2025 22:30:18 +0300 Subject: tpm: use a map for tpm2_calc_ordinal_duration() The current shenanigans for duration calculation introduce too much complexity for a trivial problem, and further the code is hard to patch and maintain. Address these issues with a flat look-up table, which is easy to understand and patch. If leaf-driver-specific patching is required in the future, it is easy enough to make a copy of this table during driver initialization and add the chip parameter back. 'chip->duration' is retained for TPM 1.x. As the first entry for this new behavior, address the TCG spec update mentioned in this issue: https://github.com/raspberrypi/linux/issues/7054 Therefore, for TPM_SelfTest the duration is set to 3000 ms. This does not categorize as a bug, given that this was introduced to the spec after the feature was originally implemented. Reviewed-by: Serge Hallyn Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index b0e9eb5ef022..dc0338a783f3 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -228,10 +228,11 @@ enum tpm2_timeouts { TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, +}; + +enum tpm2_durations { TPM2_DURATION_SHORT = 20, - TPM2_DURATION_MEDIUM = 750, TPM2_DURATION_LONG = 2000, - TPM2_DURATION_LONG_LONG = 300000, TPM2_DURATION_DEFAULT = 120000, }; -- cgit v1.2.3 From 44c6cb9fe9888b371e31165b2854bd0f4e2787d4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:58 -0700 Subject: KVM: guest_memfd: Allow mmap() on guest_memfd for x86 VMs with private memory Allow mmap() on guest_memfd instances for x86 VMs with private memory as the need to track private vs. shared state in the guest_memfd instance is only pertinent to INIT_SHARED.
Doing mmap() on private memory isn't terribly useful (yet!), but it's now possible, and will be desirable when guest_memfd gains support for other VMA-based syscalls, e.g. mbind() to set NUMA policy. Lift the restriction now, before MMAP support is officially released, so that KVM doesn't need to add another capability to enumerate support for mmap() on private memory. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-6-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 19b8c4bebb9c..680ca838f018 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -729,7 +729,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) #endif #ifdef CONFIG_KVM_GUEST_MEMFD -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm); + +static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm) +{ + u64 flags = GUEST_MEMFD_FLAG_MMAP; + + if (!kvm || kvm_arch_supports_gmem_init_shared(kvm)) + flags |= GUEST_MEMFD_FLAG_INIT_SHARED; + + return flags; +} #endif #ifndef kvm_arch_has_readonly_mem -- cgit v1.2.3 From a8482d2c9071d75c920eba0db36428898250ea57 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 11 Oct 2025 12:31:53 +0200 Subject: Revert "i2c: boardinfo: Annotate code used in init phase only" This reverts commit 1a2b423be6a89dd07d5fc27ea042be68697a6a49 because we got a regression report and need time to find out the details. Reported-by: Konrad Dybcio Closes: https://lore.kernel.org/r/29ec0082-4dd4-4120-acd2-44b35b4b9487@oss.qualcomm.com Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 11a19241e360..20fd41b51d5c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -499,7 +499,7 @@ static inline struct i2c_client *i2c_verify_client(struct device *dev) * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO -int __init +int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else -- cgit v1.2.3 From 12d724f2852d094d68dccaf5101e0ef89a971cde Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 9 Oct 2025 19:46:00 +0900 Subject: ata: libata-core: relax checks in ata_read_log_directory() Commit 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") introduced caching of a device general purpose log directory to avoid repeated access to this log page during device scan. This change also added a check on this log page to verify that the log page version is 0x0001 as mandated by the ACS specifications. And it turns out that some devices do not bother reporting this version, instead reporting a version 0, resulting in error messages such as: ata6.00: Invalid log directory version 0x0000 and in the device being marked as not supporting the general purpose log directory log page.
Since before commit 6d4405b16d37 the log page version check did not exist and things were still working correctly for these devices, relax the ata_read_log_directory() version check and only warn about the invalid log page version number without disabling access to the log directory page. Fixes: 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220635 Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- include/linux/libata.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 21de0935775d..7a98de1cc995 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1594,6 +1594,12 @@ do { \ #define ata_dev_dbg(dev, fmt, ...) \ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) +#define ata_dev_warn_once(dev, fmt, ...) \ + pr_warn_once("ata%u.%02u: " fmt, \ + (dev)->link->ap->print_id, \ + (dev)->link->pmp + (dev)->devno, \ + ##__VA_ARGS__) + static inline void ata_print_version_once(const struct device *dev, const char *version) { -- cgit v1.2.3 From 11fb1a82aefa6f7fea6ac82334edb5639b9927df Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 23 Sep 2025 16:09:27 +0100 Subject: firmware: arm_ffa: Add support for IMPDEF value in the memory access descriptor FF-A v1.2 introduced a 16-byte IMPLEMENTATION DEFINED value in the endpoint memory access descriptor to allow any sender to specify a custom value for each receiver. Also, this value must be specified by the receiver when retrieving the memory region. The sender must ensure it informs the receiver of this value via an IMPLEMENTATION DEFINED mechanism such as a partition message. So the FF-A driver can use the message interfaces to communicate the value and set the same in the ffa_mem_region_attributes structures when using the memory interfaces. The driver ensures that the size of the endpoint memory access descriptors is set correctly based on the FF-A version. Fixes: 9fac08d9d985 ("firmware: arm_ffa: Upgrade FF-A version to v1.2 in the driver") Reported-by: Lixiang Mao Tested-by: Lixiang Mao Message-Id: <20250923150927.1218364-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cd7ee4df9045..81e603839c4a 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -338,6 +338,7 @@ struct ffa_mem_region_attributes { * an `struct ffa_mem_region_addr_range`.
*/ u32 composite_off; + u8 impdef_val[16]; u64 reserved; }; @@ -417,15 +418,31 @@ struct ffa_mem_region { #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) +#define FFA_EMAD_HAS_IMPDEF_FIELD(version) ((version) >= FFA_VERSION_1_2) +#define FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version) ((version) > FFA_VERSION_1_0) + +static inline u32 ffa_emad_size_get(u32 ffa_version) +{ + u32 sz; + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(ffa_version)) + sz = sizeof(*ep_mem_access); + else + sz = sizeof(*ep_mem_access) - sizeof(ep_mem_access->impdef_val); + + return sz; +} + static inline u32 ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) { - u32 offset = count * sizeof(struct ffa_mem_region_attributes); + u32 offset = count * ffa_emad_size_get(ffa_version); /* * Earlier to v1.1, the endpoint memory descriptor array started at * offset 32(i.e. offset of ep_mem_offset in the current structure) */ - if (ffa_version <= FFA_VERSION_1_0) + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(ffa_version)) offset += offsetof(struct ffa_mem_region, ep_mem_offset); else offset += sizeof(struct ffa_mem_region); -- cgit v1.2.3 From 7e8242405b94ceac6db820de7d4fd9318cbc1219 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 1 Oct 2025 08:08:03 +0200 Subject: rpmb: move rpmb_frame struct and constants to common header Move struct rpmb_frame and RPMB operation constants from MMC block driver to include/linux/rpmb.h for reuse across different RPMB implementations (UFS, NVMe, etc.). Signed-off-by: Bean Huo Reviewed-by: Avri Altman Acked-by: Jens Wiklander Reviewed-by: Bart Van Assche Signed-off-by: Ulf Hansson --- include/linux/rpmb.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h index cccda73eea4d..ed3f8e431eff 100644 --- a/include/linux/rpmb.h +++ b/include/linux/rpmb.h @@ -61,6 +61,50 @@ struct rpmb_dev { #define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) +/** + * struct rpmb_frame - RPMB frame structure for authenticated access + * + * @stuff : stuff bytes, a padding/reserved area of 196 bytes at the + * beginning of the RPMB frame. They don’t carry meaningful + * data but are required to make the frame exactly 512 bytes. + * @key_mac : The authentication key or the message authentication + * code (MAC) depending on the request/response type. + * The MAC will be delivered in the last (or the only) + * block of data. + * @data : Data to be written or read by signed access. + * @nonce : Random number generated by the host for the requests + * and copied to the response by the RPMB engine. + * @write_counter: Counter value for the total amount of the successful + * authenticated data write requests made by the host. + * @addr : Address of the data to be programmed to or read + * from the RPMB. Address is the serial number of + * the accessed block (half sector 256B). + * @block_count : Number of blocks (half sectors, 256B) requested to be + * read/programmed. + * @result : Includes information about the status of the write counter + * (valid, expired) and result of the access made to the RPMB. + * @req_resp : Defines the type of request and response to/from the memory. + * + * The stuff bytes and big-endian properties are modeled to fit to the spec. 
+ */ +struct rpmb_frame { + u8 stuff[196]; + u8 key_mac[32]; + u8 data[256]; + u8 nonce[16]; + __be32 write_counter; + __be16 addr; + __be16 block_count; + __be16 result; + __be16 req_resp; +}; + +#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ +#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ +#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ +#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ +#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ + #if IS_ENABLED(CONFIG_RPMB) struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); void rpmb_dev_put(struct rpmb_dev *rdev); -- cgit v1.2.3 From 7a84394f02ab1985ebbe0a8d6f6d69bd040de4b3 Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Tue, 7 Oct 2025 15:22:58 -0600 Subject: NFS4: Apply delay_retrans to async operations The setting of delay_retrans is applied to synchronous RPC operations because the retransmit count is stored in the same struct nfs4_exception that is passed each time an error is checked. However, for asynchronous operations (READ, WRITE, LOCKU, CLOSE, DELEGRETURN), a new struct nfs4_exception is made on the stack each time the task callback is invoked. This means that the retransmit count is always zero and thus delay_retrans never takes effect. Apply delay_retrans to these operations by tracking and updating their retransmit count. Change-Id: Ieb33e046c2b277cb979caa3faca7f52faf0568c9 Signed-off-by: Joshua Watt Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d56583572c98..31463286402f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1659,6 +1659,7 @@ struct nfs_pgio_header { void *netfs; #endif + unsigned short retrans; int pnfs_error; int error; /* merge with pnfs_error */ unsigned int good_bytes; /* boundary of good data */ -- cgit v1.2.3 From e4d0c909bf8328d986bf3aadba0c33a72b5ae30d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20-=202N?= Date: Thu, 9 Oct 2025 15:06:56 +0200 Subject: net: phy: bcm54811: Fix GMII/MII/MII-Lite selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Broadcom bcm54811 is hardware-strapped to select among RGMII and GMII/MII/MII-Lite modes. However, the corresponding bit, RGMII Enable in the Miscellaneous Control Register, must also be set to select the desired RGMII or MII(-lite)/GMII mode.
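A sketch of one plausible shape of the selection logic (the auxctl helpers and the exact flow are assumptions; only the RSVD define below comes from this patch):

	val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
	/* mask out reserved bits on write; set RGMII_EN only for RGMII modes */
	val &= ~(MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN |
		 MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RSVD);
	if (phy_interface_is_rgmii(phydev))
		val |= MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN;
	bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, val);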
Fixes: 3117a11fff5af9e7 ("net: phy: bcm54811: PHY initialization") Signed-off-by: Kamil Horák - 2N Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251009130656.1308237-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 15c35655f482..115a964f3006 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -137,6 +137,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RSVD 0x0060 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN 0x0080 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 -- cgit v1.2.3 From 1d64624243af8329b4b219d8c39e28ea448f9929 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:31 -0700 Subject: HID: core: Add printk_ratelimited variants to hid_warn() etc hid_warn_ratelimited() is needed. Add the others as part of the block. Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- include/linux/hid.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index e1b673ad7457..a4ddb94e3ee5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1292,4 +1292,15 @@ void hid_quirks_exit(__u16 bus); #define hid_dbg_once(hid, fmt, ...) \ dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_err_ratelimited(hid, fmt, ...) \ + dev_err_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_notice_ratelimited(hid, fmt, ...) \ + dev_notice_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) \ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_info_ratelimited(hid, fmt, ...) \ + dev_info_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_dbg_ratelimited(hid, fmt, ...) \ + dev_dbg_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) + #endif -- cgit v1.2.3 From afb026b6d35c79f6f47752147327932827aeac8c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:13 -0700 Subject: compiler: Tweak __UNIQUE_ID() naming In preparation for the objtool klp diff subcommand, add an underscore between the name and the counter. This will make it possible for objtool to distinguish between the non-unique and unique parts of the symbol name so it can properly correlate the symbols. 
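Concretely, for __UNIQUE_ID(foo) with __COUNTER__ at 42 (names constructed for illustration):

	/* before: __UNIQUE_ID_foo42  - "foo" and "42" run together       */
	/* after:  __UNIQUE_ID_foo_42 - the "_42" suffix splits off cleanly */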
Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/compiler.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5b45ea7dff3e..6a32250f22f7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -163,7 +163,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) +/* Format: __UNIQUE_ID_<name>_<__COUNTER__> */ +#define __UNIQUE_ID(name) \ + __PASTE(__UNIQUE_ID_, \ + __PASTE(name, \ + __PASTE(_, __COUNTER__))) /** * data_race - mark an expression as containing intentional data races -- cgit v1.2.3 From 9f14f1f91883aa2bfd6663161d2002c8ce937c43 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:14 -0700 Subject: compiler.h: Make addressable symbols less of an eyesore Avoid underscore overload by changing: __UNIQUE_ID___addressable_loops_per_jiffy_868 to the following: __UNIQUE_ID_addressable_loops_per_jiffy_868 This matches the format used by other __UNIQUE_ID()-generated symbols and improves readability for those who stare at ELF symbol table dumps. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6a32250f22f7..ab181d87d71d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -287,7 +287,7 @@ static inline void *offset_to_ptr(const int *off) */ #define ___ADDRESSABLE(sym, __attrs) \ static void * __used __attrs \ - __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym; + __UNIQUE_ID(__PASTE(addressable_, sym)) = (void *)(uintptr_t)&sym; #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) -- cgit v1.2.3 From c2d420796a427dda71a2400909864e7f8e037fd4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:15 -0700 Subject: elfnote: Change ELFNOTE() to use __UNIQUE_ID() In preparation for the objtool klp diff subcommand, replace the custom unique symbol name generation in ELFNOTE() with __UNIQUE_ID(). This standardizes the naming format for all "unique" symbols, which will allow objtool to properly correlate them. Note this also removes the "one ELF note per line" limitation. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/elfnote.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 69b136e4dd2b..bb3dcded055f 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -60,23 +60,21 @@ #else /* !__ASSEMBLER__ */ #include +#include /* * Use an anonymous structure which matches the shape of * Elf{32,64}_Nhdr, but includes the name and desc data. The size and * type of name and desc depend on the macro arguments. "name" must * be a literal string, and "desc" must be passed by value.
*/ -#define _ELFNOTE_PASTE(a,b) a##b -#define _ELFNOTE(size, name, unique, type, desc) \ +#define ELFNOTE(size, name, type, desc) \ static const struct { \ struct elf##size##_note _nhdr; \ unsigned char _name[sizeof(name)] \ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ typeof(desc) _desc \ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ - } _ELFNOTE_PASTE(_note_, unique) \ + } __UNIQUE_ID(note) \ __used \ __attribute__((section(".note." name), \ aligned(sizeof(Elf##size##_Word)), \ @@ -89,11 +87,10 @@ name, \ desc \ } -#define ELFNOTE(size, name, type, desc) \ - _ELFNOTE(size, name, __LINE__, type, desc) #define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) #define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) + #endif /* __ASSEMBLER__ */ #endif /* _LINUX_ELFNOTE_H */ -- cgit v1.2.3 From 6717e8f91db71641cb52855ed14c7900972ed0bc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:16 -0700 Subject: kbuild: Remove 'kmod_' prefix from __KBUILD_MODNAME In preparation for the objtool klp diff subcommand, remove the arbitrary 'kmod_' prefix from __KBUILD_MODNAME and instead add it explicitly in the __initcall_id() macro. This change supports the standardization of "unique" symbol naming by ensuring the non-unique portion of the name comes before the unique part. That will enable objtool to properly correlate symbols across builds. Cc: Masahiro Yamada Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/init.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 17c1bc712e23..40331923b9f4 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -200,12 +200,13 @@ extern struct module __this_module; /* Format: <modname>__<counter>_<line>_<fn> */ #define __initcall_id(fn) \ + __PASTE(kmod_, \ __PASTE(__KBUILD_MODNAME, \ __PASTE(__, \ __PASTE(__COUNTER__, \ __PASTE(_, \ __PASTE(__LINE__, \ - __PASTE(_, fn)))))) + __PASTE(_, fn))))))) /* Format: __<prefix>__<iid><id> */ #define __initcall_name(prefix, __iid, id) \ -- cgit v1.2.3 From b37491d72b43c3a322d396c2d8e951a10be70c17 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 18 Sep 2025 09:30:03 -0700 Subject: interval_tree: Fix ITSTATIC usage for *_subtree_search() For consistency with the other function templates, change _subtree_search_*() to use the user-supplied ITSTATIC rather than the hard-coded 'static'.
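As context for the ITSTATIC parameter, a hedged sketch of a template instantiation modeled on lib/interval_tree.c (the START/LAST helpers are local definitions there). With an empty ITSTATIC argument every generated function, now including interval_tree_subtree_search(), gets external linkage and can be prototyped in a header as the hunk below does:

	#include <linux/interval_tree.h>
	#include <linux/interval_tree_generic.h>

	#define START(node) ((node)->start)
	#define LAST(node)  ((node)->last)

	/*
	 * The empty ITSTATIC argument gives external linkage to all of the
	 * template-generated functions; passing "static" instead makes them
	 * file-local, which only works if the _subtree_search() helper
	 * honors ITSTATIC like the rest of the template.
	 */
	INTERVAL_TREE_DEFINE(struct interval_tree_node, rb,
			     unsigned long, __subtree_last,
			     START, LAST, /* ITSTATIC: */, interval_tree)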
Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/interval_tree.h | 4 ++++ include/linux/interval_tree_generic.h | 2 +- include/linux/mm.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h index 2b8026a39906..9d5791e9f737 100644 --- a/include/linux/interval_tree.h +++ b/include/linux/interval_tree.h @@ -19,6 +19,10 @@ extern void interval_tree_remove(struct interval_tree_node *node, struct rb_root_cached *root); +extern struct interval_tree_node * +interval_tree_subtree_search(struct interval_tree_node *node, + unsigned long start, unsigned long last); + extern struct interval_tree_node * interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index 1b400f26f63d..c5a2fed49eb0 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -77,7 +77,7 @@ ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ * Cond2: start <= ITLAST(node) \ */ \ \ -static ITSTRUCT * \ +ITSTATIC ITSTRUCT * \ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ { \ while (true) { \ diff --git a/include/linux/mm.h b/include/linux/mm.h index d16b33bacc32..04fa27718cd1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3369,6 +3369,8 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node, struct rb_root_cached *root); void vma_interval_tree_remove(struct vm_area_struct *node, struct rb_root_cached *root); +struct vm_area_struct *vma_interval_tree_subtree_search(struct vm_area_struct *node, + unsigned long start, unsigned long last); struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, -- cgit v1.2.3 From d2c60bde1c0fcac8b140e527546f80749ccd9c67 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:53 -0700 Subject: objtool: Move ANNOTATE* macros to annotate.h In preparation for using the objtool annotation macros in higher-level objtool.h macros like UNWIND_HINT, move them to their own file. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/annotate.h | 109 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/objtool.h | 90 +------------------------------------- 2 files changed, 110 insertions(+), 89 deletions(-) create mode 100644 include/linux/annotate.h (limited to 'include/linux') diff --git a/include/linux/annotate.h b/include/linux/annotate.h new file mode 100644 index 000000000000..ccb445496331 --- /dev/null +++ b/include/linux/annotate.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ANNOTATE_H +#define _LINUX_ANNOTATE_H + +#include + +#ifdef CONFIG_OBJTOOL + +#ifndef __ASSEMBLY__ + +#define __ASM_ANNOTATE(label, type) \ + ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ + ".long " __stringify(label) " - .\n\t" \ + ".long " __stringify(type) "\n\t" \ + ".popsection\n\t" + +#define ASM_ANNOTATE(type) \ + "911:\n\t" \ + __ASM_ANNOTATE(911b, type) + +#else /* __ASSEMBLY__ */ + +.macro ANNOTATE type:req +.Lhere_\@: + .pushsection .discard.annotate_insn,"M",@progbits,8 + .long .Lhere_\@ - . 
+ .long \type + .popsection +.endm + +#endif /* __ASSEMBLY__ */ + +#else /* !CONFIG_OBJTOOL */ +#ifndef __ASSEMBLY__ +#define __ASM_ANNOTATE(label, type) "" +#define ASM_ANNOTATE(type) +#else /* __ASSEMBLY__ */ +.macro ANNOTATE type:req +.endm +#endif /* __ASSEMBLY__ */ +#endif /* !CONFIG_OBJTOOL */ + +#ifndef __ASSEMBLY__ + +/* + * Annotate away the various 'relocation to !ENDBR` complaints; knowing that + * these relocations will never be used for indirect calls. + */ +#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) +#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR)) + +/* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) +/* + * See linux/instrumentation.h + */ +#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN) +#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END) +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL ASM_ANNOTATE(ANNOTYPE_INTRA_FUNCTION_CALL) +/* + * Use objtool to validate the entry requirement that all code paths do + * VALIDATE_UNRET_END before RET. + * + * NOTE: The macro must be used at the beginning of a global symbol, otherwise + * it will be ignored. + */ +#define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) +/* + * This should be used to refer to an instruction that is considered + * terminating, like a noreturn CALL or UD2 when we know they are not -- eg + * WARN using UD2. + */ +#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +/* + * This should not be used; it annotates away CFI violations. There are a few + * valid use cases like kexec handover to the next kernel image, and there is + * no security concern there. + * + * There are also a few real issues annotated away, like EFI because we can't + * control the EFI code. 
+ */ +#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) + +#else /* __ASSEMBLY__ */ +#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR +#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE +/* ANNOTATE_INSTR_BEGIN ANNOTATE type=ANNOTYPE_INSTR_BEGIN */ +/* ANNOTATE_INSTR_END ANNOTATE type=ANNOTYPE_INSTR_END */ +#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS +#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL +#define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN +#define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI +#endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_ANNOTATE_H */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 46ebaa46e6c5..1973e9f14bf9 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -3,11 +3,10 @@ #define _LINUX_OBJTOOL_H #include +#include #ifdef CONFIG_OBJTOOL -#include - #ifndef __ASSEMBLY__ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) \ @@ -53,16 +52,6 @@ #define __ASM_BREF(label) label ## b -#define __ASM_ANNOTATE(label, type) \ - ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ - ".long " __stringify(label) " - .\n\t" \ - ".long " __stringify(type) "\n\t" \ - ".popsection\n\t" - -#define ASM_ANNOTATE(type) \ - "911:\n\t" \ - __ASM_ANNOTATE(911b, type) - #else /* __ASSEMBLY__ */ /* @@ -111,14 +100,6 @@ #endif .endm -.macro ANNOTATE type:req -.Lhere_\@: - .pushsection .discard.annotate_insn,"M",@progbits,8 - .long .Lhere_\@ - . - .long \type - .popsection -.endm - #endif /* __ASSEMBLY__ */ #else /* !CONFIG_OBJTOOL */ @@ -128,84 +109,15 @@ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) -#define __ASM_ANNOTATE(label, type) "" -#define ASM_ANNOTATE(type) #else .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -.macro ANNOTATE type:req -.endm #endif #endif /* CONFIG_OBJTOOL */ -#ifndef __ASSEMBLY__ -/* - * Annotate away the various 'relocation to !ENDBR` complaints; knowing that - * these relocations will never be used for indirect calls. - */ -#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) -#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR)) - -/* - * This should be used immediately before an indirect jump/call. It tells - * objtool the subsequent indirect jump/call is vouched safe for retpoline - * builds. - */ -#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) -/* - * See linux/instrumentation.h - */ -#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN) -#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END) -/* - * objtool annotation to ignore the alternatives and only consider the original - * instruction(s). - */ -#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. - */ -#define ANNOTATE_INTRA_FUNCTION_CALL ASM_ANNOTATE(ANNOTYPE_INTRA_FUNCTION_CALL) -/* - * Use objtool to validate the entry requirement that all code paths do - * VALIDATE_UNRET_END before RET. - * - * NOTE: The macro must be used at the beginning of a global symbol, otherwise - * it will be ignored. 
- */ -#define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) -/* - * This should be used to refer to an instruction that is considered - * terminating, like a noreturn CALL or UD2 when we know they are not -- eg - * WARN using UD2. - */ -#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) -/* - * This should not be used; it annotates away CFI violations. There are a few - * valid use cases like kexec handover to the next kernel image, and there is - * no security concern there. - * - * There are also a few real issues annotated away, like EFI because we can't - * control the EFI code. - */ -#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) - -#else -#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR -#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE -/* ANNOTATE_INSTR_BEGIN ANNOTATE type=ANNOTYPE_INSTR_BEGIN */ -/* ANNOTATE_INSTR_END ANNOTATE type=ANNOTYPE_INSTR_END */ -#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS -#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL -#define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN -#define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE -#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI -#endif - #if defined(CONFIG_NOINSTR_VALIDATION) && \ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) #define VALIDATE_UNRET_BEGIN ANNOTATE_UNRET_BEGIN -- cgit v1.2.3 From 58f36a5756445dcd0a733504cd798955ebe968c1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:54 -0700 Subject: objtool: Add ANNOTATE_DATA_SPECIAL In preparation for the objtool klp diff subcommand, add an ANNOTATE_DATA_SPECIAL macro which annotates special section entries so that objtool can determine their size and location and extract them when needed. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/annotate.h | 49 ++++++++++++++++++++++++++++++++----------- include/linux/objtool_types.h | 2 ++ 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/annotate.h b/include/linux/annotate.h index ccb445496331..7c10d34d198c 100644 --- a/include/linux/annotate.h +++ b/include/linux/annotate.h @@ -8,34 +8,52 @@ #ifndef __ASSEMBLY__ -#define __ASM_ANNOTATE(label, type) \ - ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ +#define __ASM_ANNOTATE(section, label, type) \ + ".pushsection " section ",\"M\", @progbits, 8\n\t" \ ".long " __stringify(label) " - .\n\t" \ ".long " __stringify(type) "\n\t" \ ".popsection\n\t" +#define ASM_ANNOTATE_LABEL(label, type) \ + __ASM_ANNOTATE(".discard.annotate_insn", label, type) + #define ASM_ANNOTATE(type) \ "911:\n\t" \ - __ASM_ANNOTATE(911b, type) + ASM_ANNOTATE_LABEL(911b, type) + +#define ASM_ANNOTATE_DATA(type) \ + "912:\n\t" \ + __ASM_ANNOTATE(".discard.annotate_data", 912b, type) #else /* __ASSEMBLY__ */ -.macro ANNOTATE type:req +.macro __ANNOTATE section, type .Lhere_\@: - .pushsection .discard.annotate_insn,"M",@progbits,8 + .pushsection \section, "M", @progbits, 8 .long .Lhere_\@ - . 
.long \type .popsection .endm +.macro ANNOTATE type + __ANNOTATE ".discard.annotate_insn", \type +.endm + +.macro ANNOTATE_DATA type + __ANNOTATE ".discard.annotate_data", \type +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_OBJTOOL */ #ifndef __ASSEMBLY__ -#define __ASM_ANNOTATE(label, type) "" +#define ASM_ANNOTATE_LABEL(label, type) "" #define ASM_ANNOTATE(type) +#define ASM_ANNOTATE_DATA(type) #else /* __ASSEMBLY__ */ -.macro ANNOTATE type:req +.macro ANNOTATE type +.endm +.macro ANNOTATE_DATA type .endm #endif /* __ASSEMBLY__ */ #endif /* !CONFIG_OBJTOOL */ @@ -47,7 +65,7 @@ * these relocations will never be used for indirect calls. */ #define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) -#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR)) +#define ANNOTATE_NOENDBR_SYM(sym) asm(ASM_ANNOTATE_LABEL(sym, ANNOTYPE_NOENDBR)) /* * This should be used immediately before an indirect jump/call. It tells @@ -58,8 +76,8 @@ /* * See linux/instrumentation.h */ -#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN) -#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END) +#define ANNOTATE_INSTR_BEGIN(label) ASM_ANNOTATE_LABEL(label, ANNOTYPE_INSTR_BEGIN) +#define ANNOTATE_INSTR_END(label) ASM_ANNOTATE_LABEL(label, ANNOTYPE_INSTR_END) /* * objtool annotation to ignore the alternatives and only consider the original * instruction(s). @@ -83,7 +101,7 @@ * terminating, like a noreturn CALL or UD2 when we know they are not -- eg * WARN using UD2. */ -#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +#define ANNOTATE_REACHABLE(label) ASM_ANNOTATE_LABEL(label, ANNOTYPE_REACHABLE) /* * This should not be used; it annotates away CFI violations. There are a few * valid use cases like kexec handover to the next kernel image, and there is @@ -92,7 +110,13 @@ * There are also a few real issues annotated away, like EFI because we can't * control the EFI code. */ -#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) +#define ANNOTATE_NOCFI_SYM(sym) asm(ASM_ANNOTATE_LABEL(sym, ANNOTYPE_NOCFI)) + +/* + * Annotate a special section entry. This emables livepatch module generation + * to find and extract individual special section entries as needed. + */ +#define ANNOTATE_DATA_SPECIAL ASM_ANNOTATE_DATA(ANNOTYPE_DATA_SPECIAL) #else /* __ASSEMBLY__ */ #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -104,6 +128,7 @@ #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE #define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI +#define ANNOTATE_DATA_SPECIAL ANNOTATE_DATA type=ANNOTYPE_DATA_SPECIAL #endif /* __ASSEMBLY__ */ #endif /* _LINUX_ANNOTATE_H */ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index aceac94632c8..c6def4049b1a 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -67,4 +67,6 @@ struct unwind_hint { #define ANNOTYPE_REACHABLE 8 #define ANNOTYPE_NOCFI 9 +#define ANNOTYPE_DATA_SPECIAL 1 + #endif /* _LINUX_OBJTOOL_TYPES_H */ -- cgit v1.2.3 From aca282ab7e75dd3c1d14230146357a03bef12194 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:55 -0700 Subject: x86/asm: Annotate special section entries In preparation for the objtool klp diff subcommand, add annotations for special section entries. This will enable objtool to determine the size and location of the entries and to extract them when needed. 
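As a hedged illustration of what such an annotation looks like at a use site, the exception-table-style macro below is invented for this example and is not one of the x86 hunks:

	#include <linux/annotate.h>

	#define MY_EX_ENTRY(from, to)					\
		".pushsection __my_ex_table, \"a\"\n\t"			\
		ANNOTATE_DATA_SPECIAL					\
		".long (" #from ") - .\n\t"				\
		".long (" #to ") - .\n\t"				\
		".popsection\n\t"

	/*
	 * ANNOTATE_DATA_SPECIAL expands to a string literal that records the
	 * entry's location in .discard.annotate_data, letting objtool compute
	 * the entry's size and extract it individually when needed.
	 */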
Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/objtool.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 1973e9f14bf9..4fea6a042b28 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -9,9 +9,10 @@ #ifndef __ASSEMBLY__ -#define UNWIND_HINT(type, sp_reg, sp_offset, signal) \ +#define UNWIND_HINT(type, sp_reg, sp_offset, signal) \ "987: \n\t" \ ".pushsection .discard.unwind_hints\n\t" \ + ANNOTATE_DATA_SPECIAL \ /* struct unwind_hint */ \ ".long 987b - .\n\t" \ ".short " __stringify(sp_offset) "\n\t" \ @@ -78,6 +79,7 @@ .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 .Lhere_\@: .pushsection .discard.unwind_hints + ANNOTATE_DATA_SPECIAL /* struct unwind_hint */ .long .Lhere_\@ - . .short \sp_offset -- cgit v1.2.3 From f6b740ef5f4724f95363ac0d664e88d221343fa1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:56 -0700 Subject: objtool: Unify STACK_FRAME_NON_STANDARD entry sizes The C implementation of STACK_FRAME_NON_STANDARD emits 8-byte entries, whereas the asm version's entries are only 4 bytes. Make them consistent by converting the asm version to 8-byte entries. This is much easier than converting the C version to 4-bytes, which would require awkwardly putting inline asm in a dummy function in order to pass the 'func' pointer to the asm. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/objtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 4fea6a042b28..b18ab53561c9 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -92,7 +92,7 @@ .macro STACK_FRAME_NON_STANDARD func:req .pushsection .discard.func_stack_frame_non_standard, "aw" - .long \func - . + .quad \func .popsection .endm -- cgit v1.2.3 From dd590d4d57ebeeb826823c288741f2ed20f452af Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:59 -0700 Subject: objtool/klp: Introduce klp diff subcommand for diffing object files Add a new klp diff subcommand which performs a binary diff between two object files and extracts changed functions into a new object which can then be linked into a livepatch module. This builds on concepts from the longstanding out-of-tree kpatch [1] project which began in 2012 and has been used for many years to generate livepatch modules for production kernels. However, this is a complete rewrite which incorporates hard-earned lessons from 12+ years of maintaining kpatch. Key improvements compared to kpatch-build: - Integrated with objtool: Leverages objtool's existing control-flow graph analysis to help detect changed functions. - Works on vmlinux.o: Supports late-linked objects, making it compatible with LTO, IBT, and similar. - Simplified code base: ~3k fewer lines of code. - Upstream: No more out-of-tree #ifdef hacks, far less cruft. - Cleaner internals: Vastly simplified logic for symbol/section/reloc inclusion and special section extraction. - Robust __LINE__ macro handling: Avoids false positive binary diffs caused by the __LINE__ macro by introducing a fix-patch-lines script (coming in a later patch) which injects #line directives into the source .patch to preserve the original line numbers at compile time. Note the end result of this subcommand is not yet functionally complete. 
Livepatch needs some ELF magic which linkers don't like: - Two relocation sections (.rela*, .klp.rela*) for the same text section. - Use of SHN_LIVEPATCH to mark livepatch symbols. Unfortunately linkers tend to mangle such things. To work around that, klp diff generates a linker-compliant intermediate binary which encodes the relevant KLP section/reloc/symbol metadata. After module linking, a klp post-link step (coming soon) will clean up the mess and convert the linked .ko into a fully compliant livepatch module. Note this subcommand requires the diffed binaries to have been compiled with -ffunction-sections and -fdata-sections, and processed with 'objtool --checksum'. Those constraints will be handled by a klp-build script introduced in a later patch. Without '-ffunction-sections -fdata-sections', reliable object diffing would be infeasible due to toolchain limitations: - For intra-file+intra-section references, the compiler might occasionally generated hard-coded instruction offsets instead of relocations. - Section-symbol-based references can be ambiguous: - Overlapping or zero-length symbols create ambiguity as to which symbol is being referenced. - A reference to the end of a symbol (e.g., checking array bounds) can be misinterpreted as a reference to the next symbol, or vice versa. A potential future alternative to '-ffunction-sections -fdata-sections' would be to introduce a toolchain option that forces symbol-based (non-section) relocations. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/livepatch.h | 25 +------------ include/linux/livepatch_external.h | 76 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 24 deletions(-) create mode 100644 include/linux/livepatch_external.h (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 51a258c24ff5..772919e8096a 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_LIVEPATCH) @@ -77,30 +78,6 @@ struct klp_func { bool transition; }; -struct klp_object; - -/** - * struct klp_callbacks - pre/post live-(un)patch callback structure - * @pre_patch: executed before code patching - * @post_patch: executed after code patching - * @pre_unpatch: executed before code unpatching - * @post_unpatch: executed after code unpatching - * @post_unpatch_enabled: flag indicating if post-unpatch callback - * should run - * - * All callbacks are optional. Only the pre-patch callback, if provided, - * will be unconditionally executed. If the parent klp_object fails to - * patch for any reason, including a non-zero error status returned from - * the pre-patch callback, no further callbacks will be executed. 
- */ -struct klp_callbacks { - int (*pre_patch)(struct klp_object *obj); - void (*post_patch)(struct klp_object *obj); - void (*pre_unpatch)(struct klp_object *obj); - void (*post_unpatch)(struct klp_object *obj); - bool post_unpatch_enabled; -}; - /** * struct klp_object - kernel object structure for live patching * @name: module name (or NULL for vmlinux) diff --git a/include/linux/livepatch_external.h b/include/linux/livepatch_external.h new file mode 100644 index 000000000000..138af19b0f5c --- /dev/null +++ b/include/linux/livepatch_external.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * External livepatch interfaces for patch creation tooling + */ + +#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_ +#define _LINUX_LIVEPATCH_EXTERNAL_H_ + +#include + +#define KLP_RELOC_SEC_PREFIX ".klp.rela." +#define KLP_SYM_PREFIX ".klp.sym." + +#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_ +#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_ +#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_ +#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_ + +#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX) +#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX) +#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX) +#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX) + +struct klp_object; + +typedef int (*klp_pre_patch_t)(struct klp_object *obj); +typedef void (*klp_post_patch_t)(struct klp_object *obj); +typedef void (*klp_pre_unpatch_t)(struct klp_object *obj); +typedef void (*klp_post_unpatch_t)(struct klp_object *obj); + +/** + * struct klp_callbacks - pre/post live-(un)patch callback structure + * @pre_patch: executed before code patching + * @post_patch: executed after code patching + * @pre_unpatch: executed before code unpatching + * @post_unpatch: executed after code unpatching + * @post_unpatch_enabled: flag indicating if post-unpatch callback + * should run + * + * All callbacks are optional. Only the pre-patch callback, if provided, + * will be unconditionally executed. If the parent klp_object fails to + * patch for any reason, including a non-zero error status returned from + * the pre-patch callback, no further callbacks will be executed. + */ +struct klp_callbacks { + klp_pre_patch_t pre_patch; + klp_post_patch_t post_patch; + klp_pre_unpatch_t pre_unpatch; + klp_post_unpatch_t post_unpatch; + bool post_unpatch_enabled; +}; + +/* + * 'struct klp_{func,object}_ext' are compact "external" representations of + * 'struct klp_{func,object}'. They are used by objtool for livepatch + * generation. The structs are then read by the livepatch module and converted + * to the real structs before calling klp_enable_patch(). + * + * TODO make these the official API for klp_enable_patch(). That should + * simplify livepatch's interface as well as its data structure lifetime + * management. 
+ */ +struct klp_func_ext { + const char *old_name; + void *new_func; + unsigned long sympos; +}; + +struct klp_object_ext { + const char *name; + struct klp_func_ext *funcs; + struct klp_callbacks callbacks; + unsigned int nr_funcs; +}; + +#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */ -- cgit v1.2.3 From b9976fa4649627c04dde26183333c3dcc90a0b76 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:11 -0700 Subject: livepatch: Introduce source code helpers for livepatch modules Add some helper macros which can be used by livepatch source .patch files to register callbacks, convert static calls to regular calls where needed, and patch syscalls. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- include/linux/livepatch_helpers.h | 77 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 include/linux/livepatch_helpers.h (limited to 'include/linux') diff --git a/include/linux/livepatch_helpers.h b/include/linux/livepatch_helpers.h new file mode 100644 index 000000000000..99d68d0773fa --- /dev/null +++ b/include/linux/livepatch_helpers.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LIVEPATCH_HELPERS_H +#define _LINUX_LIVEPATCH_HELPERS_H + +/* + * Interfaces for use by livepatch patches + */ + +#include +#include + +#ifdef MODULE +#define KLP_OBJNAME __KBUILD_MODNAME +#else +#define KLP_OBJNAME vmlinux +#endif + +/* Livepatch callback registration */ + +#define KLP_CALLBACK_PTRS ".discard.klp_callback_ptrs" + +#define KLP_PRE_PATCH_CALLBACK(func) \ + klp_pre_patch_t __used __section(KLP_CALLBACK_PTRS) \ + __PASTE(__KLP_PRE_PATCH_PREFIX, KLP_OBJNAME) = func + +#define KLP_POST_PATCH_CALLBACK(func) \ + klp_post_patch_t __used __section(KLP_CALLBACK_PTRS) \ + __PASTE(__KLP_POST_PATCH_PREFIX, KLP_OBJNAME) = func + +#define KLP_PRE_UNPATCH_CALLBACK(func) \ + klp_pre_unpatch_t __used __section(KLP_CALLBACK_PTRS) \ + __PASTE(__KLP_PRE_UNPATCH_PREFIX, KLP_OBJNAME) = func + +#define KLP_POST_UNPATCH_CALLBACK(func) \ + klp_post_unpatch_t __used __section(KLP_CALLBACK_PTRS) \ + __PASTE(__KLP_POST_UNPATCH_PREFIX, KLP_OBJNAME) = func + +/* + * Replace static_call() usage with this macro when create-diff-object + * recommends it due to the original static call key living in a module. + * + * This converts the static call to a regular indirect call. + */ +#define KLP_STATIC_CALL(name) \ + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) + +/* Syscall patching */ + +#define KLP_SYSCALL_DEFINE1(name, ...) KLP_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) +#define KLP_SYSCALL_DEFINE2(name, ...) KLP_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) +#define KLP_SYSCALL_DEFINE3(name, ...) KLP_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) +#define KLP_SYSCALL_DEFINE4(name, ...) KLP_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) +#define KLP_SYSCALL_DEFINE5(name, ...) KLP_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) +#define KLP_SYSCALL_DEFINE6(name, ...) KLP_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) + +#define KLP_SYSCALL_DEFINEx(x, sname, ...) \ + __KLP_SYSCALL_DEFINEx(x, sname, __VA_ARGS__) + +#ifdef CONFIG_X86_64 +// TODO move this to arch/x86/include/asm/syscall_wrapper.h and share code +#define __KLP_SYSCALL_DEFINEx(x, name, ...) 
\ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __klp_do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + __X64_SYS_STUBx(x, name, __VA_ARGS__) \ + __IA32_SYS_STUBx(x, name, __VA_ARGS__) \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = __klp_do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long __klp_do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +#endif + +#endif /* _LINUX_LIVEPATCH_HELPERS_H */ -- cgit v1.2.3 From 5fb750e8a9ae123b2034771b864b8a21dbef65cd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Oct 2025 17:07:00 -0700 Subject: bpf: Replace bpf_map_kmalloc_node() with kmalloc_nolock() to allocate bpf_async_cb structures. The following kmemleak splat: [ 8.105530] kmemleak: Trying to color unknown object at 0xff11000100e918c0 as Black [ 8.106521] Call Trace: [ 8.106521] [ 8.106521] dump_stack_lvl+0x4b/0x70 [ 8.106521] kvfree_call_rcu+0xcb/0x3b0 [ 8.106521] ? hrtimer_cancel+0x21/0x40 [ 8.106521] bpf_obj_free_fields+0x193/0x200 [ 8.106521] htab_map_update_elem+0x29c/0x410 [ 8.106521] bpf_prog_cfc8cd0f42c04044_overwrite_cb+0x47/0x4b [ 8.106521] bpf_prog_8c30cd7c4db2e963_overwrite_timer+0x65/0x86 [ 8.106521] bpf_prog_test_run_syscall+0xe1/0x2a0 happens due to the combination of features and fixes, but mainly due to commit 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") It's using __GFP_HIGH, which instructs slub/kmemleak internals to skip kmemleak_alloc_recursive() on allocation, so subsequent kfree_rcu()-> kvfree_call_rcu()->kmemleak_ignore() complains with the above splat. To fix this imbalance, replace bpf_map_kmalloc_node() with kmalloc_nolock() and kfree_rcu() with call_rcu() + kfree_nolock() to make sure that the objects allocated with kmalloc_nolock() are freed with kfree_nolock() rather than the implicit kfree() that kfree_rcu() uses internally. Note, the kmalloc_nolock() happens under bpf_spin_lock_irqsave(), so it will always fail in PREEMPT_RT. This is not an issue at the moment, since bpf_timers are disabled in PREEMPT_RT. In the future bpf_spin_lock will be replaced with state machine similar to bpf_task_work. 
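A hedged sketch of the resulting alloc/free pairing; the structure and function names below are illustrative, not the actual kernel/bpf hunks:

	#include <linux/slab.h>
	#include <linux/rcupdate.h>

	struct async_cb {
		struct rcu_head rcu;
		/* ... payload ... */
	};

	static void async_cb_free_rcu(struct rcu_head *rcu)
	{
		/* Memory from kmalloc_nolock() must go back via kfree_nolock(). */
		kfree_nolock(container_of(rcu, struct async_cb, rcu));
	}

	static struct async_cb *async_cb_alloc(int node)
	{
		/* Callable under a bpf spinlock; may fail on PREEMPT_RT (see above). */
		return kmalloc_nolock(sizeof(struct async_cb), __GFP_ZERO, node);
	}

	static void async_cb_destroy(struct async_cb *cb)
	{
		/* call_rcu() + kfree_nolock() instead of the implicit kfree_rcu(). */
		call_rcu(&cb->rcu, async_cb_free_rcu);
	}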
Fixes: 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Shakeel Butt Acked-by: Harry Yoo Acked-by: Vlastimil Babka Cc: linux-mm@kvack.org Link: https://lore.kernel.org/bpf/20251015000700.28988-1-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a98c83346134..d808253f2e94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2499,6 +2499,8 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid, #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags); @@ -2511,6 +2513,8 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, */ #define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ kmalloc_node(_size, _flags, _node) +#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node) \ + kmalloc_nolock(_size, _flags, _node) #define bpf_map_kzalloc(_map, _size, _flags) \ kzalloc(_size, _flags) #define bpf_map_kvcalloc(_map, _n, _size, _flags) \ -- cgit v1.2.3 From c97513cddcfc235f2522617980838e500af21d01 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Tue, 9 Sep 2025 22:52:43 +0800 Subject: hung_task: fix warnings caused by unaligned lock pointers The blocker tracking mechanism assumes that lock pointers are at least 4-byte aligned to use their lower bits for type encoding. However, as reported by Eero Tamminen, some architectures like m68k only guarantee 2-byte alignment of 32-bit values. This breaks the assumption and causes two related WARN_ON_ONCE checks to trigger. To fix this, the runtime checks are adjusted to silently ignore any lock that is not 4-byte aligned, effectively disabling the feature in such cases and avoiding the related warnings. Thanks to Geert Uytterhoeven for bisecting! Link: https://lkml.kernel.org/r/20250909145243.17119-1-lance.yang@linux.dev Fixes: e711faaafbe5 ("hung_task: replace blocker_mutex with encoded blocker") Signed-off-by: Lance Yang Reported-by: Eero Tamminen Closes: https://lore.kernel.org/lkml/CAMuHMdW7Ab13DdGs2acMQcix5ObJK0O2dG_Fxzr8_g58Rc1_0g@mail.gmail.com Reviewed-by: Masami Hiramatsu (Google) Cc: John Paul Adrian Glaubitz Cc: Anna Schumaker Cc: Boqun Feng Cc: Finn Thain Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Lance Yang Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 34e615c76ca5..c4403eeb7144 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -20,6 +20,10 @@ * always zero. So we can use these bits to encode the specific blocking * type. * + * Note that on architectures where this is not guaranteed, or for any + * unaligned lock, this tracking mechanism is silently skipped for that + * lock. 
+ * * Type encoding: * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) @@ -45,7 +49,7 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) * If the lock pointer matches the BLOCKER_TYPE_MASK, return * without writing anything. */ - if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) + if (lock_ptr & BLOCKER_TYPE_MASK) return; WRITE_ONCE(current->blocker, lock_ptr | type); @@ -53,8 +57,6 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) static inline void hung_task_clear_blocker(void) { - WARN_ON_ONCE(!READ_ONCE(current->blocker)); - WRITE_ONCE(current->blocker, 0UL); } -- cgit v1.2.3 From 0fbbcab7f9082cdc233da5e5e353f69830f11956 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Oct 2025 00:07:42 -0700 Subject: cgroup/misc: fix misc_res_type kernel-doc warning Format the kernel-doc for SCALE_HW_CALIB_INVALID correctly to avoid a kernel-doc warning: Warning: include/linux/misc_cgroup.h:26 Enum value 'MISC_CG_RES_TDX' not described in enum 'misc_res_type' Fixes: 7c035bea9407 ("KVM: TDX: Register TDX host key IDs to cgroup misc controller") Signed-off-by: Randy Dunlap Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 71cf5bfc6349..0cb36a3ffc47 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -19,7 +19,7 @@ enum misc_res_type { MISC_CG_RES_SEV_ES, #endif #ifdef CONFIG_INTEL_TDX_HOST - /* Intel TDX HKIDs resource */ + /** @MISC_CG_RES_TDX: Intel TDX HKIDs resource */ MISC_CG_RES_TDX, #endif /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ -- cgit v1.2.3 From 00aaae60faf554c27c95e93d47f200a93ff266ef Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 14 Oct 2025 18:53:53 +0300 Subject: gpio: regmap: add the .fixed_direction_output configuration parameter There are GPIO controllers such as the one present in the LX2160ARDB QIXIS FPGA which have fixed-direction input and output GPIO lines mixed together in a single register. This cannot be modeled using the gpio-regmap as-is since there is no way to present the true direction of a GPIO line. In order to make this use case possible, add a new configuration parameter - fixed_direction_output - into the gpio_regmap_config structure. This will enable user drivers to provide a bitmap that represents the fixed direction of the GPIO lines. Signed-off-by: Ioana Ciornei Acked-by: Bartosz Golaszewski Reviewed-by: Michael Walle Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/regmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index 622a2939ebe0..87983a5f3681 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -38,6 +38,10 @@ struct regmap; * offset to a register/bitmask pair. If not * given the default gpio_regmap_simple_xlate() * is used. + * @fixed_direction_output: + * (Optional) Bitmap representing the fixed direction of + * the GPIO lines. Useful when there are GPIO lines with a + * fixed direction mixed together in the same register. * @drvdata: (Optional) Pointer to driver specific data which is * not used by gpio-remap but is provided "as is" to the * driver callback(s). 
@@ -85,6 +89,7 @@ struct gpio_regmap_config { int reg_stride; int ngpio_per_reg; struct irq_domain *irq_domain; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ struct regmap_irq_chip *regmap_irq_chip; -- cgit v1.2.3 From db82b8dbf5f06d7b1abec4e1326ed8c02fa16897 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 20 Oct 2025 17:03:28 +0200 Subject: PM: runtime: Fix conditional guard definitions Since pm_runtime_get_active() returns 0 on success, all of the DEFINE_GUARD_COND() macros in pm_runtime.h need the "_RET == 0" condition at the end of the argument list or they would not work correctly. Fixes: 9a0abc39450a ("PM: runtime: Add auto-cleanup macros for "resume and get" operations") Reported-by: kernel test robot Link: https://lore.kernel.org/linux-pm/202510191529.BCyjKlLQ-lkp@intel.com/ Signed-off-by: Rafael J. Wysocki Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Tested-by: Farhan Ali Link: https://patch.msgid.link/5943878.DvuYhMxLoT@rafael.j.wysocki --- include/linux/pm_runtime.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index a3f44f6c2da1..0b436e15f4cd 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -629,13 +629,13 @@ DEFINE_GUARD(pm_runtime_active_auto, struct device *, * device. */ DEFINE_GUARD_COND(pm_runtime_active, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. -- cgit v1.2.3 From 9025688bf6d427e553aca911308cd92e92634f51 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 20 Oct 2025 10:53:40 -0700 Subject: module: Fix device table module aliases Commit 6717e8f91db7 ("kbuild: Remove 'kmod_' prefix from __KBUILD_MODNAME") inadvertently broke module alias generation for modules which rely on MODULE_DEVICE_TABLE(). It removed the "kmod_" prefix from __KBUILD_MODNAME, which caused MODULE_DEVICE_TABLE() to generate a symbol name which no longer matched the format expected by handle_moddevtable() in scripts/mod/file2alias.c. As a result, modpost failed to find the device tables, leading to missing module aliases. Fix this by explicitly adding the "kmod_" string within the MODULE_DEVICE_TABLE() macro itself, restoring the symbol name to the format expected by file2alias.c. 
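A minimal userspace demonstration of the restored symbol shape. The __PASTE helpers are simplified local copies, and __KBUILD_MODNAME is assumed to be "foo" here (kbuild normally provides it):

	#include <stdio.h>

	#define ___PASTE(a, b) a##b
	#define __PASTE(a, b) ___PASTE(a, b)
	#define __KBUILD_MODNAME foo

	#define __mod_device_table(type, name)		\
		__PASTE(__mod_device_table__,		\
		__PASTE(kmod_,				\
		__PASTE(__KBUILD_MODNAME,		\
		__PASTE(__,				\
		__PASTE(type,				\
		__PASTE(__, name))))))

	#define __stringify_1(x) #x
	#define __stringify(x) __stringify_1(x)

	int main(void)
	{
		/* Prints: __mod_device_table__kmod_foo__of__my_of_match */
		puts(__stringify(__mod_device_table(of, my_of_match)));
		return 0;
	}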
Fixes: 6717e8f91db7 ("kbuild: Remove 'kmod_' prefix from __KBUILD_MODNAME") Reported-by: Alexander Stein Reported-by: Marek Szyprowski Reported-by: Mark Brown Reported-by: Cosmin Tanislav Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Tested-by: Cosmin Tanislav Tested-by: Marek Szyprowski Tested-by: Mark Brown Tested-by: Alexander Stein Tested-by: Chen-Yu Tsai Tested-by: Anders Roxell Link: https://patch.msgid.link/e52ee3edf32874da645a9e037a7d77c69893a22a.1760982784.git.jpoimboe@kernel.org --- include/linux/module.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index e135cc79acee..d80c3ea57472 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -251,10 +251,11 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name); */ #define __mod_device_table(type, name) \ __PASTE(__mod_device_table__, \ + __PASTE(kmod_, \ __PASTE(__KBUILD_MODNAME, \ __PASTE(__, \ __PASTE(type, \ - __PASTE(__, name))))) + __PASTE(__, name)))))) /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -- cgit v1.2.3 From f6ceec6434b5efff62cecbaa2ff74fc29b96c0c6 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Tue, 21 Oct 2025 12:09:40 +0200 Subject: net: datagram: introduce datagram_poll_queue for custom receive queues Some protocols using TCP encapsulation (e.g., espintcp, openvpn) deliver userspace-bound packets through a custom skb queue rather than the standard sk_receive_queue. Introduce datagram_poll_queue that accepts an explicit receive queue, and convert datagram_poll into a wrapper around datagram_poll_queue. This allows protocols with custom skb queues to reuse the core polling logic without relying on sk_receive_queue. Cc: Sabrina Dubroca Cc: Antonio Quartulli Signed-off-by: Ralf Lici Reviewed-by: Sabrina Dubroca Reviewed-by: Antonio Quartulli Link: https://patch.msgid.link/20251021100942.195010-2-ralf@mandelbit.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fb3fec9affaa..a7cc3d1f4fd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4204,6 +4204,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); +__poll_t datagram_poll_queue(struct file *file, struct socket *sock, + struct poll_table_struct *wait, + struct sk_buff_head *rcv_queue); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, -- cgit v1.2.3 From b2284768c6b32aa224ca7d0ef0741beb434f03aa Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 22 Oct 2025 11:44:21 +0800 Subject: virtio-net: zero unused hash fields When GSO tunnel is negotiated virtio_net_hdr_tnl_from_skb() tries to initialize the tunnel metadata but forget to zero unused rxhash fields. This may leak information to another side. Fixing this by zeroing the unused hash fields. Acked-by: Michael S. 
Tsirkin Fixes: a2fb4bc4e2a6a ("net: implement virtio helpers to handle UDP GSO tunneling") Cc: Signed-off-by: Jason Wang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20251022034421.70244-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 20e0584db1dd..4d1780848d0e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,6 +401,10 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; + vhdr->hash_hdr.hash_value = 0; + vhdr->hash_hdr.hash_report = 0; + vhdr->hash_hdr.padding = 0; + /* Let the basic parsing deal with plain GSO features. */ skb_shinfo(skb)->gso_type &= ~tnl_gso_type; ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen); -- cgit v1.2.3 From bb65e0c141f879cdf54db11ae446ee3605fb54d5 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Wed, 22 Oct 2025 15:29:39 +0300 Subject: net/mlx5: Add PPHCR to PCAM supported registers mask Add the PPHCR bit to the port_access_reg_cap_mask field of PCAM register to indicate that the device supports the PPHCR register and the RS-FEC histogram feature. Signed-off-by: Alexei Lazar Reviewed-by: Yael Chemla Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761136182-918470-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 07614cd95bed..1b0b36aa2a76 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10833,7 +10833,9 @@ struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 port_access_reg_cap_mask_63[0x1]; + u8 pphcr[0x1]; + u8 port_access_reg_cap_mask_61_to_36[0x1a]; u8 pplm[0x1]; u8 port_access_reg_cap_mask_34_to_32[0x3]; -- cgit v1.2.3 From 12a1c9353c47c0fb3464eba2d78cdf649dee1cf7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 27 Oct 2025 09:27:32 +0900 Subject: block: fix op_is_zone_mgmt() to handle REQ_OP_ZONE_RESET_ALL REQ_OP_ZONE_RESET_ALL is a zone management request. Fix op_is_zone_mgmt() to return true for that operation, like it already does for REQ_OP_ZONE_RESET. While no problems were reported without this fix, this change allows strengthening checks in various block device drivers (scsi sd, virtioblk, DM) where op_is_zone_mgmt() is used to verify that a zone management command is not being issued to a regular block device. 
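A hedged sketch of the strengthened driver-side check this enables; the function below is illustrative, not an upstream hunk:

	#include <linux/blk-mq.h>

	static blk_status_t my_queue_rq(struct request *rq)
	{
		/*
		 * With REQ_OP_ZONE_RESET_ALL now covered by op_is_zone_mgmt(),
		 * this rejects every zone management command sent to a
		 * regular (non-zoned) block device.
		 */
		if (op_is_zone_mgmt(req_op(rq)) && !blk_queue_is_zoned(rq->q))
			return BLK_STS_NOTSUPP;

		/* ... normal dispatch ... */
		return BLK_STS_OK;
	}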
Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8e8d1cc8b06c..d8ba743a89b7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -478,6 +478,7 @@ static inline bool op_is_zone_mgmt(enum req_op op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: -- cgit v1.2.3 From 19de03b312d69a7e9bacb51c806c6e3f4207376c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 27 Oct 2025 09:27:33 +0900 Subject: block: make REQ_OP_ZONE_OPEN a write operation A REQ_OP_OPEN_ZONE request changes the condition of a sequential zone of a zoned block device to the explicitly open condition (BLK_ZONE_COND_EXP_OPEN). As such, it should be considered a write operation. Change this operation code to be an odd number to reflect this. The following operation numbers are changed to keep the numbering compact. No problems were reported without this change as this operation has no data. However, this unifies the zone operation to reflect that they modify the device state and also allows strengthening checks in the block layer, e.g. checking if this operation is not issued against a read-only device. Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d8ba743a89b7..44c30183ecc3 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -341,15 +341,15 @@ enum req_op { /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ - REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, + REQ_OP_ZONE_OPEN = (__force blk_opf_t)11, /* Close a zone */ - REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, + REQ_OP_ZONE_CLOSE = (__force blk_opf_t)13, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = (__force blk_opf_t)13, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)15, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)17, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)19, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, -- cgit v1.2.3 From f0f7a3f542c1698edb69075f25a3f846207facba Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Tue, 28 Oct 2025 14:30:09 +0800 Subject: platform/x86: int3472: Fix double free of GPIO device during unregister MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regulator_unregister() already frees the associated GPIO device. On ThinkPad X9 (Lunar Lake), this causes a double free issue that leads to random failures when other drivers (typically Intel THC) attempt to allocate interrupts. 
The root cause is that the reference count of the pinctrl_intel_platform module unexpectedly drops to zero when this driver defers its probe. This behavior can also be reproduced by unloading the module directly. Fix the issue by removing the redundant release of the GPIO device during regulator unregistration. Cc: stable@vger.kernel.org Fixes: 1e5d088a52c2 ("platform/x86: int3472: Stop using devm_gpiod_get()") Signed-off-by: Qiu Wenbo Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Reviewed-by: Hans de Goede Reviewed-by: Daniel Scally Link: https://patch.msgid.link/20251028063009.289414-1-qiuwenbo@gnome.org Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/int3472.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 1571e9157fa5..b1b837583d54 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -100,7 +100,6 @@ struct int3472_gpio_regulator { struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; - struct gpio_desc *ena_gpio; struct regulator_dev *rdev; struct regulator_desc rdesc; }; -- cgit v1.2.3 From 48cbf50531d8eca15b8a811717afdebb8677de9b Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 24 Oct 2025 16:23:44 +0800 Subject: regmap: irq: Correct documentation of wake_invert flag Per commit 9442490a0286 ("regmap: irq: Support wake IRQ mask inversion") the wake_invert flag is to support enable register, so cleared bits are wake disabled. Fixes: 68622bdfefb9 ("regmap: irq: document mask/wake_invert flags") Cc: stable@vger.kernel.org Signed-off-by: Shawn Guo Link: https://patch.msgid.link/20251024082344.2188895-1-shawnguo2@yeah.net Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4e1ac1fbcec4..55343795644b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1643,7 +1643,7 @@ struct regmap_irq_chip_data; * @status_invert: Inverted status register: cleared bits are active interrupts. * @status_is_level: Status register is actuall signal level: Xor status * register with previous value to get active interrupts. - * @wake_invert: Inverted wake register: cleared bits are wake enabled. + * @wake_invert: Inverted wake register: cleared bits are wake disabled. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge * or low/high level interrupts and they should be combined into -- cgit v1.2.3 From a1f3058930745d2b938b6b4f5bd9630dc74b26b7 Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Fri, 10 Oct 2025 16:16:59 +0800 Subject: fbcon: Set fb_display[i]->mode to NULL when the mode is released Recently, we discovered the following issue through syzkaller: BUG: KASAN: slab-use-after-free in fb_mode_is_equal+0x285/0x2f0 Read of size 4 at addr ff11000001b3c69c by task syz.xxx ... 
Call Trace: dump_stack_lvl+0xab/0xe0 print_address_description.constprop.0+0x2c/0x390 print_report+0xb9/0x280 kasan_report+0xb8/0xf0 fb_mode_is_equal+0x285/0x2f0 fbcon_mode_deleted+0x129/0x180 fb_set_var+0xe7f/0x11d0 do_fb_ioctl+0x6a0/0x750 fb_ioctl+0xe0/0x140 __x64_sys_ioctl+0x193/0x210 do_syscall_64+0x5f/0x9c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Based on experimentation and analysis, during framebuffer unregistration, only the memory of fb_info->modelist is freed, without setting the corresponding fb_display[i]->mode to NULL for the freed modes. This leads to UAF issues during subsequent accesses. Here's an example of reproduction steps: 1. With /dev/fb0 already registered in the system, load a kernel module to register a new device /dev/fb1; 2. Set fb1's mode to the global fb_display[] array (via FBIOPUT_CON2FBMAP); 3. Switch console from fb to VGA (to allow normal rmmod of the ko); 4. Unload the kernel module, at this point fb1's modelist is freed, leaving a wild pointer in fb_display[]; 5. Trigger the bug via system calls through fb0 attempting to delete a mode from fb0. Add a check in do_unregister_framebuffer(): if the mode to be freed exists in fb_display[], set the corresponding mode pointer to NULL. Signed-off-by: Quanmin Yan Reviewed-by: Thomas Zimmermann Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- include/linux/fbcon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 81f0e698acbf..f206370060e1 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -18,6 +18,7 @@ void fbcon_suspended(struct fb_info *info); void fbcon_resumed(struct fb_info *info); int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode); +void fbcon_delete_modelist(struct list_head *head); void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); @@ -38,6 +39,7 @@ static inline void fbcon_suspended(struct fb_info *info) {} static inline void fbcon_resumed(struct fb_info *info) {} static inline int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { return 0; } +static inline void fbcon_delete_modelist(struct list_head *head) {} static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} -- cgit v1.2.3 From 39c89ee6e9c4464eb366f4e594379454a6c4db39 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 25 Oct 2025 21:53:18 +0100 Subject: compiler_types: Introduce __nocfi_generic There are two different ways that LLVM can expand kCFI operand bundles in LLVM IR: generically in the middle end or using an architecture specific sequence when lowering LLVM IR to machine code in the backend. The generic pass allows any architecture to take advantage of kCFI but the expansion of these bundles in the middle end can mess with optimizations that may turn indirect calls into direct calls when the call target is known at compile time, such as after inlining. Add __nocfi_generic, dependent on an architecture selecting CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS, to disable kCFI bundle generation in functions where only the generic kCFI pass may cause problems. 
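A hedged usage sketch; the function and callback below are invented for this example:

	#include <linux/compiler_types.h>

	/*
	 * On architectures selecting CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS,
	 * __nocfi_generic suppresses kCFI operand-bundle generation for this
	 * one function, so the middle-end expansion cannot prevent the
	 * indirect call below from being turned into a direct call after
	 * inlining. On all other configurations it expands to nothing.
	 */
	static void __nocfi_generic run_hook(void (*hook)(void *ctx), void *ctx)
	{
		hook(ctx);
	}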
Link: https://github.com/ClangBuiltLinux/linux/issues/2124
Signed-off-by: Nathan Chancellor
Link: https://patch.msgid.link/20251025-idpf-fix-arm-kcfi-build-error-v1-1-ec57221153ae@kernel.org
Signed-off-by: Kees Cook
---
 include/linux/compiler_types.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 59288a2c1ad2..1414be493738 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -461,6 +461,12 @@ struct ftrace_likely_data {
 # define __nocfi
 #endif

+#if defined(CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS)
+# define __nocfi_generic __nocfi
+#else
+# define __nocfi_generic
+#endif
+
 /*
  * Any place that could be marked with the "alloc_size" attribute is also
  * a place to be marked with the "malloc" attribute, except those that may
-- cgit v1.2.3

From 14a7f2392f42bbb71c1a5ea68930006221fcd80a Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Wed, 29 Oct 2025 11:36:46 -0700
Subject: bpf: Make migrate_disable always inline to avoid partial inlining

The build fails with llvm 21/22:

  $ make LLVM=1 -j
  ...
  LD      vmlinux.o
  GEN     .vmlinux.objs
  ...
  BTF     .tmp_vmlinux1.btf.o
  ...
  AS      .tmp_vmlinux2.kallsyms.o
  LD      vmlinux.unstripped
  BTFIDS  vmlinux.unstripped
  WARN: resolve_btfids: unresolved symbol migrate_enable
  WARN: resolve_btfids: unresolved symbol migrate_disable
  make[2]: *** [vmlinux.unstripped] Error 255
  make[2]: *** Deleting file 'vmlinux.unstripped'
  make[1]: *** [Makefile:1242: vmlinux] Error 2
  make: *** [Makefile:248: __sub-make] Error 2

Two functions with identical names but different addresses are considered
ambiguous and are removed from the vmlinux BTF by "pahole"; resolve_btfids
then warns because it cannot find them.

Commit 378b7708194f ("sched: Make migrate_{en,dis}able() inline") made
them inlineable in most places, but in a vmlinux built with llvm 21 or 22
there are four symbols for migrate_{enable,disable}: three static
functions and one global function.

Fix the issue by marking migrate_{enable,disable} as always inline. The
alternative would be to mark them as notrace/nokprobe, which is more
drastic. With this fix, only bpf programs are prevented from attaching to
these functions; the rest of tracing should not be affected.

[note: Peter ok-ed the patch, Alexei rewrote commit log]

Fixes: 378b7708194f ("sched: Make migrate_{en,dis}able() inline")
Signed-off-by: Yonghong Song
Acked-by: Menglong Dong
Link: https://lore.kernel.org/r/20251029183646.3811774-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbb7340c5866..b469878de25c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2407,12 +2407,12 @@ static inline void __migrate_enable(void) { }
  * be defined in kernel/sched/core.c.
  */
 #ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
-static inline void migrate_disable(void)
+static __always_inline void migrate_disable(void)
 {
 	__migrate_disable();
 }

-static inline void migrate_enable(void)
+static __always_inline void migrate_enable(void)
 {
 	__migrate_enable();
 }
-- cgit v1.2.3

From c3838262b824c71c145cd3668722e99a69bc9cd9 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Fri, 31 Oct 2025 14:05:51 +0800
Subject: virtio_net: fix alignment for virtio_net_hdr_v1_hash

Changing the alignment of the header would mean it is no longer safe to
cast a 2-byte-aligned pointer between formats; the resulting layout is
sketched below.
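
A hedged sketch of the layout after the change (field names match the
diff below; the exact uapi definition may differ in detail):

	struct virtio_net_hdr_v1_hash {
		struct virtio_net_hdr_v1 hdr;
		__le16 hash_value_lo;	/* was a single __le32 hash_value */
		__le16 hash_value_hi;
		__le16 hash_report;
		__le16 padding;
	};

With no member wider than 16 bits, the structure's natural alignment
stays at 2 bytes, so casting a 2-byte-aligned virtio_net_hdr_v1 pointer
to this format remains safe.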
Use two 16-bit fields to keep it 2-byte aligned, as before.

This fixes the performance regression introduced by commit 56a06bd40fab
("virtio_net: enable gso over UDP tunnel support."), which uses
virtio_net_hdr_v1_hash_tunnel, a structure that embeds
virtio_net_hdr_v1_hash.

Pktgen in the guest + XDP_DROP on TAP + vhost_net shows the TX PPS
recovering from 2.4 Mpps to 4.45 Mpps.

Fixes: 56a06bd40fab ("virtio_net: enable gso over UDP tunnel support.")
Cc: stable@vger.kernel.org
Signed-off-by: Michael S. Tsirkin
Signed-off-by: Jason Wang
Tested-by: Lei Yang
Link: https://patch.msgid.link/20251031060551.126-1-jasowang@redhat.com
Signed-off-by: Jakub Kicinski
---
 include/linux/virtio_net.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 4d1780848d0e..b673c31569f3 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -401,7 +401,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
 		if (!tnl_hdr_negotiated)
 			return -EINVAL;

-		vhdr->hash_hdr.hash_value = 0;
+		vhdr->hash_hdr.hash_value_lo = 0;
+		vhdr->hash_hdr.hash_value_hi = 0;
 		vhdr->hash_hdr.hash_report = 0;
 		vhdr->hash_hdr.padding = 0;
-- cgit v1.2.3

From b1d16f7c0063b7209fd3251ce40c77d37b477b83 Mon Sep 17 00:00:00 2001
From: Michal Swiatkowski
Date: Tue, 4 Nov 2025 09:23:31 -0800
Subject: libie: depend on DEBUG_FS when building LIBIE_FWLOG

LIBIE_FWLOG is unusable without DEBUG_FS, so mark the dependency in
Kconfig. This fixes a build error in ixgbe when DEBUG_FS is not set.

To avoid adding another layer of #if IS_ENABLED(CONFIG_LIBIE_FWLOG) in
the ixgbe fwlog code, define the debugfs dentry even when DEBUG_FS is not
enabled. In that case the dummy LIBIE_FWLOG functions are used, so the
uninitialized dentry is not a problem.

Fixes: 641585bc978e ("ixgbe: fwlog support for e610")
Reported-by: Guenter Roeck
Closes: https://lore.kernel.org/lkml/f594c621-f9e1-49f2-af31-23fbcb176058@roeck-us.net/
Signed-off-by: Michal Swiatkowski
Reviewed-by: Simon Horman
Reviewed-by: Aleksandr Loktionov
Tested-by: Rinitha S (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen
Link: https://patch.msgid.link/20251104172333.752445-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski
---
 include/linux/net/intel/libie/fwlog.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h
index 36b13fabca9e..7273c78c826b 100644
--- a/include/linux/net/intel/libie/fwlog.h
+++ b/include/linux/net/intel/libie/fwlog.h
@@ -78,8 +78,20 @@ struct libie_fwlog {
 	);
 };

+#if IS_ENABLED(CONFIG_LIBIE_FWLOG)
 int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api);
 void libie_fwlog_deinit(struct libie_fwlog *fwlog);
 void libie_fwlog_reregister(struct libie_fwlog *fwlog);
 void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len);
+#else
+static inline int libie_fwlog_init(struct libie_fwlog *fwlog,
+				   struct libie_fwlog_api *api)
+{
+	return -EOPNOTSUPP;
+}
+static inline void libie_fwlog_deinit(struct libie_fwlog *fwlog) { }
+static inline void libie_fwlog_reregister(struct libie_fwlog *fwlog) { }
+static inline void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf,
+					u16 len) { }
+#endif /* CONFIG_LIBIE_FWLOG */
 #endif /* _LIBIE_FWLOG_H_ */
-- cgit v1.2.3
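
A hedged caller sketch (the adapter structure and error handling are
hypothetical, not from this patch) of how the dummy functions keep driver
code free of #ifdefs:

	err = libie_fwlog_init(&adapter->fwlog, &api);
	if (err && err != -EOPNOTSUPP)
		return err;	/* real initialization failure */
	/* -EOPNOTSUPP: CONFIG_LIBIE_FWLOG is disabled; probing
	 * continues without firmware logging.
	 */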
From 9818af18db4bfefd320d0fef41390a616365e6f7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 6 Nov 2025 11:50:00 +0100
Subject: compiler_types: Move unused static inline functions warning to W=2

Per Nathan, clang catches unused "static inline" functions in C files
since commit 6863f5643dd7 ("kbuild: allow Clang to find unused static
inline functions for W=1 build").

Linus said:

> So I entirely ignore W=1 issues, because I think so many of the extra
> warnings are bogus.
>
> But if this one in particular is causing more problems than most -
> some teams do seem to use W=1 as part of their test builds - it's fine
> to send me a patch that just moves bad warnings to W=2.
>
> And if anybody uses W=2 for their test builds, that's THEIR problem..

Here is the change to bump the warning from W=1 to W=2.

Fixes: 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build")
Signed-off-by: Peter Zijlstra
Signed-off-by: Andy Shevchenko
Link: https://patch.msgid.link/20251106105000.2103276-1-andriy.shevchenko@linux.intel.com
[nathan: Adjust comment as well]
Signed-off-by: Nathan Chancellor
---
 include/linux/compiler_types.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 59288a2c1ad2..339603f05b54 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -250,10 +250,9 @@ struct ftrace_likely_data {
 /*
  * GCC does not warn about unused static inline functions for -Wunused-function.
  * Suppress the warning in clang as well by using __maybe_unused, but enable it
- * for W=1 build. This will allow clang to find unused functions. Remove the
- * __inline_maybe_unused entirely after fixing most of -Wunused-function warnings.
+ * for W=2 build. This will allow clang to find unused functions.
  */
-#ifdef KBUILD_EXTRA_WARN1
+#ifdef KBUILD_EXTRA_WARN2
 #define __inline_maybe_unused
 #else
 #define __inline_maybe_unused __maybe_unused
-- cgit v1.2.3
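
For illustration (file and function names are hypothetical, not from the
patch), an unused static inline in a C file is now reported by clang only
at W=2, roughly:

	/* some_driver.c: defined but never called */
	static inline int example_unused_helper(void)
	{
		return 0;
	}

	$ make LLVM=1 W=2 drivers/example/some_driver.o
	warning: unused function 'example_unused_helper' [-Wunused-function]

A plain build or a W=1 build keeps __inline_maybe_unused defined to
__maybe_unused, which suppresses the warning.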