From 55b6dd54c3bcb6edf7ad630a4510759f4b0cf1cd Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 13 Jan 2026 13:37:39 -0500
Subject: nfsd/sunrpc: add svc_rqst->rq_private pointer and remove
 rq_lease_breaker

rq_lease_breaker has always been an NFSv4-specific layering violation in
svc_rqst. The reason it's there, though, is that we need a place that is
thread-local and accessible from the svc_rqst pointer.

Add a new rq_private pointer to struct svc_rqst. This is intended for
use by the threads that are handling the service. sunrpc code doesn't
touch it.

In nfsd, define a new struct nfsd_thread_local_info. nfsd declares one
of these on the stack and puts a pointer to it in rq_private.

Add a new ntli_lease_breaker field to the new struct and convert all of
the places that access rq_lease_breaker to use the new field instead.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Benjamin Coddington <bcodding@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4dc14c7a711b..ab8237ba9596 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -175,6 +175,9 @@ static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
 /*
  * The context of a single thread, including the request currently being
  * processed.
+ *
+ * RPC programs are free to use rq_private to stash thread-local information.
+ * The sunrpc layer will not access it.
  */
 struct svc_rqst {
 	struct list_head	rq_all;		/* all threads list */
@@ -251,7 +254,7 @@ struct svc_rqst {
 	unsigned long		bc_to_initval;
 	unsigned int		bc_to_retries;
 	unsigned int		rq_status_counter; /* RPC processing counter */
-	void			**rq_lease_breaker; /* The v4 client breaking a lease */
+	void			*rq_private;	/* For use by the service thread */
 };
 
 /* bits for rq_flags */
-- 
cgit v1.2.3


From 322ecd01bf8ad7e0da21e174679aff1759e68b2c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 13 Jan 2026 13:37:40 -0500
Subject: nfsd/sunrpc: move rq_cachetype into struct nfsd_thread_local_info

The svc_rqst->rq_cachetype field is only accessed by nfsd. Move it
into the nfsd_thread_local_info instead.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Benjamin Coddington <bcodding@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index ab8237ba9596..62152e4f3bcc 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -218,7 +218,6 @@ struct svc_rqst {
 	u32			rq_vers;	/* program version */
 	u32			rq_proc;	/* procedure number */
 	u32			rq_prot;	/* IP protocol */
-	int			rq_cachetype;	/* catering to nfsd */
 	unsigned long		rq_flags;	/* flags field */
 	ktime_t			rq_qtime;	/* enqueue time */
 
-- 
cgit v1.2.3


From 153b9e025308417d167332c93e1bcc11174178de Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:23 -0500
Subject: lockd: Relocate and rename nlm_drop_reply

The nlm_drop_reply status code is internal to the kernel's lockd
implementation and must never appear on the wire. Its previous
location in xdr.h grouped it with legitimate NLM protocol status
codes, obscuring this critical distinction.

Relocate the definition to lockd.h with a comment block for internal
status codes, and rename it to nlm__int__drop_reply to make its
internal-only nature explicit. This prepares for adding additional
internal status codes in subsequent patches.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/lockd.h | 6 ++++++
 include/linux/lockd/xdr.h   | 2 --
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 330e38776bb2..fdefec39553f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -38,6 +38,12 @@
  */
 #define LOCKD_DFLT_TIMEO	10
 
+/*
+ * Internal-use status codes, not to be placed on the wire.
+ * Version handlers translate these to appropriate wire values.
+ */
+#define nlm__int__drop_reply	cpu_to_be32(30000)
+
 /*
  * Lockd host handle (used both by the client and server personality).
  */
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 17d53165d9f2..292e4e38d17d 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -33,8 +33,6 @@ struct svc_rqst;
 #define	nlm_lck_blocked		cpu_to_be32(NLM_LCK_BLOCKED)
 #define	nlm_lck_denied_grace_period	cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD)
 
-#define nlm_drop_reply		cpu_to_be32(30000)
-
 /* Lock info passed via NLM */
 struct nlm_lock {
 	char *			caller;
-- 
cgit v1.2.3


From 9e0d0c61940796893e0c2200cdc7be0684218238 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:24 -0500
Subject: lockd: Introduce nlm__int__deadlock

The use of CONFIG_LOCKD_V4 in combination with a later cast_status()
in the NLMv3 code is difficult to reason about. Instead, replace the
use of nlm_deadlock with an implementation-defined status value that
version-specific code translates appropriately.

The new approach establishes a translation boundary: generic lockd
code returns nlm__int__deadlock when posix_lock_file() yields
-EDEADLK. Version-specific handlers (svc4proc.c for NLMv4,
svcproc.c for NLMv3) translate this internal status to the
appropriate wire protocol value. NLMv4 maps to nlm4_deadlock;
NLMv3 maps to nlm_lck_denied (since NLMv3 lacks a deadlock-specific
status code).

Later this modification will also remove the need to include NLMv4
headers in NLMv3 and generic code.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/lockd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index fdefec39553f..793691912137 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -43,6 +43,7 @@
  * Version handlers translate these to appropriate wire values.
  */
 #define nlm__int__drop_reply	cpu_to_be32(30000)
+#define nlm__int__deadlock	cpu_to_be32(30001)
 
 /*
  * Lockd host handle (used both by the client and server personality).
-- 
cgit v1.2.3


From 7db001e03d7a668ca6c3789fee42a24236ca90f6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:25 -0500
Subject: lockd: Have nlm_fopen() return errno values

The nlm_fopen() function is part of the API between nfsd and lockd.

Currently its return value is an on-the-wire NLM status code. But
that forces NFSD to include NLM wire protocol definitions despite
having no other dependency on the NLM wire protocol.

In addition, a CONFIG_LOCKD_V4 Kconfig symbol appears in the middle
of NFSD source code.

Refactor: Let's not use on-the-wire values as part of a high-level
API between two Linux kernel modules. That's what we have errno for,
right?

And, instead of simply moving the CONFIG_LOCKD_V4 check, we can get
rid of it entirely and leave the decision of what actual NLM status
code goes on the wire up to NLM version-specific code.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/bind.h  | 8 +++-----
 include/linux/lockd/lockd.h | 2 ++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index c53c81242e72..2f5dd9e943ee 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -26,11 +26,9 @@ struct rpc_clnt;
  * This is the set of functions for lockd->nfsd communication
  */
 struct nlmsvc_binding {
-	__be32			(*fopen)(struct svc_rqst *,
-						struct nfs_fh *,
-						struct file **,
-						int mode);
-	void			(*fclose)(struct file *);
+	int		(*fopen)(struct svc_rqst *rqstp, struct nfs_fh *f,
+				 struct file **filp, int flags);
+	void		(*fclose)(struct file *filp);
 };
 
 extern const struct nlmsvc_binding *nlmsvc_ops;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 793691912137..195e6ce28f6e 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -44,6 +44,8 @@
  */
 #define nlm__int__drop_reply	cpu_to_be32(30000)
 #define nlm__int__deadlock	cpu_to_be32(30001)
+#define nlm__int__stale_fh	cpu_to_be32(30002)
+#define nlm__int__failed	cpu_to_be32(30003)
 
 /*
  * Lockd host handle (used both by the client and server personality).
-- 
cgit v1.2.3


From efb5b15e3b78f5644dd2d4ddec8880e0c9aa5b5f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:26 -0500
Subject: lockd: Relocate nlmsvc_unlock API declarations

The nlmsvc_unlock_all_by_sb() and nlmsvc_unlock_all_by_ip()
functions are part of lockd's external API, consumed by other
kernel subsystems. Their declarations currently reside in
linux/lockd/lockd.h alongside internal implementation details,
which blurs the boundary between lockd's public interface and
its private internals.

Moving these declarations to linux/lockd/bind.h groups them
with other external API functions and makes the separation
explicit. This clarifies which functions are intended for
external use and reduces the risk of internal implementation
details leaking into the public API surface.

Build-tested with allyesconfig; no functional changes.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/bind.h  | 7 +++++++
 include/linux/lockd/lockd.h | 6 ------
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 2f5dd9e943ee..82eca0a13ccc 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -21,6 +21,7 @@
 struct svc_rqst;
 struct rpc_task;
 struct rpc_clnt;
+struct super_block;
 
 /*
  * This is the set of functions for lockd->nfsd communication
@@ -80,4 +81,10 @@ extern int	nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, vo
 extern int	lockd_up(struct net *net, const struct cred *cred);
 extern void	lockd_down(struct net *net);
 
+/*
+ * Cluster failover support
+ */
+int nlmsvc_unlock_all_by_sb(struct super_block *sb);
+int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
+
 #endif /* LINUX_LOCKD_BIND_H */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 195e6ce28f6e..0d883f48ec21 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -311,12 +311,6 @@ void		  nlmsvc_mark_resources(struct net *);
 void		  nlmsvc_free_host_resources(struct nlm_host *);
 void		  nlmsvc_invalidate_all(void);
 
-/*
- * Cluster failover support
- */
-int           nlmsvc_unlock_all_by_sb(struct super_block *sb);
-int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
-
 static inline struct file *nlmsvc_file_file(const struct nlm_file *file)
 {
 	return file->f_file[O_RDONLY] ?
-- 
cgit v1.2.3


From 840621fd2ff23ada8b9262d90477e75232566e6b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:27 -0500
Subject: NFS: Use nlmclnt_shutdown_rpc_clnt() to safely shut down NLM

A race condition exists in shutdown_store() when writing to the sysfs
"shutdown" file concurrently with nlm_shutdown_hosts_net(). Without
synchronization, the following sequence can occur:

  1. shutdown_store() reads server->nlm_host (non-NULL)
  2. nlm_shutdown_hosts_net() acquires nlm_host_mutex, calls
     rpc_shutdown_client(), sets h_rpcclnt to NULL, and potentially
     frees the host via nlm_gc_hosts()
  3. shutdown_store() dereferences the now-stale or freed host

Introduce nlmclnt_shutdown_rpc_clnt(), which acquires nlm_host_mutex
before accessing h_rpcclnt. This synchronizes with
nlm_shutdown_hosts_net() and ensures the rpc_clnt pointer remains
valid during the shutdown operation.

This change also improves API layering: NFS client code no longer
needs to include the internal lockd header to access nlm_host fields.
The new helper resides in bind.h alongside other public lockd
interfaces.

Reported-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/bind.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 82eca0a13ccc..39c124dcb19c 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -57,6 +57,7 @@ struct nlmclnt_initdata {
 extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
 extern void	nlmclnt_done(struct nlm_host *host);
 extern struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host);
+extern void	nlmclnt_shutdown_rpc_clnt(struct nlm_host *host);
 
 /*
  * NLM client operations provide a means to modify RPC processing of NLM
-- 
cgit v1.2.3


From f4d5f8caadd858f11b21e8a9e5c85290fc21a568 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:28 -0500
Subject: lockd: Move xdr4.h from include/linux/lockd/ to fs/lockd/

The xdr4.h header declares NLMv4-specific XDR encoder/decoder
functions and error codes that are used exclusively within the
lockd subsystem. Moving it from include/linux/lockd/ to fs/lockd/
clarifies the intended scope of these declarations and prevents
external code from depending on lockd-internal interfaces.

This change reduces the public API surface of the lockd module
and makes it easier to refactor NLMv4 internals without risk of
breaking out-of-tree consumers. The header's contents are
implementation details of the NLMv4 wire protocol handling, not
a contract with other kernel subsystems.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/bind.h  |  3 ---
 include/linux/lockd/lockd.h |  7 ++++---
 include/linux/lockd/xdr4.h  | 43 -------------------------------------------
 3 files changed, 4 insertions(+), 49 deletions(-)
 delete mode 100644 include/linux/lockd/xdr4.h

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 39c124dcb19c..077da0696f12 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -13,9 +13,6 @@
 #include <linux/lockd/nlm.h>
 /* need xdr-encoded error codes too, so... */
 #include <linux/lockd/xdr.h>
-#ifdef CONFIG_LOCKD_V4
-#include <linux/lockd/xdr4.h>
-#endif
 
 /* Dummy declarations */
 struct svc_rqst;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0d883f48ec21..46f244141645 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -22,9 +22,6 @@
 #include <linux/utsname.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/xdr.h>
-#ifdef CONFIG_LOCKD_V4
-#include <linux/lockd/xdr4.h>
-#endif
 #include <linux/lockd/debug.h>
 #include <linux/sunrpc/svc.h>
 
@@ -235,6 +232,10 @@ int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
 				  struct nlm_rqst *);
 void		  nlmclnt_next_cookie(struct nlm_cookie *);
 
+#ifdef CONFIG_LOCKD_V4
+extern const struct rpc_version nlm_version4;
+#endif
+
 /*
  * Host cache
  */
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
deleted file mode 100644
index 72831e35dca3..000000000000
--- a/include/linux/lockd/xdr4.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/xdr4.h
- *
- * XDR types for the NLM protocol
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LOCKD_XDR4_H
-#define LOCKD_XDR4_H
-
-#include <linux/fs.h>
-#include <linux/nfs.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/lockd/xdr.h>
-
-/* error codes new to NLMv4 */
-#define	nlm4_deadlock		cpu_to_be32(NLM_DEADLCK)
-#define	nlm4_rofs		cpu_to_be32(NLM_ROFS)
-#define	nlm4_stale_fh		cpu_to_be32(NLM_STALE_FH)
-#define	nlm4_fbig		cpu_to_be32(NLM_FBIG)
-#define	nlm4_failed		cpu_to_be32(NLM_FAILED)
-
-void	nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
-bool	nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-bool	nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-extern const struct rpc_version nlm_version4;
-
-#endif /* LOCKD_XDR4_H */
-- 
cgit v1.2.3


From 4db2f8a016dc9f9b357bfbf5c507c2582bb36730 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:29 -0500
Subject: lockd: Move share.h from include/linux/lockd/ to fs/lockd/

The share.h header defines struct nlm_share and declares the DOS
share management functions used by the NLM server to implement
NLM_SHARE and NLM_UNSHARE operations. These interfaces are used
exclusively within the lockd subsystem. A git grep search confirms
no external code references them.

Relocating this header from include/linux/lockd/ to fs/lockd/
narrows the public API surface of the lockd module. Out-of-tree
code cannot depend on these internal interfaces after this change.
Future refactoring of the share management implementation thus
requires no consideration of external consumers.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/lockd.h |  2 ++
 include/linux/lockd/share.h | 32 --------------------------------
 2 files changed, 2 insertions(+), 32 deletions(-)
 delete mode 100644 include/linux/lockd/share.h

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 46f244141645..eebcecd12fae 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -155,6 +155,8 @@ struct nlm_rqst {
 	void *	a_callback_data; /* sent to nlmclnt_operations callbacks */
 };
 
+struct nlm_share;
+
 /*
  * This struct describes a file held open by lockd on behalf of
  * an NFS client.
diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h
deleted file mode 100644
index 1f18a9faf645..000000000000
--- a/include/linux/lockd/share.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/share.h
- *
- * DOS share management for lockd.
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_SHARE_H
-#define LINUX_LOCKD_SHARE_H
-
-/*
- * DOS share for a specific file
- */
-struct nlm_share {
-	struct nlm_share *	s_next;		/* linked list */
-	struct nlm_host *	s_host;		/* client host */
-	struct nlm_file *	s_file;		/* shared file */
-	struct xdr_netobj	s_owner;	/* owner handle */
-	u32			s_access;	/* access mode */
-	u32			s_mode;		/* deny mode */
-};
-
-__be32	nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
-					       struct nlm_args *);
-__be32	nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
-					       struct nlm_args *);
-void	nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *,
-					       nlm_host_match_fn_t);
-
-#endif /* LINUX_LOCKD_SHARE_H */
-- 
cgit v1.2.3


From 2c562c6e6715619ce34bb37d8a0a5e40fdcc7a44 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:30 -0500
Subject: lockd: Relocate include/linux/lockd/lockd.h

Headers placed in include/linux/ form part of the kernel's
internal API and signal to subsystem maintainers that other
parts of the kernel may depend on them. By moving lockd.h
into fs/lockd/, lockd becomes a more self-contained module
whose internal interfaces are clearly distinguished from its
public contract with the rest of the kernel. This relocation
addresses a long-standing XXX comment in the header itself
that acknowledged the file's misplacement. Future changes to
lockd internals can now proceed with confidence that external
consumers are not inadvertently coupled to implementation
details.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/lockd.h | 401 --------------------------------------------
 1 file changed, 401 deletions(-)
 delete mode 100644 include/linux/lockd/lockd.h

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
deleted file mode 100644
index eebcecd12fae..000000000000
--- a/include/linux/lockd/lockd.h
+++ /dev/null
@@ -1,401 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/lockd.h
- *
- * General-purpose lockd include file.
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_LOCKD_H
-#define LINUX_LOCKD_LOCKD_H
-
-/* XXX: a lot of this should really be under fs/lockd. */
-
-#include <linux/exportfs.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <net/ipv6.h>
-#include <linux/fs.h>
-#include <linux/kref.h>
-#include <linux/refcount.h>
-#include <linux/utsname.h>
-#include <linux/lockd/bind.h>
-#include <linux/lockd/xdr.h>
-#include <linux/lockd/debug.h>
-#include <linux/sunrpc/svc.h>
-
-/*
- * Version string
- */
-#define LOCKD_VERSION		"0.5"
-
-/*
- * Default timeout for RPC calls (seconds)
- */
-#define LOCKD_DFLT_TIMEO	10
-
-/*
- * Internal-use status codes, not to be placed on the wire.
- * Version handlers translate these to appropriate wire values.
- */
-#define nlm__int__drop_reply	cpu_to_be32(30000)
-#define nlm__int__deadlock	cpu_to_be32(30001)
-#define nlm__int__stale_fh	cpu_to_be32(30002)
-#define nlm__int__failed	cpu_to_be32(30003)
-
-/*
- * Lockd host handle (used both by the client and server personality).
- */
-struct nlm_host {
-	struct hlist_node	h_hash;		/* doubly linked list */
-	struct sockaddr_storage	h_addr;		/* peer address */
-	size_t			h_addrlen;
-	struct sockaddr_storage	h_srcaddr;	/* our address (optional) */
-	size_t			h_srcaddrlen;
-	struct rpc_clnt		*h_rpcclnt;	/* RPC client to talk to peer */
-	char			*h_name;		/* remote hostname */
-	u32			h_version;	/* interface version */
-	unsigned short		h_proto;	/* transport proto */
-	unsigned short		h_reclaiming : 1,
-				h_server     : 1, /* server side, not client side */
-				h_noresvport : 1,
-				h_inuse      : 1;
-	wait_queue_head_t	h_gracewait;	/* wait while reclaiming */
-	struct rw_semaphore	h_rwsem;	/* Reboot recovery lock */
-	u32			h_state;	/* pseudo-state counter */
-	u32			h_nsmstate;	/* true remote NSM state */
-	u32			h_pidcount;	/* Pseudopids */
-	refcount_t		h_count;	/* reference count */
-	struct mutex		h_mutex;	/* mutex for pmap binding */
-	unsigned long		h_nextrebind;	/* next portmap call */
-	unsigned long		h_expires;	/* eligible for GC */
-	struct list_head	h_lockowners;	/* Lockowners for the client */
-	spinlock_t		h_lock;
-	struct list_head	h_granted;	/* Locks in GRANTED state */
-	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
-	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
-	char			*h_addrbuf;	/* address eyecatcher */
-	struct net		*net;		/* host net */
-	const struct cred	*h_cred;
-	char			nodename[UNX_MAXNODENAME + 1];
-	const struct nlmclnt_operations	*h_nlmclnt_ops;	/* Callback ops for NLM users */
-};
-
-/*
- * The largest string sm_addrbuf should hold is a full-size IPv6 address
- * (no "::" anywhere) with a scope ID.  The buffer size is computed to
- * hold eight groups of colon-separated four-hex-digit numbers, a
- * percent sign, a scope id (at most 32 bits, in decimal), and NUL.
- */
-#define NSM_ADDRBUF		((8 * 4 + 7) + (1 + 10) + 1)
-
-struct nsm_handle {
-	struct list_head	sm_link;
-	refcount_t		sm_count;
-	char			*sm_mon_name;
-	char			*sm_name;
-	struct sockaddr_storage	sm_addr;
-	size_t			sm_addrlen;
-	unsigned int		sm_monitored : 1,
-				sm_sticky : 1;	/* don't unmonitor */
-	struct nsm_private	sm_priv;
-	char			sm_addrbuf[NSM_ADDRBUF];
-};
-
-/*
- * Rigorous type checking on sockaddr type conversions
- */
-static inline struct sockaddr *nlm_addr(const struct nlm_host *host)
-{
-	return (struct sockaddr *)&host->h_addr;
-}
-
-static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
-{
-	return (struct sockaddr *)&host->h_srcaddr;
-}
-
-/*
- * Map an fl_owner_t into a unique 32-bit "pid"
- */
-struct nlm_lockowner {
-	struct list_head list;
-	refcount_t count;
-
-	struct nlm_host *host;
-	fl_owner_t owner;
-	uint32_t pid;
-};
-
-/*
- * This is the representation of a blocked client lock.
- */
-struct nlm_wait {
-	struct list_head	b_list;		/* linked list */
-	wait_queue_head_t	b_wait;		/* where to wait on */
-	struct nlm_host		*b_host;
-	struct file_lock	*b_lock;	/* local file lock */
-	__be32			b_status;	/* grant callback status */
-};
-
-/*
- * Memory chunk for NLM client RPC request.
- */
-#define NLMCLNT_OHSIZE		((__NEW_UTS_LEN) + 10u)
-struct nlm_rqst {
-	refcount_t		a_count;
-	unsigned int		a_flags;	/* initial RPC task flags */
-	struct nlm_host *	a_host;		/* host handle */
-	struct nlm_args		a_args;		/* arguments */
-	struct nlm_res		a_res;		/* result */
-	struct nlm_block *	a_block;
-	unsigned int		a_retries;	/* Retry count */
-	u8			a_owner[NLMCLNT_OHSIZE];
-	void *	a_callback_data; /* sent to nlmclnt_operations callbacks */
-};
-
-struct nlm_share;
-
-/*
- * This struct describes a file held open by lockd on behalf of
- * an NFS client.
- */
-struct nlm_file {
-	struct hlist_node	f_list;		/* linked list */
-	struct nfs_fh		f_handle;	/* NFS file handle */
-	struct file *		f_file[2];	/* VFS file pointers,
-						   indexed by O_ flags */
-	struct nlm_share *	f_shares;	/* DOS shares */
-	struct list_head	f_blocks;	/* blocked locks */
-	unsigned int		f_locks;	/* guesstimate # of locks */
-	unsigned int		f_count;	/* reference count */
-	struct mutex		f_mutex;	/* avoid concurrent access */
-};
-
-/*
- * This is a server block (i.e. a lock requested by some client which
- * couldn't be granted because of a conflicting lock).
- */
-#define NLM_NEVER		(~(unsigned long) 0)
-/* timeout on non-blocking call: */
-#define NLM_TIMEOUT		(7 * HZ)
-
-struct nlm_block {
-	struct kref		b_count;	/* Reference count */
-	struct list_head	b_list;		/* linked list of all blocks */
-	struct list_head	b_flist;	/* linked list (per file) */
-	struct nlm_rqst	*	b_call;		/* RPC args & callback info */
-	struct svc_serv *	b_daemon;	/* NLM service */
-	struct nlm_host *	b_host;		/* host handle for RPC clnt */
-	unsigned long		b_when;		/* next re-xmit */
-	unsigned int		b_id;		/* block id */
-	unsigned char		b_granted;	/* VFS granted lock */
-	struct nlm_file *	b_file;		/* file in question */
-	struct cache_req *	b_cache_req;	/* deferred request handling */
-	struct cache_deferred_req * b_deferred_req;
-	unsigned int		b_flags;	/* block flags */
-#define B_QUEUED		1	/* lock queued */
-#define B_GOT_CALLBACK		2	/* got lock or conflicting lock */
-#define B_TIMED_OUT		4	/* filesystem too slow to respond */
-};
-
-/*
- * Global variables
- */
-extern const struct rpc_program	nlm_program;
-extern const struct svc_procedure nlmsvc_procedures[24];
-#ifdef CONFIG_LOCKD_V4
-extern const struct svc_procedure nlmsvc_procedures4[24];
-#endif
-extern int			nlmsvc_grace_period;
-extern unsigned long		nlm_timeout;
-extern bool			nsm_use_hostnames;
-extern u32			nsm_local_state;
-
-extern struct timer_list	nlmsvc_retry;
-
-/*
- * Lockd client functions
- */
-struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
-int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
-int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
-void		  nlmclnt_release_call(struct nlm_rqst *);
-void		  nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host,
-					struct file_lock *fl);
-void		  nlmclnt_queue_block(struct nlm_wait *block);
-__be32		  nlmclnt_dequeue_block(struct nlm_wait *block);
-int		  nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
-__be32		  nlmclnt_grant(const struct sockaddr *addr,
-				const struct nlm_lock *lock);
-void		  nlmclnt_recovery(struct nlm_host *);
-int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
-				  struct nlm_rqst *);
-void		  nlmclnt_next_cookie(struct nlm_cookie *);
-
-#ifdef CONFIG_LOCKD_V4
-extern const struct rpc_version nlm_version4;
-#endif
-
-/*
- * Host cache
- */
-struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
-					const size_t salen,
-					const unsigned short protocol,
-					const u32 version,
-					const char *hostname,
-					int noresvport,
-					struct net *net,
-					const struct cred *cred);
-void		  nlmclnt_release_host(struct nlm_host *);
-struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
-					const char *hostname,
-					const size_t hostname_len);
-void		  nlmsvc_release_host(struct nlm_host *);
-struct rpc_clnt * nlm_bind_host(struct nlm_host *);
-void		  nlm_rebind_host(struct nlm_host *);
-struct nlm_host * nlm_get_host(struct nlm_host *);
-void		  nlm_shutdown_hosts(void);
-void		  nlm_shutdown_hosts_net(struct net *net);
-void		  nlm_host_rebooted(const struct net *net,
-					const struct nlm_reboot *);
-
-/*
- * Host monitoring
- */
-int		  nsm_monitor(const struct nlm_host *host);
-void		  nsm_unmonitor(const struct nlm_host *host);
-
-struct nsm_handle *nsm_get_handle(const struct net *net,
-					const struct sockaddr *sap,
-					const size_t salen,
-					const char *hostname,
-					const size_t hostname_len);
-struct nsm_handle *nsm_reboot_lookup(const struct net *net,
-					const struct nlm_reboot *info);
-void		  nsm_release(struct nsm_handle *nsm);
-
-/*
- * This is used in garbage collection and resource reclaim
- * A return value != 0 means destroy the lock/block/share
- */
-typedef int	  (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
-
-/*
- * Server-side lock handling
- */
-int		  lock_to_openmode(struct file_lock *);
-__be32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
-			      struct nlm_host *, struct nlm_lock *, int,
-			      struct nlm_cookie *, int);
-__be32		  nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *);
-__be32		  nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
-			struct nlm_host *host, struct nlm_lock *lock,
-			struct nlm_lock *conflock);
-__be32		  nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-void		  nlmsvc_retry_blocked(struct svc_rqst *rqstp);
-void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
-					nlm_host_match_fn_t match);
-void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
-void		  nlmsvc_release_call(struct nlm_rqst *);
-void		  nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
-
-/*
- * File handling for the server personality
- */
-__be32		  nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
-					struct nlm_lock *);
-void		  nlm_release_file(struct nlm_file *);
-void		  nlmsvc_put_lockowner(struct nlm_lockowner *);
-void		  nlmsvc_release_lockowner(struct nlm_lock *);
-void		  nlmsvc_mark_resources(struct net *);
-void		  nlmsvc_free_host_resources(struct nlm_host *);
-void		  nlmsvc_invalidate_all(void);
-
-static inline struct file *nlmsvc_file_file(const struct nlm_file *file)
-{
-	return file->f_file[O_RDONLY] ?
-	       file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
-}
-
-static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
-{
-	return file_inode(nlmsvc_file_file(file));
-}
-
-static inline bool
-nlmsvc_file_cannot_lock(const struct nlm_file *file)
-{
-	return exportfs_cannot_lock(nlmsvc_file_file(file)->f_path.dentry->d_sb->s_export_op);
-}
-
-static inline int __nlm_privileged_request4(const struct sockaddr *sap)
-{
-	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-
-	if (ntohs(sin->sin_port) > 1023)
-		return 0;
-
-	return ipv4_is_loopback(sin->sin_addr.s_addr);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int __nlm_privileged_request6(const struct sockaddr *sap)
-{
-	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-
-	if (ntohs(sin6->sin6_port) > 1023)
-		return 0;
-
-	if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED)
-		return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]);
-
-	return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK;
-}
-#else	/* IS_ENABLED(CONFIG_IPV6) */
-static inline int __nlm_privileged_request6(const struct sockaddr *sap)
-{
-	return 0;
-}
-#endif	/* IS_ENABLED(CONFIG_IPV6) */
-
-/*
- * Ensure incoming requests are from local privileged callers.
- *
- * Return TRUE if sender is local and is connecting via a privileged port;
- * otherwise return FALSE.
- */
-static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
-{
-	const struct sockaddr *sap = svc_addr(rqstp);
-
-	switch (sap->sa_family) {
-	case AF_INET:
-		return __nlm_privileged_request4(sap);
-	case AF_INET6:
-		return __nlm_privileged_request6(sap);
-	default:
-		return 0;
-	}
-}
-
-/*
- * Compare two NLM locks.
- * When the second lock is of type F_UNLCK, this acts like a wildcard.
- */
-static inline int nlm_compare_locks(const struct file_lock *fl1,
-				    const struct file_lock *fl2)
-{
-	return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file)
-	     && fl1->c.flc_pid   == fl2->c.flc_pid
-	     && fl1->c.flc_owner == fl2->c.flc_owner
-	     && fl1->fl_start == fl2->fl_start
-	     && fl1->fl_end   == fl2->fl_end
-	     &&(fl1->c.flc_type  == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK);
-}
-
-extern const struct lock_manager_operations nlmsvc_lock_operations;
-
-#endif /* LINUX_LOCKD_LOCKD_H */
-- 
cgit v1.2.3


From 236f3171ac690f632e13d391f47c68c3a8519bd2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:31 -0500
Subject: lockd: Remove lockd/debug.h

The lockd include structure has unnecessary indirection. The header
include/linux/lockd/debug.h is consumed only by fs/lockd/lockd.h,
creating an extra compilation dependency and making the code harder
to navigate.

Fold the debug.h definitions directly into lockd.h and remove the
now-redundant header. This reduces the include tree depth and makes
the debug-related definitions easier to find when working on lockd
internals.

Build-tested with lockd built as module and built-in.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/debug.h | 40 ----------------------------------------
 1 file changed, 40 deletions(-)
 delete mode 100644 include/linux/lockd/debug.h

(limited to 'include/linux')

diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h
deleted file mode 100644
index eede2ab5246f..000000000000
--- a/include/linux/lockd/debug.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/debug.h
- *
- * Debugging stuff.
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_DEBUG_H
-#define LINUX_LOCKD_DEBUG_H
-
-#include <linux/sunrpc/debug.h>
-
-/*
- * Enable lockd debugging.
- * Requires RPC_DEBUG.
- */
-#undef ifdebug
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define ifdebug(flag)		if (unlikely(nlm_debug & NLMDBG_##flag))
-#else
-# define ifdebug(flag)		if (0)
-#endif
-
-/*
- * Debug flags
- */
-#define NLMDBG_SVC		0x0001
-#define NLMDBG_CLIENT		0x0002
-#define NLMDBG_CLNTLOCK		0x0004
-#define NLMDBG_SVCLOCK		0x0008
-#define NLMDBG_MONITOR		0x0010
-#define NLMDBG_CLNTSUBS		0x0020
-#define NLMDBG_SVCSUBS		0x0040
-#define NLMDBG_HOSTCACHE	0x0080
-#define NLMDBG_XDR		0x0100
-#define NLMDBG_ALL		0x7fff
-
-#endif /* LINUX_LOCKD_DEBUG_H */
-- 
cgit v1.2.3


From 615384a24b1e6b0f091ebc1dfbf7ec8b4c27fa81 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:32 -0500
Subject: lockd: Move xdr.h from include/linux/lockd/ to fs/lockd/

The lockd subsystem unnecessarily exposes internal NLM XDR type
definitions through the global include path. These definitions
are not used by any code outside fs/lockd/, making them
inappropriate for include/linux/lockd/.

Moving xdr.h to fs/lockd/ narrows the API surface and clarifies
that these types are internal implementation details. The
comment in linux/lockd/bind.h stating xdr.h was needed for
"xdr-encoded error codes" is stale: no lockd API consumers use
those codes.

Forward declarations for struct nfs_fh and struct file_lock are
added to bind.h because their definitions were previously pulled
in transitively through xdr.h. Additionally, nfs3proc.c and
proc.c need explicit includes of filelock.h for FL_CLOSE and
for accessing struct file_lock members, respectively.

Built and tested with lockd client/server operations. No
functional change.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/bind.h |   5 +-
 include/linux/lockd/xdr.h  | 113 ---------------------------------------------
 2 files changed, 2 insertions(+), 116 deletions(-)
 delete mode 100644 include/linux/lockd/xdr.h

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 077da0696f12..ba9258c96bfd 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -11,10 +11,9 @@
 #define LINUX_LOCKD_BIND_H
 
 #include <linux/lockd/nlm.h>
-/* need xdr-encoded error codes too, so... */
-#include <linux/lockd/xdr.h>
 
-/* Dummy declarations */
+struct file_lock;
+struct nfs_fh;
 struct svc_rqst;
 struct rpc_task;
 struct rpc_clnt;
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
deleted file mode 100644
index 292e4e38d17d..000000000000
--- a/include/linux/lockd/xdr.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/xdr.h
- *
- * XDR types for the NLM protocol
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LOCKD_XDR_H
-#define LOCKD_XDR_H
-
-#include <linux/fs.h>
-#include <linux/filelock.h>
-#include <linux/nfs.h>
-#include <linux/sunrpc/xdr.h>
-
-#define SM_MAXSTRLEN		1024
-#define SM_PRIV_SIZE		16
-
-struct nsm_private {
-	unsigned char		data[SM_PRIV_SIZE];
-};
-
-struct svc_rqst;
-
-#define NLM_MAXCOOKIELEN    	32
-#define NLM_MAXSTRLEN		1024
-
-#define	nlm_granted		cpu_to_be32(NLM_LCK_GRANTED)
-#define	nlm_lck_denied		cpu_to_be32(NLM_LCK_DENIED)
-#define	nlm_lck_denied_nolocks	cpu_to_be32(NLM_LCK_DENIED_NOLOCKS)
-#define	nlm_lck_blocked		cpu_to_be32(NLM_LCK_BLOCKED)
-#define	nlm_lck_denied_grace_period	cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD)
-
-/* Lock info passed via NLM */
-struct nlm_lock {
-	char *			caller;
-	unsigned int		len; 	/* length of "caller" */
-	struct nfs_fh		fh;
-	struct xdr_netobj	oh;
-	u32			svid;
-	u64			lock_start;
-	u64			lock_len;
-	struct file_lock	fl;
-};
-
-/*
- *	NLM cookies. Technically they can be 1K, but Linux only uses 8 bytes.
- *	FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to
- *	32 bytes.
- */
-
-struct nlm_cookie
-{
-	unsigned char data[NLM_MAXCOOKIELEN];
-	unsigned int len;
-};
-
-/*
- * Generic lockd arguments for all but sm_notify
- */
-struct nlm_args {
-	struct nlm_cookie	cookie;
-	struct nlm_lock		lock;
-	u32			block;
-	u32			reclaim;
-	u32			state;
-	u32			monitor;
-	u32			fsm_access;
-	u32			fsm_mode;
-};
-
-/*
- * Generic lockd result
- */
-struct nlm_res {
-	struct nlm_cookie	cookie;
-	__be32			status;
-	struct nlm_lock		lock;
-};
-
-/*
- * statd callback when client has rebooted
- */
-struct nlm_reboot {
-	char			*mon;
-	unsigned int		len;
-	u32			state;
-	struct nsm_private	priv;
-};
-
-/*
- * Contents of statd callback when monitored host rebooted
- */
-#define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
-
-bool	nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-bool	nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-#endif /* LOCKD_XDR_H */
-- 
cgit v1.2.3


From 5829352e568d24dd04ae112128a4f44748d073bc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 Jan 2026 10:19:33 -0500
Subject: lockd: Make linux/lockd/nlm.h an internal header

The NLM protocol constants and status codes in nlm.h are needed
only by lockd's internal implementation. NFS client code and
NFSD interact with lockd through the stable API in bind.h and
have no direct use for protocol-level definitions.

Exposing these definitions globally via bind.h creates unnecessary
coupling between lockd internals and its consumers. Moving nlm.h
from include/linux/lockd/ to fs/lockd/ clarifies the API boundary:
bind.h provides the lockd service interface, while nlm.h remains
available only to code within fs/lockd/ that implements the
protocol.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/lockd/bind.h |  2 --
 include/linux/lockd/nlm.h  | 58 ----------------------------------------------
 2 files changed, 60 deletions(-)
 delete mode 100644 include/linux/lockd/nlm.h

(limited to 'include/linux')

diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index ba9258c96bfd..b614e0deea72 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -10,8 +10,6 @@
 #ifndef LINUX_LOCKD_BIND_H
 #define LINUX_LOCKD_BIND_H
 
-#include <linux/lockd/nlm.h>
-
 struct file_lock;
 struct nfs_fh;
 struct svc_rqst;
diff --git a/include/linux/lockd/nlm.h b/include/linux/lockd/nlm.h
deleted file mode 100644
index 6e343ef760dc..000000000000
--- a/include/linux/lockd/nlm.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/nlm.h
- *
- * Declarations for the Network Lock Manager protocol.
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_NLM_H
-#define LINUX_LOCKD_NLM_H
-
-
-/* Maximum file offset in file_lock.fl_end */
-# define NLM_OFFSET_MAX		((s32) 0x7fffffff)
-# define NLM4_OFFSET_MAX	((s64) ((~(u64)0) >> 1))
-
-/* Return states for NLM */
-enum {
-	NLM_LCK_GRANTED			= 0,
-	NLM_LCK_DENIED			= 1,
-	NLM_LCK_DENIED_NOLOCKS		= 2,
-	NLM_LCK_BLOCKED			= 3,
-	NLM_LCK_DENIED_GRACE_PERIOD	= 4,
-#ifdef CONFIG_LOCKD_V4
-	NLM_DEADLCK			= 5,
-	NLM_ROFS			= 6,
-	NLM_STALE_FH			= 7,
-	NLM_FBIG			= 8,
-	NLM_FAILED			= 9,
-#endif
-};
-
-#define NLM_PROGRAM		100021
-
-#define NLMPROC_NULL		0
-#define NLMPROC_TEST		1
-#define NLMPROC_LOCK		2
-#define NLMPROC_CANCEL		3
-#define NLMPROC_UNLOCK		4
-#define NLMPROC_GRANTED		5
-#define NLMPROC_TEST_MSG	6
-#define NLMPROC_LOCK_MSG	7
-#define NLMPROC_CANCEL_MSG	8
-#define NLMPROC_UNLOCK_MSG	9
-#define NLMPROC_GRANTED_MSG	10
-#define NLMPROC_TEST_RES	11
-#define NLMPROC_LOCK_RES	12
-#define NLMPROC_CANCEL_RES	13
-#define NLMPROC_UNLOCK_RES	14
-#define NLMPROC_GRANTED_RES	15
-#define NLMPROC_NSM_NOTIFY	16		/* statd callback */
-#define NLMPROC_SHARE		20
-#define NLMPROC_UNSHARE		21
-#define NLMPROC_NM_LOCK		22
-#define NLMPROC_FREE_ALL	23
-
-#endif /* LINUX_LOCKD_NLM_H */
-- 
cgit v1.2.3


From adcc59114ccd402259c089b0fea24da5e4974563 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Feb 2026 21:21:50 +0100
Subject: sunrpc: Kill RPC_IFDEBUG()

RPC_IFDEBUG() is used in only two places. In one, the user of
the definition is guarded by ifdeffery; in the other, the guard
is implied by dprintk() usage. Kill the macro and move
the ifdeffery to the regular condition with the variable defined
inside, while in the second case add the same conditional and
move the respective code there.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/debug.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index eb4bd62df319..93d1a11ffbfb 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -49,12 +49,10 @@ do {									\
 	}								\
 } while (0)
 
-# define RPC_IFDEBUG(x)		x
 #else
 # define ifdebug(fac)		if (0)
 # define dfprintk(fac, fmt, ...)	do {} while (0)
 # define dfprintk_rcu(fac, fmt, ...)	do {} while (0)
-# define RPC_IFDEBUG(x)
 #endif
 
 /*
-- 
cgit v1.2.3


From 6f57293abb8d087de830dd3f02e66d94b3e59973 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Feb 2026 21:21:51 +0100
Subject: sunrpc: Fix compilation error (`make W=1`) when dprintk() is no-op

The Clang compiler is not happy about set-but-unused variables:

.../flexfilelayout/flexfilelayoutdev.c:56:9: error: variable 'ret' set but not used [-Werror,-Wunused-but-set-variable]
.../flexfilelayout/flexfilelayout.c:1505:6: error: variable 'err' set but not used [-Werror,-Wunused-but-set-variable]
.../nfs4proc.c:9244:12: error: variable 'ptr' set but not used [-Werror,-Wunused-but-set-variable]

Fix these by forwarding the parameters of dprintk() to no_printk().
A positive side effect is that format-string checking is now enabled
even when dprintk() is a no-op.

Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Fixes: fc931582c260 ("nfs41: create_session operation")
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/debug.h | 8 ++++++--
 include/linux/sunrpc/sched.h | 3 ---
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 93d1a11ffbfb..ab61bed2f7af 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -38,6 +38,8 @@ extern unsigned int		nlm_debug;
 do {									\
 	ifdebug(fac)							\
 		__sunrpc_printk(fmt, ##__VA_ARGS__);			\
+	else								\
+		no_printk(fmt, ##__VA_ARGS__);				\
 } while (0)
 
 # define dfprintk_rcu(fac, fmt, ...)					\
@@ -46,13 +48,15 @@ do {									\
 		rcu_read_lock();					\
 		__sunrpc_printk(fmt, ##__VA_ARGS__);			\
 		rcu_read_unlock();					\
+	} else {							\
+		no_printk(fmt, ##__VA_ARGS__);				\
 	}								\
 } while (0)
 
 #else
 # define ifdebug(fac)		if (0)
-# define dfprintk(fac, fmt, ...)	do {} while (0)
-# define dfprintk_rcu(fac, fmt, ...)	do {} while (0)
+# define dfprintk(fac, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
+# define dfprintk_rcu(fac, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /*
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index ccba79ebf893..0dbdf3722537 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -95,10 +95,7 @@ struct rpc_task {
 	int			tk_rpc_status;	/* Result of last RPC operation */
 	unsigned short		tk_flags;	/* misc flags */
 	unsigned short		tk_timeouts;	/* maj timeouts */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
 	unsigned short		tk_pid;		/* debugging aid */
-#endif
 	unsigned char		tk_priority : 2,/* Task priority */
 				tk_garb_retry : 2,
 				tk_cred_retry : 2;
-- 
cgit v1.2.3


From f52792f484ba2316853736856dde19b7e7458861 Mon Sep 17 00:00:00 2001
From: Dai Ngo <dai.ngo@oracle.com>
Date: Fri, 13 Feb 2026 10:36:30 -0800
Subject: NFSD: Enforce timeout on layout recall and integrate lease manager
 fencing

When a layout conflict triggers a recall, enforcing a timeout is
necessary to prevent an excessive number of nfsd threads from being
blocked in __break_lease, ensuring that the server continues
servicing incoming requests efficiently.

This patch introduces a new function to lease_manager_operations:

lm_breaker_timedout: Invoked when a lease recall times out and the
lease is about to be disposed of. This function enables the lease
manager to inform the caller whether the file_lease should remain on
the flc_list or be disposed of.

For the NFSD lease manager, this function now handles layout recall
timeouts. If the layout type supports fencing and the client has not
been fenced, a fence operation is triggered to prevent the client
from accessing the block device.

While the fencing operation is in progress, the conflicting file_lease
remains on the flc_list until fencing is complete. This guarantees
that no other clients can access the file, and the client with
exclusive access is properly blocked before disposal.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/filelock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index d2c9740e26a8..5f0a2fb31450 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -50,6 +50,7 @@ struct lease_manager_operations {
 	void (*lm_setup)(struct file_lease *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lease *);
 	int (*lm_open_conflict)(struct file *, int);
+	bool (*lm_breaker_timedout)(struct file_lease *fl);
 };
 
 struct lock_manager {
-- 
cgit v1.2.3


From 5bc37b759ec0cdde2c652a2637d704f2d6306617 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 17 Feb 2026 17:06:53 -0500
Subject: Documentation: Add the RPC language description of NLM version 4

In order to generate source code to encode and decode NLMv4 protocol
elements, include a copy of the RPC language description of NLMv4
for xdrgen to process. The language description is an amalgam of
RFC 1813 and the Open Group's XNFS specification:

  https://pubs.opengroup.org/onlinepubs/9629799/chap10.htm

The C code committed here was generated from the new nlm4.x file
using tools/net/sunrpc/xdrgen/xdrgen.

The goals of replacing hand-written XDR functions with ones that
are tool-generated are to improve memory safety and make XDR
encoding and decoding less brittle to maintain.

The xdrgen utility derives both the type definitions and the
encode/decode functions directly from protocol specifications,
using names and symbols familiar to anyone who knows those specs.
Unlike hand-written code that can inadvertently diverge from the
specification, xdrgen guarantees that the generated code matches
the specification exactly.

We would eventually like xdrgen to generate Rust code as well,
making the conversion of the kernel's NFS stacks to use Rust just
a little easier for us.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xdrgen/nlm4.h | 233 +++++++++++++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 include/linux/sunrpc/xdrgen/nlm4.h

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdrgen/nlm4.h b/include/linux/sunrpc/xdrgen/nlm4.h
new file mode 100644
index 000000000000..e95e8f105624
--- /dev/null
+++ b/include/linux/sunrpc/xdrgen/nlm4.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x */
+/* XDR specification modification time: Thu Dec 25 13:10:19 2025 */
+
+#ifndef _LINUX_XDRGEN_NLM4_DEF_H
+#define _LINUX_XDRGEN_NLM4_DEF_H
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+
+enum { LM_MAXSTRLEN = 1024 };
+
+enum { LM_MAXNAMELEN = 1025 };
+
+enum { MAXNETOBJ_SZ = 1024 };
+
+typedef opaque netobj;
+
+enum fsh4_mode {
+	fsm_DN = 0,
+	fsm_DR = 1,
+	fsm_DW = 2,
+	fsm_DRW = 3,
+};
+
+typedef enum fsh4_mode fsh4_mode;
+
+enum fsh4_access {
+	fsa_NONE = 0,
+	fsa_R = 1,
+	fsa_W = 2,
+	fsa_RW = 3,
+};
+
+typedef enum fsh4_access fsh4_access;
+
+enum { SM_MAXSTRLEN = 1024 };
+
+typedef u64 uint64;
+
+typedef s64 int64;
+
+typedef u32 uint32;
+
+typedef s32 int32;
+
+enum nlm4_stats {
+	NLM4_GRANTED = 0,
+	NLM4_DENIED = 1,
+	NLM4_DENIED_NOLOCKS = 2,
+	NLM4_BLOCKED = 3,
+	NLM4_DENIED_GRACE_PERIOD = 4,
+	NLM4_DEADLCK = 5,
+	NLM4_ROFS = 6,
+	NLM4_STALE_FH = 7,
+	NLM4_FBIG = 8,
+	NLM4_FAILED = 9,
+};
+
+typedef __be32 nlm4_stats;
+
+struct nlm4_holder {
+	bool exclusive;
+	int32 svid;
+	netobj oh;
+	uint64 l_offset;
+	uint64 l_len;
+};
+
+struct nlm4_testrply {
+	nlm4_stats stat;
+	union {
+		struct nlm4_holder holder;
+	} u;
+};
+
+struct nlm4_stat {
+	nlm4_stats stat;
+};
+
+struct nlm4_res {
+	netobj cookie;
+	struct nlm4_stat stat;
+};
+
+struct nlm4_testres {
+	netobj cookie;
+	struct nlm4_testrply stat;
+};
+
+struct nlm4_lock {
+	string caller_name;
+	netobj fh;
+	netobj oh;
+	int32 svid;
+	uint64 l_offset;
+	uint64 l_len;
+};
+
+struct nlm4_lockargs {
+	netobj cookie;
+	bool block;
+	bool exclusive;
+	struct nlm4_lock alock;
+	bool reclaim;
+	int32 state;
+};
+
+struct nlm4_cancargs {
+	netobj cookie;
+	bool block;
+	bool exclusive;
+	struct nlm4_lock alock;
+};
+
+struct nlm4_testargs {
+	netobj cookie;
+	bool exclusive;
+	struct nlm4_lock alock;
+};
+
+struct nlm4_unlockargs {
+	netobj cookie;
+	struct nlm4_lock alock;
+};
+
+struct nlm4_share {
+	string caller_name;
+	netobj fh;
+	netobj oh;
+	fsh4_mode mode;
+	fsh4_access access;
+};
+
+struct nlm4_shareargs {
+	netobj cookie;
+	struct nlm4_share share;
+	bool reclaim;
+};
+
+struct nlm4_shareres {
+	netobj cookie;
+	nlm4_stats stat;
+	int32 sequence;
+};
+
+struct nlm4_notify {
+	string name;
+	int32 state;
+};
+
+enum { SM_PRIV_SIZE = 16 };
+
+struct nlm4_notifyargs {
+	struct nlm4_notify notify;
+	u8 private[SM_PRIV_SIZE];
+};
+
+enum {
+	NLMPROC4_NULL = 0,
+	NLMPROC4_TEST = 1,
+	NLMPROC4_LOCK = 2,
+	NLMPROC4_CANCEL = 3,
+	NLMPROC4_UNLOCK = 4,
+	NLMPROC4_GRANTED = 5,
+	NLMPROC4_TEST_MSG = 6,
+	NLMPROC4_LOCK_MSG = 7,
+	NLMPROC4_CANCEL_MSG = 8,
+	NLMPROC4_UNLOCK_MSG = 9,
+	NLMPROC4_GRANTED_MSG = 10,
+	NLMPROC4_TEST_RES = 11,
+	NLMPROC4_LOCK_RES = 12,
+	NLMPROC4_CANCEL_RES = 13,
+	NLMPROC4_UNLOCK_RES = 14,
+	NLMPROC4_GRANTED_RES = 15,
+	NLMPROC4_SM_NOTIFY = 16,
+	NLMPROC4_SHARE = 20,
+	NLMPROC4_UNSHARE = 21,
+	NLMPROC4_NM_LOCK = 22,
+	NLMPROC4_FREE_ALL = 23,
+};
+
+#ifndef NLM4_PROG
+#define NLM4_PROG (100021)
+#endif
+
+#define NLM4_netobj_sz                  (XDR_unsigned_int + XDR_QUADLEN(MAXNETOBJ_SZ))
+#define NLM4_fsh4_mode_sz               (XDR_int)
+#define NLM4_fsh4_access_sz             (XDR_int)
+#define NLM4_uint64_sz                  \
+	(XDR_unsigned_hyper)
+#define NLM4_int64_sz                   \
+	(XDR_hyper)
+#define NLM4_uint32_sz                  \
+	(XDR_unsigned_long)
+#define NLM4_int32_sz                   \
+	(XDR_long)
+#define NLM4_nlm4_stats_sz              (XDR_int)
+#define NLM4_nlm4_holder_sz             \
+	(XDR_bool + NLM4_int32_sz + NLM4_netobj_sz + NLM4_uint64_sz + NLM4_uint64_sz)
+#define NLM4_nlm4_testrply_sz           \
+	(NLM4_nlm4_stats_sz + NLM4_nlm4_holder_sz)
+#define NLM4_nlm4_stat_sz               \
+	(NLM4_nlm4_stats_sz)
+#define NLM4_nlm4_res_sz                \
+	(NLM4_netobj_sz + NLM4_nlm4_stat_sz)
+#define NLM4_nlm4_testres_sz            \
+	(NLM4_netobj_sz + NLM4_nlm4_testrply_sz)
+#define NLM4_nlm4_lock_sz               \
+	(XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_int32_sz + NLM4_uint64_sz + NLM4_uint64_sz)
+#define NLM4_nlm4_lockargs_sz           \
+	(NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz + XDR_bool + NLM4_int32_sz)
+#define NLM4_nlm4_cancargs_sz           \
+	(NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_testargs_sz           \
+	(NLM4_netobj_sz + XDR_bool + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_unlockargs_sz         \
+	(NLM4_netobj_sz + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_share_sz              \
+	(XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_fsh4_mode_sz + NLM4_fsh4_access_sz)
+#define NLM4_nlm4_shareargs_sz          \
+	(NLM4_netobj_sz + NLM4_nlm4_share_sz + XDR_bool)
+#define NLM4_nlm4_shareres_sz           \
+	(NLM4_netobj_sz + NLM4_nlm4_stats_sz + NLM4_int32_sz)
+#define NLM4_nlm4_notify_sz             \
+	(XDR_unsigned_int + XDR_QUADLEN(LM_MAXNAMELEN) + NLM4_int32_sz)
+#define NLM4_nlm4_notifyargs_sz         \
+	(NLM4_nlm4_notify_sz + XDR_QUADLEN(SM_PRIV_SIZE))
+#define NLM4_MAX_ARGS_SZ                \
+	(NLM4_nlm4_lockargs_sz)
+
+#endif /* _LINUX_XDRGEN_NLM4_DEF_H */
-- 
cgit v1.2.3


From 17c1d66579ff27a7a8f2f407d1425272ff6fdd8c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 23 Feb 2026 12:09:59 -0500
Subject: sunrpc: convert queue_lock from global spinlock to per-cache-detail
 lock

The global queue_lock serializes all upcall queue operations across
every cache_detail instance. Convert it to a per-cache-detail spinlock
so that different caches (e.g. auth.unix.ip vs nfsd.fh) no longer
contend with each other on queue operations.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/cache.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index e783132e481f..3d32dd1f7b05 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -113,6 +113,7 @@ struct cache_detail {
 
 	/* fields for communication over channel */
 	struct list_head	queue;
+	spinlock_t		queue_lock;
 
 	atomic_t		writers;		/* how many time is /channel open */
 	time64_t		last_close;		/* if no writers, when did last close */
-- 
cgit v1.2.3


From 552d0e17ea042fc4f959c4543cbbd0e54de7a8e9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 23 Feb 2026 12:10:00 -0500
Subject: sunrpc: convert queue_wait from global to per-cache-detail waitqueue

The queue_wait waitqueue is currently a file-scoped global, so a
wake_up for one cache_detail wakes pollers on all caches. Convert it
to a per-cache-detail field so that only pollers on the relevant cache
are woken.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/cache.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 3d32dd1f7b05..031379efba24 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -16,6 +16,7 @@
 #include <linux/atomic.h>
 #include <linux/kstrtox.h>
 #include <linux/proc_fs.h>
+#include <linux/wait.h>
 
 /*
  * Each cache requires:
@@ -114,6 +115,7 @@ struct cache_detail {
 	/* fields for communication over channel */
 	struct list_head	queue;
 	spinlock_t		queue_lock;
+	wait_queue_head_t	queue_wait;
 
 	atomic_t		writers;		/* how many time is /channel open */
 	time64_t		last_close;		/* if no writers, when did last close */
-- 
cgit v1.2.3


From facc4e3c80420e3466003ce09b576e005b56a015 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 23 Feb 2026 12:10:01 -0500
Subject: sunrpc: split cache_detail queue into request and reader lists

Replace the single interleaved queue (which mixed cache_request and
cache_reader entries distinguished by a ->reader flag) with two
dedicated lists: cd->requests for upcall requests and cd->readers
for open file handles.

Readers now track their position via a monotonically increasing
sequence number (next_seqno) rather than by their position in the
shared list. Each cache_request is assigned a seqno when enqueued,
and a new cache_next_request() helper finds the next request at or
after a given seqno.

This eliminates the cache_queue wrapper struct entirely, simplifies
the reader-skipping loops in cache_read/cache_poll/cache_ioctl/
cache_release, and makes the data flow easier to reason about.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/cache.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 031379efba24..b1e595c2615b 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -113,9 +113,11 @@ struct cache_detail {
 	int			entries;
 
 	/* fields for communication over channel */
-	struct list_head	queue;
+	struct list_head	requests;
+	struct list_head	readers;
 	spinlock_t		queue_lock;
 	wait_queue_head_t	queue_wait;
+	u64			next_seqno;
 
 	atomic_t		writers;		/* how many time is /channel open */
 	time64_t		last_close;		/* if no writers, when did last close */
-- 
cgit v1.2.3


From ee66b9e3e1c69efc986f3932555f07121c3460a7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 26 Feb 2026 09:47:35 -0500
Subject: SUNRPC: Allocate a separate Reply page array

struct svc_rqst uses a single dynamically-allocated page array
(rq_pages) for both the incoming RPC Call message and the outgoing
RPC Reply message. rq_respages is a sliding pointer into rq_pages
that each transport receive path must compute based on how many
pages the Call consumed. This boundary tracking is a source of
confusion and bugs, and prevents an RPC transaction from having
both a large Call and a large Reply simultaneously.

Allocate rq_respages as its own page array, eliminating the boundary
arithmetic. This decouples Call and Reply buffer lifetimes,
following the precedent set by rq_bvec (a separate dynamically-
allocated array for I/O vectors).

Each svc_rqst now pins twice as many pages as before. For a server
running 16 threads with a 1MB maximum payload, the additional cost
is roughly 16MB of pinned memory. The new dynamic svc thread count
facility keeps this overhead minimal on an idle server. A subsequent
patch in this series limits per-request repopulation to only the
pages released during the previous RPC, avoiding a full-array scan
on each call to svc_alloc_arg().

Note: We've considered several alternatives to maintaining a full
second array. Each alternative reintroduces either boundary logic
complexity or I/O-path allocation pressure.

rq_next_page is initialized in svc_alloc_arg() and svc_process()
during Reply construction, and in svc_rdma_recvfrom() as a
precaution on error paths. Transport receive paths no longer compute
it from the Call size.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 47 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 62152e4f3bcc..3b1a98ab5cba 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -134,25 +134,24 @@ enum {
 extern u32 svc_max_payload(const struct svc_rqst *rqstp);
 
 /*
- * RPC Requests and replies are stored in one or more pages.
- * We maintain an array of pages for each server thread.
- * Requests are copied into these pages as they arrive.  Remaining
- * pages are available to write the reply into.
+ * RPC Call and Reply messages each have their own page array.
+ * rq_pages holds the incoming Call message; rq_respages holds
+ * the outgoing Reply message. Both arrays are sized to
+ * svc_serv_maxpages() entries and are allocated dynamically.
  *
- * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread
- * needs to allocate more to replace those used in sending.  To help keep track
- * of these pages we have a receive list where all pages initialy live, and a
- * send list where pages are moved to when there are to be part of a reply.
+ * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each
+ * server thread needs to allocate more to replace those used in
+ * sending.
  *
- * We use xdr_buf for holding responses as it fits well with NFS
- * read responses (that have a header, and some data pages, and possibly
- * a tail) and means we can share some client side routines.
+ * xdr_buf holds responses; the structure fits NFS read responses
+ * (header, data pages, optional tail) and enables sharing of
+ * client-side routines.
  *
- * The xdr_buf.head kvec always points to the first page in the rq_*pages
- * list.  The xdr_buf.pages pointer points to the second page on that
- * list.  xdr_buf.tail points to the end of the first page.
- * This assumes that the non-page part of an rpc reply will fit
- * in a page - NFSd ensures this.  lockd also has no trouble.
+ * The xdr_buf.head kvec always points to the first page in the
+ * rq_*pages list. The xdr_buf.pages pointer points to the second
+ * page on that list. xdr_buf.tail points to the end of the first
+ * page. This assumes that the non-page part of an rpc reply will
+ * fit in a page - NFSd ensures this. lockd also has no trouble.
  */
 
 /**
@@ -162,10 +161,10 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * Returns a count of pages or vectors that can hold the maximum
  * size RPC message for @serv.
  *
- * Each request/reply pair can have at most one "payload", plus two
- * pages, one for the request, and one for the reply.
- * nfsd_splice_actor() might need an extra page when a READ payload
- * is not page-aligned.
+ * Each page array can hold at most one payload plus two
+ * overhead pages (one for the RPC header, one for tail data).
+ * nfsd_splice_actor() might need an extra page when a READ
+ * payload is not page-aligned.
  */
 static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
 {
@@ -204,11 +203,11 @@ struct svc_rqst {
 	struct xdr_stream	rq_res_stream;
 	struct folio		*rq_scratch_folio;
 	struct xdr_buf		rq_res;
-	unsigned long		rq_maxpages;	/* num of entries in rq_pages */
-	struct page *		*rq_pages;
-	struct page *		*rq_respages;	/* points into rq_pages */
+	unsigned long		rq_maxpages;	/* entries per page array */
+	struct page *		*rq_pages;	/* Call buffer pages */
+	struct page *		*rq_respages;	/* Reply buffer pages */
 	struct page *		*rq_next_page; /* next reply page to use */
-	struct page *		*rq_page_end;  /* one past the last page */
+	struct page *		*rq_page_end;  /* one past the last reply page */
 
 	struct folio_batch	rq_fbatch;
 	struct bio_vec		*rq_bvec;
-- 
cgit v1.2.3


From 7ed7504287a627834f2a35ef04e5dfd26d1c8986 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 26 Feb 2026 09:47:38 -0500
Subject: SUNRPC: Track consumed rq_pages entries

The rq_pages array holds pages allocated for incoming RPC requests.
Two transport receive paths NULL entries in rq_pages to prevent
svc_rqst_release_pages() from freeing pages that the transport has
taken ownership of:

- svc_tcp_save_pages() moves partial request data pages to
  svsk->sk_pages during multi-fragment TCP reassembly.

- svc_rdma_clear_rqst_pages() moves request data pages to
  head->rc_pages because they are targets of active RDMA Read WRs.

A new rq_pages_nfree field in struct svc_rqst records how many
entries were NULLed. svc_alloc_arg() uses it to refill only those
entries rather than scanning the full rq_pages array. In steady
state, the transport NULLs a handful of entries per RPC, so the
allocator visits only those entries instead of the full ~259 slots
(for 1MB messages).

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3b1a98ab5cba..c3399cf64524 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -143,6 +143,15 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * server thread needs to allocate more to replace those used in
  * sending.
  *
+ * rq_pages request page contract:
+ *
+ * Transport receive paths that move request data pages out of
+ * rq_pages -- TCP multi-fragment reassembly (svc_tcp_save_pages)
+ * and RDMA Read I/O (svc_rdma_clear_rqst_pages) -- NULL those
+ * entries to prevent svc_rqst_release_pages() from freeing pages
+ * still in transport use, and set rq_pages_nfree to the count.
+ * svc_alloc_arg() refills only that many rq_pages entries.
+ *
  * xdr_buf holds responses; the structure fits NFS read responses
  * (header, data pages, optional tail) and enables sharing of
  * client-side routines.
@@ -204,6 +213,7 @@ struct svc_rqst {
 	struct folio		*rq_scratch_folio;
 	struct xdr_buf		rq_res;
 	unsigned long		rq_maxpages;	/* entries per page array */
+	unsigned long		rq_pages_nfree;	/* rq_pages entries NULLed by transport */
 	struct page *		*rq_pages;	/* Call buffer pages */
 	struct page *		*rq_respages;	/* Reply buffer pages */
 	struct page *		*rq_next_page; /* next reply page to use */
-- 
cgit v1.2.3


From d7f3efd9ff474867b04e1ea784690f02450a245b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 26 Feb 2026 09:47:39 -0500
Subject: SUNRPC: Optimize rq_respages allocation in svc_alloc_arg

svc_alloc_arg() invokes alloc_pages_bulk() with the full rq_maxpages
count (~259 for 1MB messages) for the rq_respages array, causing a
full-array scan despite most slots holding valid pages.

svc_rqst_release_pages() NULLs only the range

  [rq_respages, rq_next_page)

after each RPC, so only that range contains NULL entries. Limit the
rq_respages fill in svc_alloc_arg() to that range instead of
scanning the full array.

svc_init_buffer() initializes rq_next_page to span the entire
rq_respages array, so the first svc_alloc_arg() call fills all
slots.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index c3399cf64524..669c944eaf7f 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -152,6 +152,10 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * still in transport use, and set rq_pages_nfree to the count.
  * svc_alloc_arg() refills only that many rq_pages entries.
  *
+ * For rq_respages, svc_rqst_release_pages() NULLs entries in
+ * [rq_respages, rq_next_page) after each RPC. svc_alloc_arg()
+ * refills only that range.
+ *
  * xdr_buf holds responses; the structure fits NFS read responses
  * (header, data pages, optional tail) and enables sharing of
  * client-side routines.
-- 
cgit v1.2.3


From ccc89b9d1ed233349cfe8d87b842e7351b74d8de Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Feb 2026 09:03:28 -0500
Subject: svcrdma: Add fair queuing for Send Queue access

When the Send Queue fills, multiple threads may wait for SQ slots.
The previous implementation had no ordering guarantee, allowing
starvation when one thread repeatedly acquires slots while others
wait indefinitely.

Introduce a ticket-based fair queuing system. Each waiter takes a
ticket number and is served in FIFO order. This ensures forward
progress for all waiters when SQ capacity is constrained.

The implementation has two phases:
1. Fast path: attempt to reserve SQ slots without waiting
2. Slow path: take a ticket, wait for turn, then wait for slots

The ticket system adds two atomic counters to the transport:
- sc_sq_ticket_head: next ticket to issue
- sc_sq_ticket_tail: ticket currently being served

A dedicated wait queue (sc_sq_ticket_wait) handles ticket
ordering, separate from sc_send_wait which handles SQ capacity.
This separation ensures that send completions (the high-frequency
wake source) wake only the current ticket holder rather than all
queued waiters. Ticket handoff wakes only the ticket wait queue,
and each ticket holder that exits via connection close propagates
the wake to the next waiter in line.

When a waiter successfully reserves slots, it advances the tail
counter and wakes the next waiter. This creates an orderly handoff
that prevents starvation while maintaining good throughput on the
fast path when contention is low.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 57f4fd94166a..658b8498177e 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -84,6 +84,9 @@ struct svcxprt_rdma {
 
 	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
 	unsigned int	     sc_sq_depth;	/* Depth of SQ */
+	atomic_t	     sc_sq_ticket_head;	/* Next ticket to issue */
+	atomic_t	     sc_sq_ticket_tail;	/* Ticket currently serving */
+	wait_queue_head_t    sc_sq_ticket_wait;	/* Ticket ordering waitlist */
 	__be32		     sc_fc_credits;	/* Forward credits */
 	u32		     sc_max_requests;	/* Max requests */
 	u32		     sc_max_bc_requests;/* Backward credits */
@@ -306,6 +309,13 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 				    struct svc_rdma_recv_ctxt *rctxt,
 				    int status);
 extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
+extern int svc_rdma_sq_wait(struct svcxprt_rdma *rdma,
+			    const struct rpc_rdma_cid *cid, int sqecount);
+extern int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
+				  const struct rpc_rdma_cid *cid,
+				  const struct ib_send_wr *bad_wr,
+				  const struct ib_send_wr *first_wr,
+				  int sqecount, int ret);
 extern int svc_rdma_sendto(struct svc_rqst *);
 extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
 				   unsigned int length);
-- 
cgit v1.2.3


From d16f060f3ee297424c0aba047b1d49208adb9318 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Feb 2026 09:03:31 -0500
Subject: svcrdma: Add Write chunk WRs to the RPC's Send WR chain

Previously, Write chunk RDMA Writes were posted via a separate
ib_post_send() call with their own completion handler. Each Write
chunk incurred a doorbell and generated a completion event.

Link Write chunk WRs onto the RPC Reply's Send WR chain so that a
single ib_post_send() call posts both the RDMA Writes and the Send
WR. A single completion event signals that all operations have
finished. This reduces both doorbell rate and completion rate, as
well as eliminating the latency of a round-trip between the Write
chunk completion and the subsequent Send WR posting.

The lifecycle of Write chunk resources changes: previously, the
svc_rdma_write_done() completion handler released Write chunk
resources when RDMA Writes completed. With WR chaining, resources
remain live until the Send completion. A new sc_write_info_list
tracks Write chunk metadata attached to each Send context, and
svc_rdma_write_chunk_release() frees these resources when the
Send context is released.

The svc_rdma_write_done() handler now handles only error cases.
On success it returns immediately since the Send completion handles
resource release. On failure (WR flush), it closes the connection
to signal to the client that the RPC Reply is incomplete.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 658b8498177e..df6e08aaad57 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -216,6 +216,7 @@ struct svc_rdma_recv_ctxt {
  */
 struct svc_rdma_write_info {
 	struct svcxprt_rdma	*wi_rdma;
+	struct list_head	wi_list;
 
 	const struct svc_rdma_chunk	*wi_chunk;
 
@@ -244,7 +245,10 @@ struct svc_rdma_send_ctxt {
 	struct ib_cqe		sc_cqe;
 	struct xdr_buf		sc_hdrbuf;
 	struct xdr_stream	sc_stream;
+
+	struct list_head	sc_write_info_list;
 	struct svc_rdma_write_info sc_reply_info;
+
 	void			*sc_xprt_buf;
 	int			sc_page_count;
 	int			sc_cur_sge_no;
@@ -277,11 +281,14 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
 extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
 				struct svc_rdma_chunk_ctxt *cc,
 				enum dma_data_direction dir);
+extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+					 struct svc_rdma_send_ctxt *ctxt);
 extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
 					 struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
-				    const struct svc_rdma_recv_ctxt *rctxt,
-				    const struct xdr_buf *xdr);
+extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+				       const struct svc_rdma_recv_ctxt *rctxt,
+				       struct svc_rdma_send_ctxt *sctxt,
+				       const struct xdr_buf *xdr);
 extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
 					const struct svc_rdma_pcl *write_pcl,
 					const struct svc_rdma_pcl *reply_pcl,
-- 
cgit v1.2.3


From 3603bf99062c6d563df4fba3848f829d5401d959 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 28 Feb 2026 14:09:22 -0800
Subject: SUNRPC: xdr.h: fix all kernel-doc warnings

Correct a function parameter name (s/page/folio/) and add function
return value sections for multiple functions to eliminate
kernel-doc warnings:

Warning: include/linux/sunrpc/xdr.h:298 function parameter 'folio' not
 described in 'xdr_set_scratch_folio'
Warning: include/linux/sunrpc/xdr.h:337 No description found for return
 value of 'xdr_stream_remaining'
Warning: include/linux/sunrpc/xdr.h:357 No description found for return
 value of 'xdr_align_size'
Warning: include/linux/sunrpc/xdr.h:374 No description found for return
 value of 'xdr_pad_size'
Warning: include/linux/sunrpc/xdr.h:387 No description found for return
 value of 'xdr_stream_encode_item_present'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xdr.h | 48 +++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 152597750f55..b639a6fafcbc 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -290,7 +290,7 @@ xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
 /**
  * xdr_set_scratch_folio - Attach a scratch buffer for decoding data
  * @xdr: pointer to xdr_stream struct
- * @page: an anonymous folio
+ * @folio: an anonymous folio
  *
  * See xdr_set_scratch_buffer().
  */
@@ -330,7 +330,7 @@ static inline void xdr_commit_encode(struct xdr_stream *xdr)
  * xdr_stream_remaining - Return the number of bytes remaining in the stream
  * @xdr: pointer to struct xdr_stream
  *
- * Return value:
+ * Returns:
  *   Number of bytes remaining in @xdr before xdr->end
  */
 static inline size_t
@@ -350,7 +350,7 @@ ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor,
  * xdr_align_size - Calculate padded size of an object
  * @n: Size of an object being XDR encoded (in bytes)
  *
- * Return value:
+ * Returns:
  *   Size (in bytes) of the object including xdr padding
  */
 static inline size_t
@@ -368,7 +368,7 @@ xdr_align_size(size_t n)
  * This implementation avoids the need for conditional
  * branches or modulo division.
  *
- * Return value:
+ * Returns:
  *   Size (in bytes) of the needed XDR pad
  */
 static inline size_t xdr_pad_size(size_t n)
@@ -380,7 +380,7 @@ static inline size_t xdr_pad_size(size_t n)
  * xdr_stream_encode_item_present - Encode a "present" list item
  * @xdr: pointer to xdr_stream
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -399,7 +399,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr)
  * xdr_stream_encode_item_absent - Encode a "not present" list item
  * @xdr: pointer to xdr_stream
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -419,7 +419,7 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
  * @p: address in a buffer into which to encode
  * @n: boolean value to encode
  *
- * Return value:
+ * Returns:
  *   Address of item following the encoded boolean
  */
 static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
@@ -433,7 +433,7 @@ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
  * @xdr: pointer to xdr_stream
  * @n: boolean value to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -453,7 +453,7 @@ static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n)
  * @xdr: pointer to xdr_stream
  * @n: integer to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -474,7 +474,7 @@ xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n)
  * @xdr: pointer to xdr_stream
  * @n: integer to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -495,7 +495,7 @@ xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n)
  * @xdr: pointer to xdr_stream
  * @n: 64-bit integer to encode
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -517,7 +517,7 @@ xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n)
  * @ptr: pointer to void pointer
  * @len: size of object
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -542,7 +542,7 @@ xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len)
  * @ptr: pointer to opaque data object
  * @len: size of object pointed to by @ptr
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -563,7 +563,7 @@ xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t l
  * @ptr: pointer to opaque data object
  * @len: size of object pointed to by @ptr
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -585,7 +585,7 @@ xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len)
  * @array: array of integers
  * @array_size: number of elements in @array
  *
- * Return values:
+ * Returns:
  *   On success, returns length in bytes of XDR buffer consumed
  *   %-EMSGSIZE on XDR buffer overflow
  */
@@ -608,7 +608,7 @@ xdr_stream_encode_uint32_array(struct xdr_stream *xdr,
  * xdr_item_is_absent - symbolically handle XDR discriminators
  * @p: pointer to undecoded discriminator
  *
- * Return values:
+ * Returns:
  *   %true if the following XDR item is absent
  *   %false if the following XDR item is present
  */
@@ -621,7 +621,7 @@ static inline bool xdr_item_is_absent(const __be32 *p)
  * xdr_item_is_present - symbolically handle XDR discriminators
  * @p: pointer to undecoded discriminator
  *
- * Return values:
+ * Returns:
  *   %true if the following XDR item is present
  *   %false if the following XDR item is absent
  */
@@ -635,7 +635,7 @@ static inline bool xdr_item_is_present(const __be32 *p)
  * @xdr: pointer to xdr_stream
  * @ptr: pointer to a u32 in which to store the result
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -656,7 +656,7 @@ xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr)
  * @xdr: pointer to xdr_stream
  * @ptr: location to store integer
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -677,7 +677,7 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
  * @xdr: pointer to xdr_stream
  * @ptr: location to store integer
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -698,7 +698,7 @@ xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr)
  * @xdr: pointer to xdr_stream
  * @ptr: location to store 64-bit integer
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -720,7 +720,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr)
  * @ptr: location to store data
  * @len: size of buffer pointed to by @ptr
  *
- * Return values:
+ * Returns:
  *   %0 on success
  *   %-EBADMSG on XDR buffer overflow
  */
@@ -746,7 +746,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len)
  * on @xdr. It is therefore expected that the object it points to should
  * be processed immediately.
  *
- * Return values:
+ * Returns:
  *   On success, returns size of object stored in *@ptr
  *   %-EBADMSG on XDR buffer overflow
  *   %-EMSGSIZE if the size of the object would exceed @maxlen
@@ -777,7 +777,7 @@ xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxle
  * @array: location to store the integer array or NULL
  * @array_size: number of elements to store
  *
- * Return values:
+ * Returns:
  *   On success, returns number of elements stored in @array
  *   %-EBADMSG on XDR buffer overflow
  *   %-EMSGSIZE if the size of the array exceeds @array_size
-- 
cgit v1.2.3


From 4e2866b2baaddfff6069a2f18fc134c1d5a08f2b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 11 Mar 2026 12:18:54 -0400
Subject: SUNRPC: Add svc_rqst_page_release() helper

svc_rqst_replace_page() releases displaced pages through a
per-rqst folio batch, but exposes the add-or-flush sequence
directly. svc_tcp_restore_pages() releases displaced pages
individually with put_page().

Introduce svc_rqst_page_release() to encapsulate the
batched release mechanism. Convert svc_rqst_replace_page()
and svc_tcp_restore_pages() to use it. The latter now
benefits from the same batched release that
svc_rqst_replace_page() already uses.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 669c944eaf7f..1ebd9c7efa70 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -498,6 +498,21 @@ int		   svc_generic_rpcbind_set(struct net *net,
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
+/**
+ * svc_rqst_page_release - release a page associated with an RPC transaction
+ * @rqstp: RPC transaction context
+ * @page: page to release
+ *
+ * Released pages accumulate in @rqstp's rq_fbatch and are freed together
+ * once it fills, reducing allocator pressure under heavy RPC workloads.
+ */
+static inline void svc_rqst_page_release(struct svc_rqst *rqstp,
+					 struct page *page)
+{
+	if (!folio_batch_add(&rqstp->rq_fbatch, page_folio(page)))
+		__folio_batch_release(&rqstp->rq_fbatch);
+}
+
 /*
  * When we want to reduce the size of the reserved space in the response
  * buffer, we need to take into account the size of any checksum data that
-- 
cgit v1.2.3