From df5e49c880ea0776806b8a9f8ab95e035272cf6f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: change svc_get() to return the svc. It is common for 'get' functions to return the object that was 'got', and there are a couple of places where users of svc_get() would be a little simpler if svc_get() did that. Make it so. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0ae28ae6caf2..5d9568953fcd 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -120,9 +120,10 @@ struct svc_serv { * change the number of threads. Horrible, but there it is. * Should be called with the "service mutex" held. */ -static inline void svc_get(struct svc_serv *serv) +static inline struct svc_serv *svc_get(struct svc_serv *serv) { serv->sv_nrthreads++; + return serv; } /* -- cgit v1.2.3 From 8c62d12740a1450d2e8456d5747f440e10db281a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC/NFSD: clean up get/put functions. svc_destroy() is poorly named - it doesn't necessarily destroy the svc, it might just reduce the ref count. nfsd_destroy() is poorly named for the same reason. This patch: - removes the refcount functionality from svc_destroy(), moving it to a new svc_put(). Almost all previous callers of svc_destroy() now call svc_put(). - renames nfsd_destroy() to nfsd_put() and improves the code, using the new svc_destroy() rather than svc_put() - removes a few comments that explain the important for balanced get/put calls. This should be obvious. The only non-trivial part of this is that svc_destroy() would call svc_sock_update() on a non-final decrement. It can no longer do that, and svc_put() isn't really a good place of it. This call is now made from svc_exit_thread() which seems like a good place. This makes the call *before* sv_nrthreads is decremented rather than after. This is not particularly important as the call just sets a flag which causes sv_nrthreads set be checked later. A subsequent patch will improve the ordering. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5d9568953fcd..73d56d33a36d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -114,8 +114,13 @@ struct svc_serv { #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/* - * We use sv_nrthreads as a reference count. svc_destroy() drops +/** + * svc_get() - increment reference count on a SUNRPC serv + * @serv: the svc_serv to have count incremented + * + * Returns: the svc_serv that was passed in. + * + * We use sv_nrthreads as a reference count. svc_put() drops * this refcount, so we need to bump it up around operations that * change the number of threads. Horrible, but there it is. * Should be called with the "service mutex" held. @@ -126,6 +131,22 @@ static inline struct svc_serv *svc_get(struct svc_serv *serv) return serv; } +void svc_destroy(struct svc_serv *serv); + +/** + * svc_put - decrement reference count on a SUNRPC serv + * @serv: the svc_serv to have count decremented + * + * When the reference count reaches zero, svc_destroy() + * is called to clean up and free the serv. + */ +static inline void svc_put(struct svc_serv *serv) +{ + serv->sv_nrthreads -= 1; + if (serv->sv_nrthreads == 0) + svc_destroy(serv); +} + /* * Maximum payload size supported by a kernel RPC server. * This is use to determine the max number of pages nfsd is @@ -515,7 +536,6 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_destroy(struct svc_serv *); void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, -- cgit v1.2.3 From ec52361df99b490f6af412b046df9799b92c1050 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: stop using ->sv_nrthreads as a refcount The use of sv_nrthreads as a general refcount results in clumsy code, as is seen by various comments needed to explain the situation. This patch introduces a 'struct kref' and uses that for reference counting, leaving sv_nrthreads to be a pure count of threads. The kref is managed particularly in svc_get() and svc_put(), and also nfsd_put(); svc_destroy() now takes a pointer to the embedded kref, rather than to the serv. nfsd allows the svc_serv to exist with ->sv_nrhtreads being zero. This happens when a transport is created before the first thread is started. To support this, a 'keep_active' flag is introduced which holds a ref on the svc_serv. This is set when any listening socket is successfully added (unless there are running threads), and cleared when the number of threads is set. So when the last thread exits, the nfs_serv will be destroyed. The use of 'keep_active' replaces previous code which checked if there were any permanent sockets. We no longer clear ->rq_server when nfsd() exits. This was done to prevent svc_exit_thread() from calling svc_destroy(). Instead we take an extra reference to the svc_serv to prevent svc_destroy() from being called. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 73d56d33a36d..3903b4ae8ac5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -85,6 +85,7 @@ struct svc_serv { struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + struct kref sv_refcnt; unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -119,19 +120,14 @@ struct svc_serv { * @serv: the svc_serv to have count incremented * * Returns: the svc_serv that was passed in. - * - * We use sv_nrthreads as a reference count. svc_put() drops - * this refcount, so we need to bump it up around operations that - * change the number of threads. Horrible, but there it is. - * Should be called with the "service mutex" held. */ static inline struct svc_serv *svc_get(struct svc_serv *serv) { - serv->sv_nrthreads++; + kref_get(&serv->sv_refcnt); return serv; } -void svc_destroy(struct svc_serv *serv); +void svc_destroy(struct kref *); /** * svc_put - decrement reference count on a SUNRPC serv @@ -142,9 +138,7 @@ void svc_destroy(struct svc_serv *serv); */ static inline void svc_put(struct svc_serv *serv) { - serv->sv_nrthreads -= 1; - if (serv->sv_nrthreads == 0) - svc_destroy(serv); + kref_put(&serv->sv_refcnt, svc_destroy); } /* -- cgit v1.2.3 From 3409e4f1e8f239f0ed81be0b068ecf4e73e2e826 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: NFSD: Make it possible to use svc_set_num_threads_sync nfsd cannot currently use svc_set_num_threads_sync. It instead uses svc_set_num_threads which does *not* wait for threads to all exit, and has a separate mechanism (nfsd_shutdown_complete) to wait for completion. The reason that nfsd is unlike other services is that nfsd threads can exit separately from svc_set_num_threads being called - they die on receipt of SIGKILL. Also, when the last thread exits, the service must be shut down (sockets closed). For this, the nfsd_mutex needs to be taken, and as that mutex needs to be held while svc_set_num_threads is called, the one cannot wait for the other. This patch changes the nfsd thread so that it can drop the ref on the service without blocking on nfsd_mutex, so that svc_set_num_threads_sync can be used: - if it can drop a non-last reference, it does that. This does not trigger shutdown and does not require a mutex. This will likely happen for all but the last thread signalled, and for all threads being shut down by nfsd_shutdown_threads() - if it can get the mutex without blocking (trylock), it does that and then drops the reference. This will likely happen for the last thread killed by SIGKILL - Otherwise there might be an unrelated task holding the mutex, possibly in another network namespace, or nfsd_shutdown_threads() might be just about to get a reference on the service, after which we can drop ours safely. We cannot conveniently get wakeup notifications on these events, and we are unlikely to need to, so we sleep briefly and check again. With this we can discard nfsd_shutdown_complete and nfsd_complete_shutdown(), and switch to svc_set_num_threads_sync. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3903b4ae8ac5..36bfc0281988 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -141,6 +141,19 @@ static inline void svc_put(struct svc_serv *serv) kref_put(&serv->sv_refcnt, svc_destroy); } +/** + * svc_put_not_last - decrement non-final reference count on SUNRPC serv + * @serv: the svc_serv to have count decremented + * + * Returns: %true is refcount was decremented. + * + * If the refcount is 1, it is not decremented and instead failure is reported. + */ +static inline bool svc_put_not_last(struct svc_serv *serv) +{ + return refcount_dec_not_one(&serv->sv_refcnt.refcount); +} + /* * Maximum payload size supported by a kernel RPC server. * This is use to determine the max number of pages nfsd is -- cgit v1.2.3 From 3ebdbe5203a874614819700d3f470724cb803709 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: discard svo_setup and rename svc_set_num_threads_sync() The ->svo_setup callback serves no purpose. It is always called from within the same module that chooses which callback is needed. So discard it and call the relevant function directly. Now that svc_set_num_threads() is no longer used remove it and rename svc_set_num_threads_sync() to remove the "_sync" suffix. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 36bfc0281988..0b38c6eaf985 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -64,9 +64,6 @@ struct svc_serv_ops { /* queue up a transport for servicing */ void (*svo_enqueue_xprt)(struct svc_xprt *); - /* set up thread (or whatever) execution context */ - int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); - /* optional module to count when adding threads (pooled svcs only) */ struct module *svo_module; }; @@ -541,7 +538,6 @@ void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); -- cgit v1.2.3 From cf0e124e0a489944d08fcc3c694d2b234d2cc658 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: move the pool_map definitions (back) into svc.c These definitions are not used outside of svc.c, and there is no evidence that they ever have been. So move them into svc.c and make the declarations 'static'. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0b38c6eaf985..d69e6108cb83 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -494,29 +494,6 @@ struct svc_procedure { const char * pc_name; /* for display */ }; -/* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; - -struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -}; - -extern struct svc_pool_map svc_pool_map; - /* * Function prototypes. */ @@ -533,8 +510,6 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -unsigned int svc_pool_map_get(void); -void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -- cgit v1.2.3 From 6b044fbaab02292fedb17565dbb3f2528083b169 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: lockd: use svc_set_num_threads() for thread start and stop svc_set_num_threads() does everything that lockd_start_svc() does, except set sv_maxconn. It also (when passed 0) finds the threads and stops them with kthread_stop(). So move the setting for sv_maxconn, and use svc_set_num_thread() We now don't need nlmsvc_task. Now that we use svc_set_num_threads() it makes sense to set svo_module. This request that the thread exists with module_put_and_exit(). Also fix the documentation for svo_module to make this explicit. svc_prepare_thread is now only used where it is defined, so it can be made static. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d69e6108cb83..cf175d47c6b7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -64,7 +64,9 @@ struct svc_serv_ops { /* queue up a transport for servicing */ void (*svo_enqueue_xprt)(struct svc_xprt *); - /* optional module to count when adding threads (pooled svcs only) */ + /* optional module to count when adding threads. + * Thread function must call module_put_and_exit() to exit. + */ struct module *svo_module; }; @@ -504,8 +506,6 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int, const struct svc_serv_ops *); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); -struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, - struct svc_pool *pool, int node); void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_free(struct svc_rqst *); -- cgit v1.2.3 From 5089f3d97552b0b07101e02a3fca0146b9b9d3b5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Oct 2021 15:17:21 -0400 Subject: SUNRPC: Remove low signal-to-noise tracepoints I'm about to add more information to the server-side SUNRPC tracepoints, so I'm going to offset the increased trace log consumption by getting rid of some tracepoints that fire frequently but don't offer much value. trace_svc_xprt_received() was useful for debugging, perhaps, but is not generally informative. trace_svc_handle_xprt() reports largely the same information as trace_svc_xdr_recvfrom(). As a clean-up, rename trace_svc_xprt_do_enqueue() to match svc_xprt_dequeue(). Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 3a99358c262b..684cc0e322fa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1768,7 +1768,7 @@ TRACE_EVENT(svc_xprt_create_err, __entry->error) ); -TRACE_EVENT(svc_xprt_do_enqueue, +TRACE_EVENT(svc_xprt_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), TP_ARGS(xprt, rqst), @@ -1815,7 +1815,6 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ), \ TP_ARGS(xprt)) -DEFINE_SVC_XPRT_EVENT(received); DEFINE_SVC_XPRT_EVENT(no_write_space); DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); @@ -1902,27 +1901,6 @@ TRACE_EVENT(svc_alloc_arg_err, TP_printk("pages=%u", __entry->pages) ); -TRACE_EVENT(svc_handle_xprt, - TP_PROTO(struct svc_xprt *xprt, int len), - - TP_ARGS(xprt, len), - - TP_STRUCT__entry( - __field(int, len) - __field(unsigned long, flags) - __string(addr, xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->len = len; - __entry->flags = xprt->xpt_flags; - __assign_str(addr, xprt->xpt_remotebuf); - ), - - TP_printk("addr=%s len=%d flags=%s", __get_str(addr), - __entry->len, show_svc_xprt_flags(__entry->flags)) -); - TRACE_EVENT(svc_stats_latency, TP_PROTO(const struct svc_rqst *rqst), -- cgit v1.2.3 From 40595cdc93edf4110c0f0c0b06f8d82008f23929 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 16 Dec 2021 12:20:13 -0500 Subject: nfs: block notification on fs with its own ->lock NFSv4.1 supports an optional lock notification feature which notifies the client when a lock comes available. (Normally NFSv4 clients just poll for locks if necessary.) To make that work, we need to request a blocking lock from the filesystem. We turned that off for NFS in commit f657f8eef3ff ("nfs: don't atempt blocking locks on nfs reexports") [sic] because it actually blocks the nfsd thread while waiting for the lock. Thanks to Vasily Averin for pointing out that NFS isn't the only filesystem with that problem. Any filesystem that leaves ->lock NULL will use posix_lock_file(), which does the right thing. Simplest is just to assume that any filesystem that defines its own ->lock is not safe to request a blocking lock from. So, this patch mostly reverts commit f657f8eef3ff ("nfs: don't atempt blocking locks on nfs reexports") [sic] and commit b840be2f00c0 ("lockd: don't attempt blocking locks on nfs reexports"), and instead uses a check of ->lock (Vasily's suggestion) to decide whether to support blocking lock notifications on a given filesystem. Also add a little documentation. Perhaps someday we could add back an export flag later to allow filesystems with "good" ->lock methods to support blocking lock notifications. Reported-by: Vasily Averin Signed-off-by: J. Bruce Fields [ cel: Description rewritten to address checkpatch nits ] [ cel: Fixed warning when SUNRPC debugging is disabled ] [ cel: Fixed NULL check ] Signed-off-by: Chuck Lever Reviewed-by: Vasily Averin --- include/linux/exportfs.h | 2 -- include/linux/lockd/lockd.h | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3260fe714846..fe848901fcc3 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -221,8 +221,6 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ -#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do - asychronous blocking locks */ unsigned long flags; }; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c4ae6506b8b3..fcef192e5e45 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -303,10 +303,15 @@ void nlmsvc_invalidate_all(void); int nlmsvc_unlock_all_by_sb(struct super_block *sb); int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); +static inline struct file *nlmsvc_file_file(struct nlm_file *file) +{ + return file->f_file[O_RDONLY] ? + file->f_file[O_RDONLY] : file->f_file[O_WRONLY]; +} + static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return locks_inode(file->f_file[O_RDONLY] ? - file->f_file[O_RDONLY] : file->f_file[O_WRONLY]); + return locks_inode(nlmsvc_file_file(file)); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) -- cgit v1.2.3 From 0ea9fc15b1d7d6636d429e74ffe3f86bf2f2f7d6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Nov 2021 17:05:07 +0100 Subject: fs/locks: fix fcntl_getlk64/fcntl_setlk64 stub prototypes My patch to rework oabi fcntl64() introduced a harmless sparse warning when file locking is disabled: arch/arm/kernel/sys_oabi-compat.c:251:51: sparse: sparse: incorrect type in argument 3 (different address spaces) @@ expected struct flock64 [noderef] __user *user @@ got struct flock64 * @@ arch/arm/kernel/sys_oabi-compat.c:251:51: sparse: expected struct flock64 [noderef] __user *user arch/arm/kernel/sys_oabi-compat.c:251:51: sparse: got struct flock64 * arch/arm/kernel/sys_oabi-compat.c:265:55: sparse: sparse: incorrect type in argument 4 (different address spaces) @@ expected struct flock64 [noderef] __user *user @@ got struct flock64 * @@ arch/arm/kernel/sys_oabi-compat.c:265:55: sparse: expected struct flock64 [noderef] __user *user arch/arm/kernel/sys_oabi-compat.c:265:55: sparse: got struct flock64 * When file locking is enabled, everything works correctly and the right data gets passed, but the stub declarations in linux/fs.h did not get modified when the calling conventions changed in an earlier patch. Reported-by: kernel test robot Fixes: 7e2d8c29ecdd ("ARM: 9111/1: oabi-compat: rework fcntl64() emulation") Fixes: a75d30c77207 ("fs/locks: pass kernel struct flock to fcntl_getlk/setlk") Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner Signed-off-by: Arnd Bergmann Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..5122d13775c2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1220,13 +1220,13 @@ static inline int fcntl_setlk(unsigned int fd, struct file *file, #if BITS_PER_LONG == 32 static inline int fcntl_getlk64(struct file *file, unsigned int cmd, - struct flock64 __user *user) + struct flock64 *user) { return -EINVAL; } static inline int fcntl_setlk64(unsigned int fd, struct file *file, - unsigned int cmd, struct flock64 __user *user) + unsigned int cmd, struct flock64 *user) { return -EACCES; } -- cgit v1.2.3 From dc6c6fb3d639756a532bcc47d4a9bf9f3965881b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Jan 2022 13:26:51 -0500 Subject: SUNRPC: Fix sockaddr handling in the svc_xprt_create_error trace point While testing, I got an unexpected KASAN splat: Jan 08 13:50:27 oracle-102.nfsv4.dev kernel: BUG: KASAN: stack-out-of-bounds in trace_event_raw_event_svc_xprt_create_err+0x190/0x210 [sunrpc] Jan 08 13:50:27 oracle-102.nfsv4.dev kernel: Read of size 28 at addr ffffc9000008f728 by task mount.nfs/4628 The memcpy() in the TP_fast_assign section of this trace point copies the size of the destination buffer in order that the buffer won't be overrun. In other similar trace points, the source buffer for this memcpy is a "struct sockaddr_storage" so the actual length of the source buffer is always long enough to prevent the memcpy from reading uninitialized or unallocated memory. However, for this trace point, the source buffer can be as small as a "struct sockaddr_in". For AF_INET sockaddrs, the memcpy() reads memory that follows the source buffer, which is not always valid memory. To avoid copying past the end of the passed-in sockaddr, make the source address's length available to the memcpy(). It would be a little nicer if the tracing infrastructure was more friendly about storing socket addresses that are not AF_INET, but I could not find a way to make printk("%pIS") work with a dynamic array. Reported-by: KASAN Fixes: 4b8f380e46e4 ("SUNRPC: Tracepoint to record errors in svc_xpo_create()") Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 684cc0e322fa..1c0a288f6a5c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1744,10 +1744,11 @@ TRACE_EVENT(svc_xprt_create_err, const char *program, const char *protocol, struct sockaddr *sap, + size_t salen, const struct svc_xprt *xprt ), - TP_ARGS(program, protocol, sap, xprt), + TP_ARGS(program, protocol, sap, salen, xprt), TP_STRUCT__entry( __field(long, error) @@ -1760,7 +1761,7 @@ TRACE_EVENT(svc_xprt_create_err, __entry->error = PTR_ERR(xprt); __assign_str(program, program); __assign_str(protocol, protocol); - memcpy(__entry->addr, sap, sizeof(__entry->addr)); + memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr))); ), TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", -- cgit v1.2.3 From 16720861675393a35974532b3c837d9fd7bfe08c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Jan 2022 16:59:54 -0500 Subject: SUNRPC: Fix sockaddr handling in svcsock_accept_class trace points Avoid potentially hazardous memory copying and the needless use of "%pIS" -- in the kernel, an RPC service listener is always bound to ANYADDR. Having the network namespace is helpful when recording errors, though. Fixes: a0469f46faab ("SUNRPC: Replace dprintk call sites in TCP state change callouts") Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1c0a288f6a5c..1e566ac4b812 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2125,17 +2125,17 @@ DECLARE_EVENT_CLASS(svcsock_accept_class, TP_STRUCT__entry( __field(long, status) __string(service, service) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __field(unsigned int, netns_ino) ), TP_fast_assign( __entry->status = status; __assign_str(service, service); - memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr)); + __entry->netns_ino = xprt->xpt_net->ns.inum; ), - TP_printk("listener=%pISpc service=%s status=%ld", - __entry->addr, __get_str(service), __entry->status + TP_printk("addr=listener service=%s status=%ld", + __get_str(service), __entry->status ) ); -- cgit v1.2.3