From 518662e0fcb9fa241fe90a337b59bc5066b2a930 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Apr 2017 12:22:09 +1000 Subject: NFS: fix usage of mempools. When passed GFP flags that allow sleeping (such as GFP_NOIO), mempool_alloc() will never return NULL, it will wait until memory is available. This means that we don't need to handle failure, but that we do need to ensure one thread doesn't call mempool_alloc() twice on the one pool without queuing or freeing the first allocation. If multiple threads did this during times of high memory pressure, the pool could be exhausted and a deadlock could result. pnfs_generic_alloc_ds_commits() attempts to allocate from the nfs_commit_mempool while already holding an allocation from that pool. This is not safe. So change nfs_commitdata_alloc() to take a flag that indicates whether failure is acceptable. In pnfs_generic_alloc_ds_commits(), accept failure and handle it as we currently do. Else where, do not accept failure, and do not handle it. Even when failure is acceptable, we want to succeed if possible. That means both - using an entry from the pool if there is one - waiting for direct reclaim is there isn't. We call mempool_alloc(GFP_NOWAIT) to achieve the first, then kmem_cache_alloc(GFP_NOIO|__GFP_NORETRY) to achieve the second. Each of these can fail, but together they do the best they can without blocking indefinitely. The objects returned by kmem_cache_alloc() will still be freed by mempool_free(). This is safe as mempool_alloc() uses exactly the same function to allocate objects (since the mempool was created with mempool_create_slab_pool()). The object returned by mempool_alloc() and kmem_cache_alloc() are indistinguishable so mempool_free() will handle both identically, either adding to the pool or calling kmem_cache_free(). Also, don't test for failure when allocating from nfs_wdata_mempool. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 287f34161086..1b29915247b2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -502,7 +502,7 @@ extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_commit_data *nfs_commitdata_alloc(void); +extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); static inline int -- cgit v1.2.3 From fbe77c30e9abcb3429380dec622439991a718e31 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 19 Apr 2017 10:11:35 -0400 Subject: NFS: move rw_mode to nfs_pageio_header Let's try to have it in a cacheline in nfs4_proc_pgio_rpc_prepare(). Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 4 ++-- include/linux/nfs_xdr.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 957049f72290..6f01e28bba27 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -64,7 +64,6 @@ struct nfs_pageio_ops { }; struct nfs_rw_ops { - const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, @@ -124,7 +123,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int how); + int how, + gfp_t gfp_flags); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 348f7c158084..51e27f9746ee 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1427,6 +1427,7 @@ struct nfs_pgio_header { struct list_head pages; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ + fmode_t rw_mode; struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; -- cgit v1.2.3 From 50f2112cf7a3e62a8d33838eb205d5fef306457a Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Apr 2017 12:50:09 -0400 Subject: locks: Set FL_CLOSE when removing flock locks on close() Set FL_CLOSE in fl_flags as in locks_remove_posix() when clearing locks. NFS will check for this flag to ensure an unlock is sent in a following patch. Fuse handles flock and posix locks differently for FL_CLOSE, and so requires a fixup to retain the existing behavior for flock. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Acked-by: Miklos Szeredi Signed-off-by: Trond Myklebust --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8..72061aa65405 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -909,6 +909,8 @@ static inline struct file *get_file(struct file *f) #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ +#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) + /* * Special return value from posix_lock_file() and vfs_lock_file() for * asynchronous locking. -- cgit v1.2.3 From 7d6ddf88c4db372689c8aa65ea652d0514d66c06 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Apr 2017 12:50:10 -0400 Subject: NFS: Add an iocounter wait function for async RPC tasks By sleeping on a new NFS Unlock-On-Close waitqueue, rpc tasks may wait for a lock context's iocounter to reach zero. The rpc waitqueue is only woken when the open_context has the NFS_CONTEXT_UNLOCK flag set in order to mitigate spurious wake-ups for any iocounter reaching zero. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_page.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1b29915247b2..9aa044e76820 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -76,6 +76,7 @@ struct nfs_open_context { #define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) +#define NFS_CONTEXT_UNLOCK (3) int error; struct list_head list; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b34097c67848..2a70f34dffe8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -222,6 +222,7 @@ struct nfs_server { u32 mountd_version; unsigned short mountd_port; unsigned short mountd_protocol; + struct rpc_wait_queue uoc_rpcwaitq; }; /* Server capabilities */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6f01e28bba27..247cc3d3498f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -141,6 +141,7 @@ extern int nfs_page_group_lock(struct nfs_page *, bool); extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); /* * Lock the page of an asynchronous request -- cgit v1.2.3 From b1ece737f44f91dca8f4829cf0b442e752e406db Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Apr 2017 12:50:11 -0400 Subject: lockd: Introduce nlmclnt_operations NFS would enjoy the ability to modify the behavior of the NLM client's unlock RPC task in order to delay the transmission of the unlock until IO that was submitted under that lock has completed. This ability can ensure that the NLM client will always complete the transmission of an unlock even if the waiting caller has been interrupted with fatal signal. For this purpose, a pointer to a struct nlmclnt_operations can be assigned in a nfs_module's nfs_rpc_ops that will install those nlmclnt_operations on the nlm_host. The struct nlmclnt_operations defines three callback operations that will be used in a following patch: nlmclnt_alloc_call - used to call back after a successful allocation of a struct nlm_rqst in nlmclnt_proc(). nlmclnt_unlock_prepare - used to call back during NLM unlock's rpc_call_prepare. The NLM client defers calling rpc_call_start() until this callback returns false. nlmclnt_release_call - used to call back when the NLM client's struct nlm_rqst is freed. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/lockd/bind.h | 24 ++++++++++++++++++++++-- include/linux/lockd/lockd.h | 2 ++ include/linux/nfs_xdr.h | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 140edab64446..05728396a1a1 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -18,6 +18,7 @@ /* Dummy declarations */ struct svc_rqst; +struct rpc_task; /* * This is the set of functions for lockd->nfsd communication @@ -43,6 +44,7 @@ struct nlmclnt_initdata { u32 nfs_version; int noresvport; struct net *net; + const struct nlmclnt_operations *nlmclnt_ops; }; /* @@ -52,8 +54,26 @@ struct nlmclnt_initdata { extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init); extern void nlmclnt_done(struct nlm_host *host); -extern int nlmclnt_proc(struct nlm_host *host, int cmd, - struct file_lock *fl); +/* + * NLM client operations provide a means to modify RPC processing of NLM + * requests. Callbacks receive a pointer to data passed into the call to + * nlmclnt_proc(). + */ +struct nlmclnt_operations { + /* Called on successful allocation of nlm_rqst, use for allocation or + * reference counting. */ + void (*nlmclnt_alloc_call)(void *); + + /* Called in rpc_task_prepare for unlock. A return value of true + * indicates the callback has put the task to sleep on a waitqueue + * and NLM should not call rpc_call_start(). */ + bool (*nlmclnt_unlock_prepare)(struct rpc_task*, void *); + + /* Called when the nlm_rqst is freed, callbacks should clean up here */ + void (*nlmclnt_release_call)(void *); +}; + +extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data); extern int lockd_up(struct net *net); extern void lockd_down(struct net *net); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b37dee3acaba..41f7b6a04d69 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -69,6 +69,7 @@ struct nlm_host { char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ char nodename[UNX_MAXNODENAME + 1]; + const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */ }; /* @@ -142,6 +143,7 @@ struct nlm_rqst { struct nlm_block * a_block; unsigned int a_retries; /* Retry count */ u8 a_owner[NLMCLNT_OHSIZE]; + void * a_callback_data; /* sent to nlmclnt_operations callbacks */ }; /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 51e27f9746ee..677c6b91dfcd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1551,6 +1551,7 @@ struct nfs_rpc_ops { const struct inode_operations *dir_inode_ops; const struct inode_operations *file_inode_ops; const struct file_operations *file_ops; + const struct nlmclnt_operations *nlmclnt_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -- cgit v1.2.3 From c373fff7bd252ec36e8a895c58a584088f1d38bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Apr 2017 12:26:22 -0400 Subject: NFSv4: Don't special case "launder" If the client receives a fatal server error from nfs_pageio_add_request(), then we should always truncate the page on which the error occurred. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9aa044e76820..bb0eb2c9acca 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -500,24 +500,12 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned */ extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); +extern int nfs_wb_page(struct inode *inode, struct page *page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); -static inline int -nfs_wb_launder_page(struct inode *inode, struct page *page) -{ - return nfs_wb_single_page(inode, page, true); -} - -static inline int -nfs_wb_page(struct inode *inode, struct page *page) -{ - return nfs_wb_single_page(inode, page, false); -} - static inline int nfs_have_writebacks(struct inode *inode) { -- cgit v1.2.3 From e092693443b995c8e3a565a73b5fdb05f1260f9b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 8 May 2017 18:02:24 -0400 Subject: NFS append COMMIT after synchronous COPY Instead of messing with the commit path which has been causing issues, add a COMMIT op after the COPY and ask for stable copies in the first space. It saves a round trip, since after the COPY, the client sends a COMMIT anyway. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 677c6b91dfcd..b28c83475ee8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1383,6 +1383,7 @@ struct nfs42_copy_res { struct nfs42_write_res write_res; bool consecutive; bool synchronous; + struct nfs_commitres commit_res; }; struct nfs42_seek_args { -- cgit v1.2.3