From 8554116e17eef055d9dd58a94b3427cb2ad1c317 Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:10 +0300 Subject: pnfs: defer release of pages in layoutget we have encountered a bug whereby reading a lot of files (copying fedora's /bin) from a pNFS mount and hitting Ctrl+C in the middle caused a general protection fault in xdr_shrink_bufhead. this function is called when decoding the response from LAYOUTGET. the decoding is done by a worker thread, and the caller of LAYOUTGET waits for the worker thread to complete. hitting Ctrl+C caused the synchronous wait to end and the next thing the caller does is to free the pages, so when the worker thread calls xdr_shrink_bufhead, the pages are gone. therefore, the cleanup of these pages has been moved to nfs4_layoutget_release. Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a99a8d948721..6a78d49da5c1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6223,11 +6223,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } +static size_t max_response_pages(struct nfs_server *server) +{ + u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + return nfs_page_array_len(0, max_resp_sz); +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_free_pages(lgp->args.layout.pages, max_pages); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6239,9 +6286,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6259,6 +6307,13 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) dprintk("--> %s\n", __func__); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + nfs4_layoutget_release(lgp); + return -ENOMEM; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); -- cgit v1.2.3 From 21d1f58aedc5f7ac4bb0c4e3d78c74ea31ac050f Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:11 +0300 Subject: pnfs: nfs4_proc_layoutget returns void since the only user of nfs4_proc_layoutget is send_layoutget, which ignores its return value, there is no reason to return any value. Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6a78d49da5c1..f94f6b3928fc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6286,7 +6286,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); @@ -6310,7 +6310,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); - return -ENOMEM; + return; } lgp->args.layout.pglen = max_pages * PAGE_SIZE; @@ -6319,7 +6319,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return PTR_ERR(task); + return; status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; @@ -6327,7 +6327,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) status = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return status; + return; } static void -- cgit v1.2.3 From 47fbf7976e0b7d9dcdd799e2a1baba19064d9631 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Aug 2012 16:03:13 -0400 Subject: NFSv4.1: Remove a bogus BUG_ON() in nfs4_layoutreturn_done Ever since commit 0a57cdac3f (NFSv4.1 send layoutreturn to fence disconnected data server) we've been sending layoutreturn calls while there is potentially still outstanding I/O to the data servers. The reason we do this is to avoid races between replayed writes to the MDS and the original writes to the DS. When this happens, the BUG_ON() in nfs4_layoutreturn_done can be triggered because it assumes that we would never call layoutreturn without knowing that all I/O to the DS is finished. The fix is to remove the BUG_ON() now that the assumptions behind the test are obsolete. Reported-by: Boaz Harrosh Reported-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>=3.5] --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f94f6b3928fc..c77d296bdaa6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6359,12 +6359,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; } spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - BUG_ON(!list_empty(&lo->plh_segs)); - } + if (task->tk_status == 0 && lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); -- cgit v1.2.3 From 519d3959e30a98f8e135e7a16647c10af5ad63d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 17:30:10 -0400 Subject: NFSv4: Fix pointer arithmetic in decode_getacl Resetting the cursor xdr->p to a previous value is not a safe practice: if the xdr_stream has crossed out of the initial iovec, then a bunch of other fields would need to be reset too. Fix this issue by using xdr_enter_page() so that the buffer gets page aligned at the bitmap _before_ we decode it. Also fix the confusion of the ACL length with the page buffer length by not adding the base offset to the ACL length... Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c77d296bdaa6..286ab7078413 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3819,7 +3819,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len - res.acl_data_offset; + acl_len = res.acl_len; if (acl_len > args.acl_len) nfs4_write_cached_acl(inode, NULL, 0, acl_len); else -- cgit v1.2.3 From b291f1b1c86aa0c7bc3df2994e6a1a4e53f1fde0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 18:30:41 -0400 Subject: NFSv4: Fix the acl cache size calculation Currently, we do not take into account the size of the 16 byte struct nfs4_cached_acl header, when deciding whether or not we should cache the acl data. Consequently, we will end up allocating an 8k buffer in order to fit a maximum size 4k acl. This patch adjusts the calculation so that we limit the cache size to 4k for the acl header+data. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 286ab7078413..635274140b18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3737,9 +3737,10 @@ out: static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) { struct nfs4_cached_acl *acl; + size_t buflen = sizeof(*acl) + acl_len; - if (pages && acl_len <= PAGE_SIZE) { - acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (pages && buflen <= PAGE_SIZE) { + acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; acl->cached = 1; -- cgit v1.2.3 From c3f52af3e03013db5237e339c817beaae5ec9e3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2012 14:56:02 -0400 Subject: NFS: Fix the initialisation of the readdir 'cookieverf' array When the NFS_COOKIEVERF helper macro was converted into a static inline function in commit 99fadcd764 (nfs: convert NFS_*(inode) helpers to static inline), we broke the initialisation of the readdir cookies, since that depended on doing a memset with an argument of 'sizeof(NFS_COOKIEVERF(inode))' which therefore changed from sizeof(be32 cookieverf[2]) to sizeof(be32 *). At this point, NFS_COOKIEVERF seems to be more of an obfuscation than a helper, so the best thing would be to just get rid of it. Also see: https://bugzilla.kernel.org/show_bug.cgi?id=46881 Reported-by: Andi Kleen Reported-by: David Binderman Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 635274140b18..86b4c7361036 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } -- cgit v1.2.3 From 21f498c2f73bd6150d82931f09965826dca0b5f2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Aug 2012 10:59:25 -0400 Subject: NFSv4: Fix range checking in __nfs4_get_acl_uncached and __nfs4_proc_set_acl Ensure that the user supplied buffer size doesn't cause us to overflow the 'pages' array. Also fix up some confusion between the use of PAGE_SIZE and PAGE_CACHE_SIZE when calculating buffer sizes. We're not using the page cache for anything here. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 86b4c7361036..6b94f2d52532 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); } -/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that - * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on * the stack. */ -#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) +#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) @@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, spages = pages; do { - len = min_t(size_t, PAGE_CACHE_SIZE, buflen); + len = min_t(size_t, PAGE_SIZE, buflen); newpage = alloc_page(GFP_KERNEL); if (newpage == NULL) @@ -3782,17 +3782,16 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - int ret = -ENOMEM, npages, i; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + int ret = -ENOMEM, i; size_t acl_len = 0; - npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* As long as we're doing a round trip to the server anyway, * let's be prepared for a page of acl data. */ if (npages == 0) npages = 1; - - /* Add an extra page to handle the bitmap returned */ - npages++; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -3891,10 +3890,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl .rpc_argp = &arg, .rpc_resp = &res, }; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); if (i < 0) return i; -- cgit v1.2.3 From 1f1ea6c2d9d8c0be9ec56454b05315273b5de8ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Aug 2012 11:44:43 -0700 Subject: NFSv4: Fix buffer overflow checking in __nfs4_get_acl_uncached Pass the checks made by decode_getacl back to __nfs4_get_acl_uncached so that it knows if the acl has been truncated. The current overflow checking is broken, resulting in Oopses on user-triggered nfs4_getfacl calls, and is opaque to the point where several attempts at fixing it have failed. This patch tries to clean up the code in addition to fixing the Oopses by ensuring that the overflow checks are performed in a single place (decode_getacl). If the overflow check failed, we will still be able to report the acl length, but at least we will no longer attempt to cache the acl or copy the truncated contents to user space. Reported-by: Sachin Prabhu Signed-off-by: Trond Myklebust Tested-by: Sachin Prabhu --- fs/nfs/nfs4proc.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6b94f2d52532..1e50326d00dd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3739,7 +3739,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size struct nfs4_cached_acl *acl; size_t buflen = sizeof(*acl) + acl_len; - if (pages && buflen <= PAGE_SIZE) { + if (buflen <= PAGE_SIZE) { acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; @@ -3784,7 +3784,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu }; unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret = -ENOMEM, i; - size_t acl_len = 0; /* As long as we're doing a round trip to the server anyway, * let's be prepared for a page of acl data. */ @@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu args.acl_len = npages * PAGE_SIZE; args.acl_pgbase = 0; - /* Let decode_getfacl know not to fail if the ACL data is larger than - * the page we send as a guess */ - if (buf == NULL) - res.acl_flags |= NFS4_ACL_LEN_REQUEST; - dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), @@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len; - if (acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, 0, acl_len); - else - nfs4_write_cached_acl(inode, pages, res.acl_data_offset, - acl_len); - if (buf) { + /* Handle the case where the passed-in buffer is too short */ + if (res.acl_flags & NFS4_ACL_TRUNC) { + /* Did the user only issue a request for the acl length? */ + if (buf == NULL) + goto out_ok; ret = -ERANGE; - if (acl_len > buflen) - goto out_free; - _copy_from_pages(buf, pages, res.acl_data_offset, - acl_len); + goto out_free; } - ret = acl_len; + nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); + if (buf) + _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len); +out_ok: + ret = res.acl_len; out_free: for (i = 0; i < npages; i++) if (pages[i]) -- cgit v1.2.3