From b3c2aa07454cf7ab4ec3ee882c586abbea033132 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 14:32:45 -0400 Subject: NFSv4: Refactor NFSv4 error handling Prepare for unification of the synchronous and asynchronous error handling. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 6 ++++-- fs/nfs/nfs4proc.c | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 50cfc4ca7a02..4afdee420d25 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -183,10 +183,12 @@ struct nfs4_state { struct nfs4_exception { - long timeout; - int retry; struct nfs4_state *state; struct inode *inode; + long timeout; + unsigned char delay : 1, + recovering : 1, + retry : 1; }; struct nfs4_state_recovery_ops { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5133bb18830e..d5c2a461f281 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -344,13 +344,16 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_do_handle_exception(struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; struct inode *inode = exception->inode; int ret = errorcode; + exception->delay = 0; + exception->recovering = 0; exception->retry = 0; switch(errorcode) { case 0: @@ -411,9 +414,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ } case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - ret = nfs4_delay(server->client, &exception->timeout); - if (ret != 0) - break; + exception->delay = 1; + return 0; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: exception->retry = 1; @@ -434,9 +437,31 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ /* We failed to handle the error */ return nfs4_map_errors(ret); wait_on_recovery: - ret = nfs4_wait_clnt_recover(clp); - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - return -EIO; + exception->recovering = 1; + return 0; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + ret = nfs4_delay(server->client, &exception->timeout); + goto out_retry; + } + if (exception->recovering) { + ret = nfs4_wait_clnt_recover(clp); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -EIO; + goto out_retry; + } + return ret; +out_retry: if (ret == 0) exception->retry = 1; return ret; -- cgit v1.2.3 From 2598ed344586cf3612bfbd33041527c55543acfe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 16:10:18 -0400 Subject: NFSv4: Update the delay statistics counter for synchronous delays Currently, we only do so for asynchronous delays. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d5c2a461f281..d044c7b11ff7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -412,8 +412,9 @@ static int nfs4_do_handle_exception(struct nfs_server *server, ret = -EBUSY; break; } - case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + nfs_inc_server_stats(server, NFSIOS_DELAY); + case -NFS4ERR_GRACE: exception->delay = 1; return 0; -- cgit v1.2.3 From 516285ebe0efadc40b914a0e61a913a390604810 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 16:15:24 -0400 Subject: NFSv4: nfs4_async_handle_error should take a non-const nfs_server For symmetry with the synchronous handler, and so that we can potentially handle errors such as NFS4ERR_BADNAME. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d044c7b11ff7..ae5cde621954 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -78,7 +78,7 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *, struct nfs4_state *, long *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); @@ -4982,7 +4982,7 @@ out: static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, struct nfs4_state *state, long *timeout) { struct nfs_client *clp = server->nfs_client; @@ -5559,7 +5559,7 @@ struct nfs4_unlockdata { struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; struct file_lock fl; - const struct nfs_server *server; + struct nfs_server *server; unsigned long timestamp; }; -- cgit v1.2.3 From 4816fdadab9ff874ec1a4138dad43b636c7d220b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 14:58:42 -0400 Subject: NFSv4: Don't use synchronous delegation recall in exception handling The code needs to be able to work from inside an asynchronous context. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 6 ++---- fs/nfs/nfs4proc.c | 8 +++----- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index be806ead7f4d..5166adcfc0fb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -721,14 +721,12 @@ int nfs_async_inode_return_delegation(struct inode *inode, struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; - filemap_flush(inode->i_mapping); - rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation == NULL) goto out_enoent; - - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + if (stateid != NULL && + !clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ae5cde621954..2a155ec05f6d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -362,11 +362,9 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (inode && nfs4_have_delegation(inode, FMODE_READ)) { - nfs4_inode_return_delegation(inode); - exception->retry = 1; - return 0; - } + if (inode && nfs_async_inode_return_delegation(inode, + NULL) == 0) + goto wait_on_recovery; if (state == NULL) break; ret = nfs4_schedule_stateid_recovery(server, state); -- cgit v1.2.3 From 037fc9808a777f6fb6a54c6510b9656716e0c8c8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 15:51:00 -0400 Subject: NFSv4: Unify synchronous and asynchronous error handling They now only differ in the way we handle waiting, so let's unify. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 123 ++++++++++++++++++++++-------------------------------- 1 file changed, 49 insertions(+), 74 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2a155ec05f6d..e32b7e93f253 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -78,7 +78,6 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *, struct nfs4_state *, long *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); @@ -466,6 +465,55 @@ out_retry: return ret; } +static int +nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + rpc_delay(task, nfs4_update_delay(&exception->timeout)); + goto out_retry; + } + if (exception->recovering) { + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + goto out_retry; + } + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + ret = -EIO; + return ret; +out_retry: + if (ret == 0) + exception->retry = 1; + return ret; +} + +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, + struct nfs4_state *state, long *timeout) +{ + struct nfs4_exception exception = { + .state = state, + }; + + if (task->tk_status >= 0) + return 0; + if (timeout) + exception.timeout = *timeout; + task->tk_status = nfs4_async_handle_exception(task, server, + task->tk_status, + &exception); + if (exception.delay && timeout) + *timeout = exception.timeout; + if (exception.retry) + return -EAGAIN; + return 0; +} + /* * Return 'true' if 'clp' is using an rpc_client that is integrity protected * or 'false' otherwise. @@ -4979,79 +5027,6 @@ out: #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ -static int -nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, - struct nfs4_state *state, long *timeout) -{ - struct nfs_client *clp = server->nfs_client; - - if (task->tk_status >= 0) - return 0; - switch(task->tk_status) { - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - if (state == NULL) - break; - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - } - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_lease_recovery(clp); - goto wait_on_recovery; - case -NFS4ERR_MOVED: - if (nfs4_schedule_migration_recovery(server) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(clp); - goto wait_on_recovery; -#if defined(CONFIG_NFS_V4_1) - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR %d, Reset session\n", __func__, - task->tk_status); - nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto wait_on_recovery; -#endif /* CONFIG_NFS_V4_1 */ - case -NFS4ERR_DELAY: - nfs_inc_server_stats(server, NFSIOS_DELAY); - rpc_delay(task, nfs4_update_delay(timeout)); - goto restart_call; - case -NFS4ERR_GRACE: - rpc_delay(task, NFS4_POLL_RETRY_MAX); - case -NFS4ERR_RETRY_UNCACHED_REP: - case -NFS4ERR_OLD_STATEID: - goto restart_call; - } - task->tk_status = nfs4_map_errors(task->tk_status); - return 0; -recovery_failed: - task->tk_status = -EIO; - return 0; -wait_on_recovery: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - goto recovery_failed; -restart_call: - task->tk_status = 0; - return -EAGAIN; -} - static void nfs4_init_boot_verifier(const struct nfs_client *clp, nfs4_verifier *bootverf) { -- cgit v1.2.3 From 36022770de6cf9a403c40a68712ed2d2ea2746be Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:34 +0800 Subject: nfs42: add CLONE xdr functions xdr definitions per draft-ietf-nfsv4-minorversion2-38.txt Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4xdr.c | 1 + 2 files changed, 97 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0eb29e14070d..0ca482a51e53 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -34,6 +34,12 @@ 1 /* opaque devaddr4 length */ + \ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) +#define encode_clone_maxsz (encode_stateid_maxsz + \ + encode_stateid_maxsz + \ + 2 /* src offset */ + \ + 2 /* dst offset */ + \ + 2 /* count */) +#define decode_clone_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -65,7 +71,20 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) - +#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_clone_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_clone_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_clone_maxsz + \ + decode_getattr_maxsz) static void encode_fallocate(struct xdr_stream *xdr, struct nfs42_falloc_args *args) @@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr, encode_uint32(xdr, 0); } +static void encode_clone(struct xdr_stream *xdr, + struct nfs42_clone_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->src_stateid); + encode_nfs4_stateid(xdr, &args->dst_stateid); + p = reserve_space(xdr, 3*8); + p = xdr_encode_hyper(p, args->src_offset); + p = xdr_encode_hyper(p, args->dst_offset); + xdr_encode_hyper(p, args->count); +} + /* * Encode ALLOCATE request */ @@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode CLONE request + */ +static void nfs4_xdr_enc_clone(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_clone_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->src_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dst_fh, &hdr); + encode_clone(xdr, args, &hdr); + encode_getfattr(xdr, args->dst_bitmask, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTSTATS); } +static int decode_clone(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_CLONE); +} + /* * Decode ALLOCATE request */ @@ -351,4 +411,39 @@ out: return status; } +/* + * Decode CLONE request + */ +static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_clone_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_savefh(xdr); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_clone(xdr); + if (status) + goto out; + status = decode_getfattr(xdr, res->dst_fattr, res->server); + +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 788adf3897c7..868472b6b303 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7465,6 +7465,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), + PROC(CLONE, enc_clone, dec_clone), #endif /* CONFIG_NFS_V4_2 */ }; -- cgit v1.2.3 From e5341f3a5762d17be9cdd06257c02c0098bdcab8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:35 +0800 Subject: nfs42: add CLONE proc functions Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 1 + fs/nfs/nfs42proc.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 3 ++- 3 files changed, 74 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 814c1255f1d2..b587ccd31083 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, struct nfs42_layoutstat_data *); +int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0f020e4d8421..3e92a3cde15d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -271,3 +271,74 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, return PTR_ERR(task); return 0; } + +static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, + struct file *dst_f, loff_t src_offset, + loff_t dst_offset, loff_t count) +{ + struct inode *src_inode = file_inode(src_f); + struct inode *dst_inode = file_inode(dst_f); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct nfs42_clone_args args = { + .src_fh = NFS_FH(src_inode), + .dst_fh = NFS_FH(dst_inode), + .src_offset = src_offset, + .dst_offset = dst_offset, + .dst_bitmask = server->cache_consistency_bitmask, + }; + struct nfs42_clone_res res = { + .server = server, + }; + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); + if (status) + return status; + + status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); + if (status) + return status; + + res.dst_fattr = nfs_alloc_fattr(); + if (!res.dst_fattr) + return -ENOMEM; + + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); + if (status == 0) + status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); + + kfree(res.dst_fattr); + return status; +} + +int nfs42_proc_clone(struct file *src_f, struct file *dst_f, + loff_t src_offset, loff_t dst_offset, loff_t count) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE], + }; + struct inode *inode = file_inode(src_f); + struct nfs_server *server = NFS_SERVER(file_inode(src_f)); + struct nfs4_exception exception = { }; + int err; + + if (!nfs_server_capable(inode, NFS_CAP_CLONE)) + return -EOPNOTSUPP; + + do { + err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, + dst_offset, count); + if (err == -ENOTSUPP || err == -EOPNOTSUPP) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; + return -EOPNOTSUPP; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; + +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5133bb18830e..9688b1a9787f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8729,7 +8729,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK - | NFS_CAP_LAYOUTSTATS, + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, -- cgit v1.2.3 From bea51b30b281039f0f43fb4f42028ddf33fb601f Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:36 +0800 Subject: nfs42: add NFS_IOC_CLONE ioctl It can be called by user space to CLONE two files. Follow btrfs lead and define NFS_IOC_CLONE same as BTRFS_IOC_CLONE. Thus we don't mess up userspace with too many ioctls. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index b0dbe0abed53..fc68ba5f030c 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -4,6 +4,7 @@ * Copyright (C) 1992 Rick Sladkey */ #include +#include #include #include #include "delegation.h" @@ -192,8 +193,104 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_deallocate(filep, offset, len); return nfs42_proc_allocate(filep, offset, len); } + +static noinline long +nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, + u64 src_off, u64 dst_off, u64 count) +{ + struct inode *dst_inode = file_inode(dst_file); + struct fd src_file; + struct inode *src_inode; + int ret; + + /* dst file must be opened for writing */ + if (!(dst_file->f_mode & FMODE_WRITE)) + return -EINVAL; + + ret = mnt_want_write_file(dst_file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + src_inode = file_inode(src_file.file); + + /* src and dst must be different files */ + ret = -EINVAL; + if (src_inode == dst_inode) + goto out_fput; + + /* src file must be opened for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* src and dst must be regular files */ + ret = -EISDIR; + if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src_file.file->f_path.mnt != dst_file->f_path.mnt || + src_inode->i_sb != dst_inode->i_sb) + goto out_fput; + + /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ + if (dst_inode < src_inode) { + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD); + } + + /* flush all pending writes on both src and dst so that server + * has the latest data */ + ret = nfs_sync_inode(src_inode); + if (ret) + goto out_unlock; + ret = nfs_sync_inode(dst_inode); + if (ret) + goto out_unlock; + + ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); + + /* truncate inode page cache of the dst range so that future reads can fetch + * new data from server */ + if (!ret) + truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1); + +out_unlock: + if (dst_inode < src_inode) { + mutex_unlock(&src_inode->i_mutex); + mutex_unlock(&dst_inode->i_mutex); + } else { + mutex_unlock(&dst_inode->i_mutex); + mutex_unlock(&src_inode->i_mutex); + } +out_fput: + fdput(src_file); +out_drop_write: + mnt_drop_write_file(dst_file); + return ret; +} #endif /* CONFIG_NFS_V4_2 */ +long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { +#ifdef CONFIG_NFS_V4_2 + case NFS_IOC_CLONE: + return nfs42_ioctl_clone(file, arg, 0, 0, 0); +#endif + } + + return -ENOTTY; +} + const struct file_operations nfs4_file_operations = { #ifdef CONFIG_NFS_V4_2 .llseek = nfs4_file_llseek, @@ -216,4 +313,9 @@ const struct file_operations nfs4_file_operations = { #endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, +#ifdef CONFIG_COMPAT + .unlocked_ioctl = nfs4_ioctl, +#else + .compat_ioctl = nfs4_ioctl, +#endif /* CONFIG_COMPAT */ }; -- cgit v1.2.3 From 2a92ee92d4545448066fb664674c0ae5a9d5ea99 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:37 +0800 Subject: nfs: get clone_blksize when probing fsinfo NFSv42 CLONE operation is supposed to respect it. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 57c5a02f6213..d6d5d2a48e83 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->time_delta = fsinfo->time_delta; + server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9688b1a9787f..8814fbe62623 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -239,6 +239,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE + | FATTR4_WORD2_CLONE_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[3] = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 868472b6b303..9f656791a338 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4764,6 +4764,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +/* + * The granularity of a CLONE operation. + */ +static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { unsigned int savep; @@ -4796,6 +4818,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if (status != 0) goto xdr_error; status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); + if (status) + goto xdr_error; + status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize); if (status) goto xdr_error; -- cgit v1.2.3 From 811b7b85d6641df580a6c43184cf13d6fcc7498d Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:38 +0800 Subject: nfs42: respect clone_blksize draft-ietf-nfsv4-minorversion2-38.txt says: Both cl_src_offset and cl_dst_offset must be aligned to the clone block size Section 12.2.1. The number of bytes to be cloned must be a multiple of the clone block size, except in the case in which cl_src_offset plus the number of bytes to be cloned is equal to the source file size. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index fc68ba5f030c..4f463dd8bae3 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -199,8 +199,10 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, u64 src_off, u64 dst_off, u64 count) { struct inode *dst_inode = file_inode(dst_file); + struct nfs_server *server = NFS_SERVER(dst_inode); struct fd src_file; struct inode *src_inode; + unsigned int bs = server->clone_blksize; int ret; /* dst file must be opened for writing */ @@ -238,6 +240,15 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, src_inode->i_sb != dst_inode->i_sb) goto out_fput; + /* check alignment w.r.t. clone_blksize */ + ret = -EINVAL; + if (bs) { + if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) + goto out_fput; + if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) + goto out_fput; + } + /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ if (dst_inode < src_inode) { mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT); -- cgit v1.2.3 From a340abcf4173461f688292a6879b4d5bc781c2b1 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:39 +0800 Subject: nfs42: add NFS_IOC_CLONE_RANGE ioctl It follows btrfs BTRFS_IOC_CLONE_RANGE lead on ioctl number and arguments. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4f463dd8bae3..4384a1d00ceb 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -288,14 +288,28 @@ out_drop_write: mnt_drop_write_file(dst_file); return ret; } + +static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) +{ + struct nfs_ioctl_clone_range_args args; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count); +} #endif /* CONFIG_NFS_V4_2 */ long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; + switch (cmd) { #ifdef CONFIG_NFS_V4_2 case NFS_IOC_CLONE: return nfs42_ioctl_clone(file, arg, 0, 0, 0); + case NFS_IOC_CLONE_RANGE: + return nfs42_ioctl_clone_range(file, argp); #endif } -- cgit v1.2.3 From 275058a2188903786e42b380ea5889ef0a7cdf95 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Oct 2015 16:20:11 -0400 Subject: NFS: Fix an 'unused variable' complaint when #ifndef CONFIG_NFS_V4_2 Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4384a1d00ceb..4aa571956cd6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -298,6 +298,17 @@ static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count); } +#else +static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, + u64 src_off, u64 dst_off, u64 count) +{ + return -ENOTTY; +} + +static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) +{ + return -ENOTTY; +} #endif /* CONFIG_NFS_V4_2 */ long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -305,12 +316,10 @@ long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; switch (cmd) { -#ifdef CONFIG_NFS_V4_2 case NFS_IOC_CLONE: return nfs42_ioctl_clone(file, arg, 0, 0, 0); case NFS_IOC_CLONE_RANGE: return nfs42_ioctl_clone_range(file, argp); -#endif } return -ENOTTY; -- cgit v1.2.3 From c646619355d196293daffdbb4fd877c8f5048e49 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 24 Sep 2015 10:19:09 +0800 Subject: nfsroot: make nfsroot to accept the 1024 bytes long directory name although NFS_MAXPATHLEN is defined to 1024, nfs client hopes to accept a 1024 byte path, but nfs_root_parms is limited to 256, and the nfs path will truncated when a user inputs nfs path from kernel cmdline enlarge nfs_root_parms to 1024, to make it accept the 1024 bytes long directory name, since nfs_root_parms is defined as _initdata, it will be released after system bootup Signed-off-by: Li RongQing Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 9bc9f04fb7f6..89a15dbe5efc 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -90,7 +90,7 @@ #define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" /* Parameters passed from the kernel command line */ -static char nfs_root_parms[256] __initdata = ""; +static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = ""; /* Text-based mount options passed to super.c */ static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS; -- cgit v1.2.3 From d4e2ce096101bad9ca5bad3c27488905f91e30cb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:55:21 +0800 Subject: NFS: Get rid of the unneeded addr stored in callback arguments Commit c36fca52f5 "NFS refactor nfs_find_client and reference client across callback processing" has store clp in cb_process_state which is set in cb_sequence. So that, it's unneeded to store address pointer in any callback arguments. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 5 ----- fs/nfs/callback_xdr.c | 5 ----- 2 files changed, 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 84326e9fb47a..0a590f05c7cd 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -61,7 +61,6 @@ struct cb_compound_hdr_res { }; struct cb_getattrargs { - struct sockaddr *addr; struct nfs_fh fh; uint32_t bitmap[2]; }; @@ -76,7 +75,6 @@ struct cb_getattrres { }; struct cb_recallargs { - struct sockaddr *addr; struct nfs_fh fh; nfs4_stateid stateid; uint32_t truncate; @@ -134,7 +132,6 @@ extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, #define RCA4_TYPE_MASK_ALL 0xf31f struct cb_recallanyargs { - struct sockaddr *craa_addr; uint32_t craa_objs_to_keep; uint32_t craa_type_mask; }; @@ -144,7 +141,6 @@ extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, struct cb_process_state *cps); struct cb_recallslotargs { - struct sockaddr *crsa_addr; uint32_t crsa_target_highest_slotid; }; extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, @@ -152,7 +148,6 @@ extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, struct cb_process_state *cps); struct cb_layoutrecallargs { - struct sockaddr *cbl_addr; uint32_t cbl_recall_type; uint32_t cbl_layout_type; uint32_t cbl_layoutchanged; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 6b1697a01dde..e87d7f0f4e23 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -198,7 +198,6 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr status = decode_fh(xdr, &args->fh); if (unlikely(status != 0)) goto out; - args->addr = svc_addr(rqstp); status = decode_bitmap(xdr, args->bitmap); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); @@ -210,7 +209,6 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, __be32 *p; __be32 status; - args->addr = svc_addr(rqstp); status = decode_stateid(xdr, &args->stateid); if (unlikely(status != 0)) goto out; @@ -236,7 +234,6 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, __be32 status = 0; uint32_t iomode; - args->cbl_addr = svc_addr(rqstp); p = read_buf(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) { status = htonl(NFS4ERR_BADXDR); @@ -500,7 +497,6 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, uint32_t bitmap[2]; __be32 *p, status; - args->craa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -519,7 +515,6 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, { __be32 *p; - args->crsa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); -- cgit v1.2.3 From 8c163d8e5ad2bd7982904bbe568706e1b0bbf60a Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:55:59 +0800 Subject: NFS: Remove the left global variable nfs_callback_tcpport Commit bbe0a3aa4e22 "NFS: make nfs_callback_tcpport per network context" has make nfs_callback_tcpport per network, but left the global nfs_callback_tcpport, remove it. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 1 - fs/nfs/super.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 0a590f05c7cd..cbcc903b8985 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -204,6 +204,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; -extern unsigned short nfs_callback_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 383a027de452..f1268280244e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2816,7 +2816,6 @@ out_invalid_transport_udp: * NFS client for backwards compatibility */ unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600; /* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ @@ -2827,7 +2826,6 @@ char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); -EXPORT_SYMBOL_GPL(nfs_callback_tcpport); EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); EXPORT_SYMBOL_GPL(max_session_slots); -- cgit v1.2.3 From f765bf762bb57d8bda825e5440a4801232cd835f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:56:29 +0800 Subject: NFS: Remove the left function defines in callback.h Commit 778be232a207 "NFS do not find client in NFSv4 pg_authenticate" has remove the define and using of nfs4_set_callback_sessionid(), and commit 36281caa839f "NFSv4: Further clean-ups of delegation stateid validation" has update the checking of stateid, and move the code to nfs4proc.c. This patch remove those function defines left in callback.h Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index cbcc903b8985..ff8195bd75ea 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -117,9 +117,6 @@ extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps); -extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); - #define RCA4_TYPE_MASK_RDATA_DLG 0 #define RCA4_TYPE_MASK_WDATA_DLG 1 #define RCA4_TYPE_MASK_DIR_DLG 2 @@ -191,9 +188,6 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); extern void nfs_callback_down(int minorversion, struct net *net); -extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); -extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #endif /* CONFIG_NFS_V4 */ /* * nfs41: Callbacks are expected to not cause substantial latency, -- cgit v1.2.3 From 39de493e8801812cae076a02d84b4f80b88f94b9 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:57:03 +0800 Subject: NFS: Remove unneeded NFS_DEBUG checking before define NFSDBG_FACILITY It's not needed to checking NFS_DEBUG before define NFSDBG_FACILITY, remove it. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 -- fs/nfs/mount_clnt.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b85cf7a30232..807eb6ef4f91 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -17,9 +17,7 @@ #include "nfs4session.h" #include "nfs4trace.h" -#ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK -#endif __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res, diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 99a45283b9ee..09b190015df4 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -16,9 +16,7 @@ #include #include "internal.h" -#ifdef NFS_DEBUG -# define NFSDBG_FACILITY NFSDBG_MOUNT -#endif +#define NFSDBG_FACILITY NFSDBG_MOUNT /* * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4 -- cgit v1.2.3 From 590184a6cea32c1f864a8e614f44e35a81a53fe0 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:57:37 +0800 Subject: NFS: Use NFS4_MAX_SESSIONID_LEN directly for decode/encode sessionid It's no need to define a temporary variables for NFS4_MAX_SESSIONID_LEN. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e87d7f0f4e23..09aeb9a47dac 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -380,13 +380,12 @@ static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = read_buf(xdr, len); + p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(sid->data, p, len); + memcpy(sid->data, p, NFS4_MAX_SESSIONID_LEN); return 0; } @@ -679,13 +678,12 @@ static __be32 encode_sessionid(struct xdr_stream *xdr, const struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = xdr_reserve_space(xdr, len); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(p, sid, len); + memcpy(p, sid, NFS4_MAX_SESSIONID_LEN); return 0; } -- cgit v1.2.3 From 45724e8a5b2a69b9524fd16ff73345fab9aae279 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:57:58 +0800 Subject: NFS: Fix bad defines of callback response maxsize As CB_OP_TAGLEN_MAXSZ, all XXX_MAXSZ should be defined as bit. Each operation should not cantains CB_OP_TAGLEN_MAXSZ. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 09aeb9a47dac..9f0f0f646eaf 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -18,19 +18,21 @@ #include "internal.h" #include "nfs4session.h" -#define CB_OP_TAGLEN_MAXSZ (512) -#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) -#define CB_OP_GETATTR_BITMAP_MAXSZ (4) -#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - CB_OP_GETATTR_BITMAP_MAXSZ + \ - 2 + 2 + 3 + 3) -#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status +#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + /* change, size, ctime, mtime */\ + (2 + 2 + 3 + 3) * 4) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - 4 + 1 + 3) + NFS4_MAX_SESSIONID_LEN + \ + (1 + 3) * 4) // seqid, 3 slotids #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3 From 403889c0399c01a12877e3736ae1e96c9ded27be Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:58:16 +0800 Subject: NFS: Fix bad checking of max taglen in callback request The taglen should be checked with CB_OP_TAGLEN_MAXSZ directly. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 9f0f0f646eaf..4ad39fe203f7 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -159,7 +159,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (unlikely(status != 0)) return status; /* We do not like overly long tags! */ - if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) { printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); -- cgit v1.2.3 From e0a63c0bfcc6f8283377258b5d892f88abd913c1 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 24 Sep 2015 20:58:38 +0800 Subject: NFS: Return directly if encode_sessionid fail encode_sessionid() may return error, nfs needs process the return value. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 4ad39fe203f7..646cdac73488 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -699,7 +699,9 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, if (unlikely(status != 0)) goto out; - encode_sessionid(xdr, &res->csr_sessionid); + status = encode_sessionid(xdr, &res->csr_sessionid); + if (status) + goto out; p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) -- cgit v1.2.3 From 15ae2c7bdc9a5a46999319b88a465a64d265dc40 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 16 Oct 2015 17:22:50 +0800 Subject: nfs/blocklayout: Fix bad using of page offset in bl_read_pagelist Blocklayout uses file offset for the read-back page's offset of first writing, it's definitely wrong, it writes data to bad address of page that cause userspace application segment fault. It must be the page base stored in header->args.pgbase. Also, the pg_offset has no influence with isect and extent length. Note: The offset of the non-first page is always zero. Ps: A test program will segment fault at read() as, #define _GNU_SOURCE #include #include #include #include #include #include #include int main(int argc, char **argv) { char buf[2049]; char *filename = NULL; int fd = -1; if (argc < 2) { printf("Usage: %s filename\n", argv[0]); return 0; } filename = argv[1]; fd = open(filename, O_RDONLY | O_DIRECT); if (fd < 0) { printf("Open %s fail: %m\n", filename); return 1; } lseek(fd, 2048, SEEK_SET); if (read(fd, buf, sizeof(buf) - 1) != (sizeof(buf) - 1)) printf("Read 4096 bityes data from %s fail: %m\n", filename); out: close(fd); return 0; } Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9cd4eb3a1e22..ddd0138f410c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -229,7 +229,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) struct parallel_io *par; loff_t f_offset = header->args.offset; size_t bytes_left = header->args.count; - unsigned int pg_offset, pg_len; + unsigned int pg_offset = header->args.pgbase, pg_len; struct page **pages = header->args.pages; int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); @@ -262,7 +262,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length = be.be_length - (isect - be.be_f_offset); } - pg_offset = f_offset & ~PAGE_CACHE_MASK; if (is_dio) { if (pg_offset + bytes_left > PAGE_CACHE_SIZE) pg_len = PAGE_CACHE_SIZE - pg_offset; @@ -273,9 +272,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) pg_len = PAGE_CACHE_SIZE; } - isect += (pg_offset >> SECTOR_SHIFT); - extent_length -= (pg_offset >> SECTOR_SHIFT); - if (is_hole(&be)) { bio = bl_submit_bio(READ, bio); /* Fill hole w/ zeroes w/o accessing device */ @@ -301,6 +297,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length -= (pg_len >> SECTOR_SHIFT); f_offset += pg_len; bytes_left -= pg_len; + pg_offset = 0; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { header->res.eof = 1; -- cgit v1.2.3 From f8417b481cce2bed4744fda733f2ff22278bd7ce Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 16 Oct 2015 17:23:29 +0800 Subject: NFSv4.1/pnfs: Retry through MDS when getting bad length of data If non rpc-based layout driver return bad length of data, nfs retries by calling rpc_restart_call_prepare() that cause an NULL reference panic. This patch lets nfs retry through MDS for non rpc-based layout driver return bad length of data. [13034.883329] BUG: unable to handle kernel NULL pointer dereference at (null) [13034.884902] IP: [] rpc_restart_call_prepare+0x62/0x90 [sunrpc] [13034.886558] PGD 0 [13034.888126] Oops: 0000 [#1] KASAN [13034.889710] Modules linked in: blocklayoutdriver(OE) nfsv4(OE) nfs(OE) fscache(E) nfsd(OE) xfs libcrc32c coretemp btrfs crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ppdev vmw_balloon auth_rpcgss shpchp nfs_acl lockd vmw_vmci parport_pc xor raid6_pq grace parport sunrpc i2c_piix4 vmwgfx drm_kms_helper ttm drm mptspi e1000 serio_raw scsi_transport_spi mptscsih mptbase ata_generic pata_acpi [last unloaded: fscache] [13034.898260] CPU: 0 PID: 10112 Comm: kworker/0:1 Tainted: G OE 4.3.0-rc5+ #279 [13034.899932] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [13034.903342] Workqueue: events bl_read_cleanup [blocklayoutdriver] [13034.905059] task: ffff88006a9148c0 ti: ffff880035e90000 task.ti: ffff880035e90000 [13034.906827] RIP: 0010:[] [] rpc_restart_call_prepare+0x62/0x90 [sunrpc] [13034.910522] RSP: 0018:ffff880035e97b58 EFLAGS: 00010282 [13034.912378] RAX: fffffbfff04a5a94 RBX: ffff880068fe4858 RCX: 0000000000000003 [13034.914339] RDX: dffffc0000000000 RSI: 0000000000000003 RDI: 0000000000000282 [13034.916236] RBP: ffff880035e97b68 R08: 0000000000000001 R09: 0000000000000001 [13034.918229] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [13034.920007] R13: ffff880068fe4858 R14: ffff880068fe4a60 R15: 0000000000001000 [13034.921845] FS: 0000000000000000(0000) GS:ffffffff82247000(0000) knlGS:0000000000000000 [13034.923645] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [13034.925525] CR2: 0000000000000000 CR3: 00000000063dd000 CR4: 00000000001406f0 [13034.932808] Stack: [13034.934813] ffff880068fe4780 0000000000001000 ffff880035e97ba8 ffffffffa08800d2 [13034.936675] ffffffffa088029d ffff880068fe4780 ffff880068fe4858 ffffffffa089c0a0 [13034.938593] ffff880068fe47e0 ffff88005d59faf0 ffff880035e97be0 ffffffffa087e08f [13034.940454] Call Trace: [13034.942388] [] nfs_readpage_result+0x112/0x200 [nfs] [13034.944317] [] ? nfs_readpage_done+0xdd/0x160 [nfs] [13034.946267] [] nfs_pgio_result+0x9f/0x120 [nfs] [13034.948166] [] pnfs_ld_read_done+0x7c/0x1e0 [nfsv4] [13034.950247] [] bl_read_cleanup+0x2e/0x60 [blocklayoutdriver] [13034.952156] [] process_one_work+0x412/0x870 [13034.954102] [] ? process_one_work+0x334/0x870 [13034.955949] [] ? queue_delayed_work_on+0x40/0x40 [13034.957985] [] worker_thread+0x81/0x6a0 [13034.959817] [] ? process_one_work+0x870/0x870 [13034.961785] [] kthread+0x17d/0x1a0 [13034.963544] [] ? kthread_create_on_node+0x330/0x330 [13034.965479] [] ? finish_task_switch+0x88/0x220 [13034.967223] [] ? kthread_create_on_node+0x330/0x330 [13034.968929] [] ret_from_fork+0x3f/0x70 [13034.970534] [] ? kthread_create_on_node+0x330/0x330 [13034.972176] Code: c7 43 50 40 84 0d a0 e8 3d fe 1c e1 48 8d 7b 58 c7 83 e4 00 00 00 00 00 00 00 e8 ca fe 1c e1 4c 8b 63 58 4c 89 e7 e8 be fe 1c e1 <49> 83 3c 24 00 74 12 48 c7 43 50 f0 a2 0e a0 b8 01 00 00 00 5b [13034.977148] RIP [] rpc_restart_call_prepare+0x62/0x90 [sunrpc] [13034.978780] RSP [13034.980399] CR2: 0000000000000000 Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 12 +++++++----- fs/nfs/read.c | 9 ++++++++- fs/nfs/write.c | 7 +++++++ 3 files changed, 22 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8abe27165ad0..93496c059837 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1912,12 +1912,13 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); - if (!hdr->pnfs_error) { + if (likely(!hdr->pnfs_error)) { pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_write_error(hdr); hdr->mds_ops->rpc_release(hdr); } @@ -2028,11 +2029,12 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(hdr); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_read_error(hdr); hdr->mds_ops->rpc_release(hdr); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 01b8cc8e8cfc..0a5e33f33b5c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -246,6 +246,13 @@ static void nfs_readpage_retry(struct rpc_task *task, nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Yes, so retry the read at the end of the hdr */ hdr->mds_offset += resp->count; argp->offset += resp->count; @@ -268,7 +275,7 @@ static void nfs_readpage_result(struct rpc_task *task, hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (hdr->res.count != hdr->args.count) + } else if (hdr->res.count < hdr->args.count) nfs_readpage_retry(task, hdr); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 75ab7622e0cc..7b9316406930 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1505,6 +1505,13 @@ static void nfs_writeback_result(struct rpc_task *task, task->tk_status = -EIO; return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ -- cgit v1.2.3 From 135444126a1175912b43366f6109cb297018f034 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2015 10:11:17 -0500 Subject: pNFS/flexfiles: When mirrored, retry failed reads by switching mirrors If the pNFS/flexfiles file is mirrored, and a read to one mirror fails, then we should bump the mirror index, so that we retry to a different mirror. Once we've iterated through all mirrors and all failed, we can return the layout and issue a new LAYOUTGET. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index fbc5a56de875..7fc14b90886e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -741,17 +741,17 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, } static struct nfs4_pnfs_ds * -ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, +ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx) { - struct nfs4_ff_layout_segment *fls; + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_pnfs_ds *ds; int idx; - fls = FF_LAYOUT_LSEG(pgio->pg_lseg); /* mirrors are sorted by efficiency */ - for (idx = 0; idx < fls->mirror_array_cnt; idx++) { - ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); + for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { + ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); if (ds) { *best_idx = idx; return ds; @@ -782,7 +782,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, if (pgio->pg_lseg == NULL) goto out_mds; - ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); + ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); if (!ds) goto out_mds; mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); @@ -1171,6 +1171,10 @@ static int ff_layout_read_done_cb(struct rpc_task *task, switch (err) { case -NFS4ERR_RESET_TO_PNFS: + if (ff_layout_choose_best_ds_for_read(hdr->lseg, + hdr->pgio_mirror_idx + 1, + &hdr->pgio_mirror_idx)) + goto out_eagain; set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &hdr->lseg->pls_layout->plh_flags); pnfs_read_resend_pnfs(hdr); @@ -1179,11 +1183,13 @@ static int ff_layout_read_done_cb(struct rpc_task *task, ff_layout_reset_read(hdr); return task->tk_status; case -EAGAIN: - rpc_restart_call_prepare(task); - return -EAGAIN; + goto out_eagain; } return 0; +out_eagain: + rpc_restart_call_prepare(task); + return -EAGAIN; } static bool -- cgit v1.2.3 From 260074cd8413489903d4484058e61649d6e08580 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2015 09:59:00 -0500 Subject: pNFS/flexfiles: Add support for FF_FLAGS_NO_IO_THRU_MDS For loosely coupled pNFS/flexfiles systems, there is often no advantage at all in going through the MDS for I/O, since the MDS is subject to the same limitations as all other clients when talking to DSes. If a DS is unresponsive, I/O through the MDS will fail. For such systems, the only scalable solution is to have the pNFS clients retry doing pNFS, and so the protocol now provides a flag that allows the pNFS server to signal this. If LAYOUTGET returns FF_FLAGS_NO_IO_THRU_MDS, then we should assume that the MDS wants the client to retry using these devices, even if they were previously marked as being unavailable. To do so, we add a helper, ff_layout_mark_devices_valid() that will be called from layoutget. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 18 ++++++++++++++++-- fs/nfs/flexfilelayout/flexfilelayout.h | 7 +++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7fc14b90886e..03516c80855a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -339,6 +339,19 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) } } +static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) +{ + struct nfs4_deviceid_node *node; + int i; + + if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) + return; + for (i = 0; i < fls->mirror_array_cnt; i++) { + node = &fls->mirror_array[i]->mirror_ds->id_node; + clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + } +} + static struct pnfs_layout_segment * ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_layoutget_res *lgr, @@ -499,6 +512,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, rc = ff_layout_check_layout(lgr); if (rc) goto out_err_free; + ff_layout_mark_devices_valid(fls); ret = &fls->generic_hdr; dprintk("<-- %s (success)\n", __func__); @@ -1035,7 +1049,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: - if (ff_layout_has_available_ds(lseg)) + if (ff_layout_no_fallback_to_mds(lseg) || + ff_layout_has_available_ds(lseg)) return -NFS4ERR_RESET_TO_PNFS; reset: dprintk("%s Retry through MDS. Error %d\n", __func__, @@ -1153,7 +1168,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, } /* NFS_PROTO call done callback routines */ - static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 68cc0d9828f9..2bb08bc6aaf0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -10,6 +10,7 @@ #define FS_NFS_NFS4FLEXFILELAYOUT_H #define FF_FLAGS_NO_LAYOUTCOMMIT 1 +#define FF_FLAGS_NO_IO_THRU_MDS 2 #include "../pnfs.h" @@ -145,6 +146,12 @@ FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt; } +static inline bool +ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg) +{ + return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_IO_THRU_MDS; +} + static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { -- cgit v1.2.3 From 76566773a1f1c2295ed901b6f1241cfe10d99029 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:32 -0400 Subject: NFS: Enable client side NFSv4.1 backchannel to use other transports Forechannel transports get their own "bc_up" method to create an endpoint for the backchannel service. Signed-off-by: Chuck Lever [Anna Schumaker: Add forward declaration of struct net to xprt.h] Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f7c0a7538a..a7f2e6e33305 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv) } #if defined(CONFIG_NFS_V4_1) -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); -} - /* * The callback service for NFSv4.1 callbacks */ @@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - return 0; -} - static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { @@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc svc_shutdown_net(serv, net); } -static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, + struct net *net, struct rpc_xprt *xprt) { struct nfs_net *nn = net_generic(net, nfs_net_id); int ret; @@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n goto err_bind; } - switch (minorversion) { - case 0: - ret = nfs4_callback_up_net(serv, net); - break; - case 1: - case 2: - ret = nfs41_callback_up_net(serv, net); - break; - default: - printk(KERN_ERR "NFS: unknown callback version: %d\n", - minorversion); - ret = -EINVAL; - break; - } + ret = -EPROTONOSUPPORT; + if (minorversion == 0) + ret = nfs4_callback_up_net(serv, net); + else if (xprt->ops->bc_up) + ret = xprt->ops->bc_up(serv, net); if (ret < 0) { printk(KERN_ERR "NFS: callback service start failed\n"); @@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_create; } - ret = nfs_callback_up_net(minorversion, serv, net); + ret = nfs_callback_up_net(minorversion, serv, net, xprt); if (ret < 0) goto err_net; -- cgit v1.2.3 From 118c9163562426face9ff0903f1cbd3e1a959ca9 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 28 Sep 2015 22:30:32 +0800 Subject: fs/nfs: remove unnecessary new_valid_dev check As new_valid_dev always returns 1, so !new_valid_dev check is not needed, remove it. Signed-off-by: Yaowei Bai Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3d8e4ffa0a33..ce5a21861074 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1714,9 +1714,6 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); - if (!new_valid_dev(rdev)) - return -EINVAL; - attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; -- cgit v1.2.3 From 8fbcf237439f841e7e9c4675790e08ea1c295bd3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 Nov 2015 18:25:34 +0100 Subject: nfs: Remove unused xdr page offsets in getacl/setacl arguments The arguments passed around for getacl and setacl xdr encoding, struct nfs_setaclargs and struct nfs_getaclargs, both contain an array of pages, an offset into the first page, and the length of the page data. The offset is unused as it is always zero; remove it. Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ++--- fs/nfs/nfs4xdr.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87a081c6299d..7ed8f2cd97f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4603,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) + struct page **pages) { struct page *newpage, **spages; int rc = 0; @@ -4747,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu goto out_free; args.acl_len = npages * PAGE_SIZE; - args.acl_pgbase = 0; dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -4839,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_return_delegation(inode); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9f656791a338..22a1ddd4fe96 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun *p = cpu_to_be32(FATTR4_WORD0_ACL); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); - xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len); } static void @@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, args->acl_pgbase, args->acl_len); + args->acl_pages, 0, args->acl_len); encode_nops(&hdr); } -- cgit v1.2.3 From 1ca843a2d28dec89e58e7227c27a9d55f21f59e1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 Nov 2015 18:25:33 +0100 Subject: nfs: Fix GETATTR bitmap verification When decoding GETATTR replies, the client checks the attribute bitmap for which attributes the server has sent. It misses bits at the word boundaries, though; fix that. Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 22a1ddd4fe96..dfed4f5c8fcc 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4375,6 +4375,11 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) goto xdr_error; if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0) goto xdr_error; if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0) @@ -4574,6 +4579,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_mode(xdr, bitmap, &fmode); if (status < 0) goto xdr_error; @@ -4627,6 +4636,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold); if (status < 0) goto xdr_error; @@ -4811,12 +4824,22 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) goto xdr_error; fsinfo->wtpref = fsinfo->wtmax; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); if (status != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); if (status) goto xdr_error; -- cgit v1.2.3