From 6f18dc893981e4daab29221d6a9771f3ce2dd8c5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Nov 2015 09:44:33 -0500 Subject: svcrdma: Do not send XDR roundup bytes for a write chunk Minor optimization: when dealing with write chunk XDR roundup, do not post a Write WR for the zero bytes in the pad. Simply update the write segment in the RPC-over-RDMA header to reflect the extra pad bytes. The Reply chunk is also a write chunk, but the server does not use send_write_chunks() to send the Reply chunk. That's OK in this case: the server Upper Layer typically marshals the Reply chunk contents in a single contiguous buffer, without a separate tail for the XDR pad. The comments and the variable naming refer to "chunks" but what is really meant is "segments." The existing code sends only one xdr_write_chunk per RPC reply. The fix assumes this as well. When the XDR pad in the first write chunk is reached, the assumption is the Write list is complete and send_write_chunks() returns. That will remain a valid assumption until the server Upper Layer can support multiple bulk payload results per RPC. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 969a1ab75fc3..bad5eaa9f812 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -342,6 +342,13 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, arg_ch->rs_handle, arg_ch->rs_offset, write_len); + + /* Do not send XDR pad bytes */ + if (chunk_no && write_len < 4) { + chunk_no++; + break; + } + chunk_off = 0; while (write_len) { ret = send_write(xprt, rqstp, -- cgit v1.2.3 From 7c582e4faaf2593116068fc9ec8f5d81f720c02b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 22 Nov 2015 08:22:10 +0100 Subject: nfsd: recover: constify nfsd4_client_tracking_ops structures The nfsd4_client_tracking_ops structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfs4recover.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index d8b16c2568f3..5fbf3bbd00d0 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -92,7 +92,7 @@ struct nfsd_net { struct file *rec_file; bool in_grace; - struct nfsd4_client_tracking_ops *client_tracking_ops; + const struct nfsd4_client_tracking_ops *client_tracking_ops; time_t nfsd4_lease; time_t nfsd4_grace; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e3d47091b191..79f0307a5ec8 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -631,7 +631,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) return -ENOENT; } -static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { +static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { .init = nfsd4_legacy_tracking_init, .exit = nfsd4_legacy_tracking_exit, .create = nfsd4_create_clid_dir, @@ -1050,7 +1050,7 @@ out_err: printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); } -static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { +static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { .init = nfsd4_init_cld_pipe, .exit = nfsd4_remove_cld_pipe, .create = nfsd4_cld_create, @@ -1394,7 +1394,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) kfree(legacy); } -static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { +static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { .init = nfsd4_umh_cltrack_init, .exit = NULL, .create = nfsd4_umh_cltrack_create, -- cgit v1.2.3 From c4cb897462c93ba09543d912344c29a26c92eb31 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 Nov 2015 22:57:39 +0100 Subject: nfsd: constify nfsd4_callback_ops structure The nfsd4_callback_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 2 +- fs/nfsd/nfs4layouts.c | 4 ++-- fs/nfsd/nfs4state.c | 4 ++-- fs/nfsd/state.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e7f50c4081d6..081709c3fa4f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1143,7 +1143,7 @@ nfsd4_run_cb_work(struct work_struct *work) } void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, - struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) + const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) { cb->cb_clp = clp; cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 9ffef06b30d5..fec49febb75b 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -22,7 +22,7 @@ struct nfs4_layout { static struct kmem_cache *nfs4_layout_cache; static struct kmem_cache *nfs4_layout_stateid_cache; -static struct nfsd4_callback_ops nfsd4_cb_layout_ops; +static const struct nfsd4_callback_ops nfsd4_cb_layout_ops; static const struct lock_manager_operations nfsd4_layouts_lm_ops; const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { @@ -665,7 +665,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) nfs4_put_stid(&ls->ls_stid); } -static struct nfsd4_callback_ops nfsd4_cb_layout_ops = { +static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = { .prepare = nfsd4_cb_layout_prepare, .done = nfsd4_cb_layout_done, .release = nfsd4_cb_layout_release, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6b800b5b8fed..0d30a36bbae6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -98,7 +98,7 @@ static struct kmem_cache *odstate_slab; static void free_session(struct nfsd4_session *); -static struct nfsd4_callback_ops nfsd4_cb_recall_ops; +static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static bool is_session_dead(struct nfsd4_session *ses) { @@ -3648,7 +3648,7 @@ static void nfsd4_cb_recall_release(struct nfsd4_callback *cb) nfs4_put_stid(&dp->dl_stid); } -static struct nfsd4_callback_ops nfsd4_cb_recall_ops = { +static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { .prepare = nfsd4_cb_recall_prepare, .done = nfsd4_cb_recall_done, .release = nfsd4_cb_recall_release, diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 77fdf4de91ba..5e2ba19fe3cb 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -65,7 +65,7 @@ struct nfsd4_callback { struct nfs4_client *cb_clp; u32 cb_minorversion; struct rpc_message cb_msg; - struct nfsd4_callback_ops *cb_ops; + const struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; int cb_seq_status; int cb_status; @@ -599,7 +599,7 @@ extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, - struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); + const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); extern void nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); -- cgit v1.2.3 From d3f03403a8735cebfcc16db4edfbf07c5c7421f5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 21 Nov 2015 13:28:50 +0300 Subject: nfsd: fix a warning message The WARN() macro takes a condition and a format string. The condition was accidentally left out here so it just prints the function name instead of the message. Signed-off-by: Dan Carpenter Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0d30a36bbae6..9f6beb8e9918 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2240,7 +2240,8 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) base = resp->cstate.data_offset; slot->sl_datalen = buf->len - base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) - WARN("%s: sessions DRC could not cache compound\n", __func__); + WARN(1, "%s: sessions DRC could not cache compound\n", + __func__); return; } -- cgit v1.2.3 From 6496500cf15f29ac8afc565e2e4b6f92a1324860 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 Nov 2015 15:45:35 -0500 Subject: svcrpc: move some initialization to common code Minor cleanup, no change in behavior. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth.c | 2 ++ net/sunrpc/svcauth_unix.c | 8 -------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 79c0f3459b5c..69841db1f533 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -55,6 +55,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) spin_unlock(&authtab_lock); rqstp->rq_auth_slack = 0; + init_svc_cred(&rqstp->rq_cred); rqstp->rq_authop = aops; return aops->accept(rqstp, authp); @@ -63,6 +64,7 @@ EXPORT_SYMBOL_GPL(svc_authenticate); int svc_set_client(struct svc_rqst *rqstp) { + rqstp->rq_client = NULL; return rqstp->rq_authop->set_client(rqstp); } EXPORT_SYMBOL_GPL(svc_set_client); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 621ca7b4a155..dfacdc95b3f5 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -728,10 +728,6 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) struct kvec *resv = &rqstp->rq_res.head[0]; struct svc_cred *cred = &rqstp->rq_cred; - cred->cr_group_info = NULL; - cred->cr_principal = NULL; - rqstp->rq_client = NULL; - if (argv->iov_len < 3*4) return SVC_GARBAGE; @@ -794,10 +790,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) u32 slen, i; int len = argv->iov_len; - cred->cr_group_info = NULL; - cred->cr_principal = NULL; - rqstp->rq_client = NULL; - if ((len -= 3*4) < 0) return SVC_GARBAGE; -- cgit v1.2.3 From 50043859325b377e728676d31aad7affaf91b2ce Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 Nov 2015 15:58:37 -0500 Subject: nfsd: helper for dup of possibly NULL string Technically the initialization in the NULL case isn't even needed as the only caller already has target zeroed out, but it seems safer to keep copy_cred generic. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9f6beb8e9918..641604a221bc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1857,15 +1857,24 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) target->cl_clientid.cl_id = source->cl_clientid.cl_id; } -static int copy_cred(struct svc_cred *target, struct svc_cred *source) +int strdup_if_nonnull(char **target, char *source) { - if (source->cr_principal) { - target->cr_principal = - kstrdup(source->cr_principal, GFP_KERNEL); - if (target->cr_principal == NULL) + if (source) { + *target = kstrdup(source, GFP_KERNEL); + if (!*target) return -ENOMEM; } else - target->cr_principal = NULL; + *target = NULL; + return 0; +} + +static int copy_cred(struct svc_cred *target, struct svc_cred *source) +{ + int ret; + + ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal); + if (ret) + return ret; target->cr_flavor = source->cr_flavor; target->cr_uid = source->cr_uid; target->cr_gid = source->cr_gid; -- cgit v1.2.3 From 50c7b948adbd1f8f0475fa0c92abb51c8a49f847 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 23 Nov 2015 15:39:12 -0700 Subject: nfsd: minor consolidation of mach_cred handling code Minor cleanup, no change in functionality. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 641604a221bc..efa3d4c09dab 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2375,10 +2375,17 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; + new = create_client(exid->clname, rqstp, &verf); + if (new == NULL) + return nfserr_jukebox; + switch (exid->spa_how) { case SP4_MACH_CRED: - if (!svc_rqst_integrity_protected(rqstp)) - return nfserr_inval; + if (!svc_rqst_integrity_protected(rqstp)) { + status = nfserr_inval; + goto out_nolock; + } + new->cl_mach_cred = true; case SP4_NONE: break; default: /* checked by xdr code */ @@ -2387,10 +2394,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, return nfserr_encr_alg_unsupp; } - new = create_client(exid->clname, rqstp, &verf); - if (new == NULL) - return nfserr_jukebox; - /* Cases below refer to rfc 5661 section 18.35.4: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&exid->clname, nn); @@ -2452,7 +2455,6 @@ out_new: goto out; } new->cl_minorversion = cstate->minorversion; - new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); @@ -2470,6 +2472,7 @@ out_copy: out: spin_unlock(&nn->client_lock); +out_nolock: if (new) expire_client(new); if (unconf) -- cgit v1.2.3 From 920dd9bb7d7cf9ae339e15240326a28a22f08a74 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 Nov 2015 16:42:40 -0500 Subject: nfsd: fix unlikely NULL deref in mach_creds_match We really shouldn't allow a client to be created with cl_mach_cred set unless it also has a principal name. This also allows us to fail such cases immediately on EXCHANGE_ID as opposed to waiting and incorrectly returning WRONG_CRED on the following CREATE_SESSION. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index efa3d4c09dab..ed58ced6fa8b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2385,6 +2385,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_inval; goto out_nolock; } + /* + * Sometimes userspace doesn't give us a principal. + * Which is a bug, really. Anyway, we can't enforce + * MACH_CRED in that case, better to give up now: + */ + if (!new->cl_cred.cr_principal) { + status = nfserr_serverfault; + goto out_nolock; + } new->cl_mach_cred = true; case SP4_NONE: break; -- cgit v1.2.3 From 414ca017a54d26c3a58ed1504884e51448d22ae1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 Nov 2015 10:48:02 -0500 Subject: nfsd4: fix gss-proxy 4.1 mounts for some AD principals The principal name on a gss cred is used to setup the NFSv4.0 callback, which has to have a client principal name to authenticate to. That code wants the name to be in the form servicetype@hostname. rpc.svcgssd passes down such names (and passes down no principal name at all in the case the principal isn't a service principal). gss-proxy always passes down the principal name, and passes it down in the form servicetype/hostname@REALM. So we've been munging the name gss-proxy passes down into the format the NFSv4.0 callback code expects, or throwing away the name if we can't. Since the introduction of the MACH_CRED enforcement in NFSv4.1, we've also been using the principal name to verify that certain operations are done as the same principal as was used on the original EXCHANGE_ID call. For that application, the original name passed down by gss-proxy is also useful. Lack of that name in some cases was causing some kerberized NFSv4.1 mount failures in an Active Directory environment. This fix only works in the gss-proxy case. The fix for legacy rpc.svcgssd would be more involved, and rpc.svcgssd already has other problems in the AD case. Reported-and-tested-by: James Ralston Acked-by: Simo Sorce Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 +++++++++- include/linux/sunrpc/svcauth.h | 9 ++++++++- net/sunrpc/auth_gss/gss_rpc_upcall.c | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ed58ced6fa8b..113ecbfac25c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1873,6 +1873,10 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) int ret; ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal); + if (ret) + return ret; + ret = strdup_if_nonnull(&target->cr_raw_principal, + source->cr_raw_principal); if (ret) return ret; target->cr_flavor = source->cr_flavor; @@ -1978,6 +1982,9 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) return false; if (!svc_rqst_integrity_protected(rqstp)) return false; + if (cl->cl_cred.cr_raw_principal) + return 0 == strcmp(cl->cl_cred.cr_raw_principal, + cr->cr_raw_principal); if (!cr->cr_principal) return false; return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); @@ -2390,7 +2397,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, * Which is a bug, really. Anyway, we can't enforce * MACH_CRED in that case, better to give up now: */ - if (!new->cl_cred.cr_principal) { + if (!new->cl_cred.cr_principal && + !new->cl_cred.cr_raw_principal) { status = nfserr_serverfault; goto out_nolock; } diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 8d71d6577459..c00f53a4ccdd 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -23,13 +23,19 @@ struct svc_cred { kgid_t cr_gid; struct group_info *cr_group_info; u32 cr_flavor; /* pseudoflavor */ - char *cr_principal; /* for gss */ + /* name of form servicetype/hostname@REALM, passed down by + * gss-proxy: */ + char *cr_raw_principal; + /* name of form servicetype@hostname, passed down by + * rpc.svcgssd, or computed from the above: */ + char *cr_principal; struct gss_api_mech *cr_gss_mech; }; static inline void init_svc_cred(struct svc_cred *cred) { cred->cr_group_info = NULL; + cred->cr_raw_principal = NULL; cred->cr_principal = NULL; cred->cr_gss_mech = NULL; } @@ -38,6 +44,7 @@ static inline void free_svc_cred(struct svc_cred *cred) { if (cred->cr_group_info) put_group_info(cred->cr_group_info); + kfree(cred->cr_raw_principal); kfree(cred->cr_principal); gss_mech_put(cred->cr_gss_mech); init_svc_cred(cred); diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 59eeed43eda2..f0c6a8c78a56 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -326,6 +326,9 @@ int gssp_accept_sec_context_upcall(struct net *net, if (data->found_creds && client_name.data != NULL) { char *c; + data->creds.cr_raw_principal = kstrndup(client_name.data, + client_name.len, GFP_KERNEL); + data->creds.cr_principal = kstrndup(client_name.data, client_name.len, GFP_KERNEL); if (data->creds.cr_principal) { -- cgit v1.2.3 From d4f72cb7fa4a3705f6675f2740f9713dc3400dd3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 24 Nov 2015 15:43:03 -0700 Subject: nfsd: don't base cl_cb_status on stale information The rpc client we use to send callbacks may change occasionally. (In the 4.0 case, the client can use setclientid/setclientid_confirm to update the callback parameters. In the 4.1+ case, sessions and connections can come and go.) The update is done from the callback thread by nfsd4_process_cb_update, which shuts down the old rpc client and then creates a new one. The client shutdown kills any ongoing rpc calls. There won't be any new ones till the new one's created and the callback thread moves on. When an rpc encounters a problem that may suggest callback rpc's aren't working any longer, it normally sets NFSD4_CB_DOWN, so the server can tell the client something's wrong. But if the rpc notices CB_UPDATE is set, then the failure may just be a normal result of shutting down the callback client. Or it could just be a coincidence, but in any case, it means we're runing with the old about-to-be-discarded client, so the failure's not interesting. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 081709c3fa4f..7389cb1d7409 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -792,12 +792,16 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) { + if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) + return; clp->cl_cb_state = NFSD4_CB_DOWN; warn_no_callback_path(clp, reason); } static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) { + if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) + return; clp->cl_cb_state = NFSD4_CB_FAULT; warn_no_callback_path(clp, reason); } -- cgit v1.2.3 From c3d4879e01bec484f50a78c108341f039d470e96 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Dec 2015 16:45:58 -0500 Subject: sunrpc: Add a function to close temporary transports immediately Add a function svc_age_temp_xprts_now() to close temporary transports whose xpt_local matches the address passed in server_addr immediately instead of waiting for them to be closed by the timer function. The function is intended to be used by notifier_blocks that will be added to nfsd and lockd that will run when an ip address is deleted. This will eliminate the ACK storms and client hangs that occur in HA-NFS configurations where nfsd & lockd is left running on the cluster nodes all the time and the NFS 'service' is migrated back and forth within a short timeframe. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 45 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 78512cfe1fe6..b7dabc4baafd 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -128,6 +128,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt); +void svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *); static inline void svc_xprt_get(struct svc_xprt *xprt) { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a6cbb2104667..7422f28818b2 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -10,11 +10,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -938,6 +940,49 @@ static void svc_age_temp_xprts(unsigned long closure) mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } +/* Close temporary transports whose xpt_local matches server_addr immediately + * instead of waiting for them to be picked up by the timer. + * + * This is meant to be called from a notifier_block that runs when an ip + * address is deleted. + */ +void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) +{ + struct svc_xprt *xprt; + struct svc_sock *svsk; + struct socket *sock; + struct list_head *le, *next; + LIST_HEAD(to_be_closed); + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + spin_lock_bh(&serv->sv_lock); + list_for_each_safe(le, next, &serv->sv_tempsocks) { + xprt = list_entry(le, struct svc_xprt, xpt_list); + if (rpc_cmp_addr(server_addr, (struct sockaddr *) + &xprt->xpt_local)) { + dprintk("svc_age_temp_xprts_now: found %p\n", xprt); + list_move(le, &to_be_closed); + } + } + spin_unlock_bh(&serv->sv_lock); + + while (!list_empty(&to_be_closed)) { + le = to_be_closed.next; + list_del_init(le); + xprt = list_entry(le, struct svc_xprt, xpt_list); + dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); + svsk = container_of(xprt, struct svc_sock, sk_xprt); + sock = svsk->sk_sock; + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); + svc_close_xprt(xprt); + } +} +EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now); + static void call_xpt_users(struct svc_xprt *xprt) { struct svc_xpt_user *u; -- cgit v1.2.3 From 366849966f20a3d996a2160778861e348cc6a7c6 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Dec 2015 16:45:59 -0500 Subject: nfsd: Register callbacks on the inetaddr_chain and inet6addr_chain Register callbacks on inetaddr_chain and inet6addr_chain to trigger cleanup of nfsd transport sockets when an ip address is deleted. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ad4e2377dd63..3779a5fbeb42 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -14,9 +14,13 @@ #include #include +#include #include #include #include +#include +#include +#include #include #include "nfsd.h" #include "cache.h" @@ -306,10 +310,70 @@ static void nfsd_shutdown_net(struct net *net) nfsd_shutdown_generic(); } +static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct net *net = dev_net(dev); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct sockaddr_in sin; + + if (event != NETDEV_DOWN) + goto out; + + if (nn->nfsd_serv) { + dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = ifa->ifa_local; + svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block nfsd_inetaddr_notifier = { + .notifier_call = nfsd_inetaddr_event, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static int nfsd_inet6addr_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + struct net_device *dev = ifa->idev->dev; + struct net *net = dev_net(dev); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct sockaddr_in6 sin6; + + if (event != NETDEV_DOWN) + goto out; + + if (nn->nfsd_serv) { + dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = ifa->addr; + svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block nfsd_inet6addr_notifier = { + .notifier_call = nfsd_inet6addr_event, +}; +#endif + static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); +#endif /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are @@ -425,6 +489,10 @@ int nfsd_create_serv(struct net *net) } set_max_drc(); + register_inetaddr_notifier(&nfsd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + register_inet6addr_notifier(&nfsd_inet6addr_notifier); +#endif do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ return 0; } -- cgit v1.2.3 From 0751ddf77b6af2efe1041efb81141badd64efb65 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Dec 2015 16:46:00 -0500 Subject: lockd: Register callbacks on the inetaddr_chain and inet6addr_chain Register callbacks on inetaddr_chain and inet6addr_chain to trigger cleanup of lockd transport sockets when an ip address is deleted. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5f31ebd96c06..44d18ad4d364 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -25,13 +25,17 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#include +#include #include #include @@ -279,6 +283,68 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) } } +static int lockd_inetaddr_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct sockaddr_in sin; + + if (event != NETDEV_DOWN) + goto out; + + if (nlmsvc_rqst) { + dprintk("lockd_inetaddr_event: removed %pI4\n", + &ifa->ifa_local); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = ifa->ifa_local; + svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, + (struct sockaddr *)&sin); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lockd_inetaddr_notifier = { + .notifier_call = lockd_inetaddr_event, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static int lockd_inet6addr_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + struct sockaddr_in6 sin6; + + if (event != NETDEV_DOWN) + goto out; + + if (nlmsvc_rqst) { + dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = ifa->addr; + svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, + (struct sockaddr *)&sin6); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lockd_inet6addr_notifier = { + .notifier_call = lockd_inet6addr_event, +}; +#endif + +static void lockd_svc_exit_thread(void) +{ + unregister_inetaddr_notifier(&lockd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&lockd_inet6addr_notifier); +#endif + svc_exit_thread(nlmsvc_rqst); +} + static int lockd_start_svc(struct svc_serv *serv) { int error; @@ -315,7 +381,7 @@ static int lockd_start_svc(struct svc_serv *serv) return 0; out_task: - svc_exit_thread(nlmsvc_rqst); + lockd_svc_exit_thread(); nlmsvc_task = NULL; out_rqst: nlmsvc_rqst = NULL; @@ -360,6 +426,10 @@ static struct svc_serv *lockd_create_svc(void) printk(KERN_WARNING "lockd_up: create service failed\n"); return ERR_PTR(-ENOMEM); } + register_inetaddr_notifier(&lockd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + register_inet6addr_notifier(&lockd_inet6addr_notifier); +#endif dprintk("lockd_up: service created\n"); return serv; } @@ -428,7 +498,7 @@ lockd_down(struct net *net) } kthread_stop(nlmsvc_task); dprintk("lockd_down: service stopped\n"); - svc_exit_thread(nlmsvc_rqst); + lockd_svc_exit_thread(); dprintk("lockd_down: service destroyed\n"); nlmsvc_task = NULL; nlmsvc_rqst = NULL; -- cgit v1.2.3 From 3daa020f9bf803c03c6b6c895921e2b09fcd494a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 23 Dec 2015 15:08:08 -0500 Subject: Revert "svcrdma: Do not send XDR roundup bytes for a write chunk" This reverts commit 6f18dc893981e4daab29221d6a9771f3ce2dd8c5. Just as one example, it appears this code could do the wrong thing in the case of a two-byte NFS READ that crosses a page boundary. Chuck says: "In that case, nfsd would pass down an xdr_buf that has one byte in a page, one byte in another page, and a two-byte XDR pad. The logic introduced by this optimization would be fooled, and neither the second byte nor the XDR pad would be written to the client." Cc: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index bad5eaa9f812..969a1ab75fc3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -342,13 +342,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, arg_ch->rs_handle, arg_ch->rs_offset, write_len); - - /* Do not send XDR pad bytes */ - if (chunk_no && write_len < 4) { - chunk_no++; - break; - } - chunk_off = 0; while (write_len) { ret = send_write(xprt, rqstp, -- cgit v1.2.3 From 2e55f3ab45a0ef2e41548eae64e13b4bf39e5374 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Jan 2016 22:06:28 +0800 Subject: nfsd: use to_delayed_work Use to_delayed_work() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 113ecbfac25c..086a81ce34fc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4571,8 +4571,7 @@ static void laundromat_main(struct work_struct *laundry) { time_t t; - struct delayed_work *dwork = container_of(laundry, struct delayed_work, - work); + struct delayed_work *dwork = to_delayed_work(laundry); struct nfsd_net *nn = container_of(dwork, struct nfsd_net, laundromat_work); -- cgit v1.2.3 From ea44463f376236a0541288cb8d575ab6b50370d2 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Jan 2016 22:06:29 +0800 Subject: lockd: use to_delayed_work Use to_delayed_work() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 44d18ad4d364..b4006c720f55 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -94,8 +94,7 @@ static unsigned long get_lockd_grace_period(void) static void grace_ender(struct work_struct *grace) { - struct delayed_work *dwork = container_of(grace, struct delayed_work, - work); + struct delayed_work *dwork = to_delayed_work(grace); struct lockd_net *ln = container_of(dwork, struct lockd_net, grace_period_end); -- cgit v1.2.3 From 2a297450dd188a5d4e5e428c189b2de54f9073ba Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 23 Dec 2015 22:25:13 +0100 Subject: lockd: constify nlmsvc_binding structure The nlmsvc_binding structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 2 +- fs/nfsd/lockd.c | 2 +- include/linux/lockd/bind.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b4006c720f55..154a107cd376 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -48,7 +48,7 @@ static struct svc_program nlmsvc_program; -struct nlmsvc_binding * nlmsvc_ops; +const struct nlmsvc_binding *nlmsvc_ops; EXPORT_SYMBOL_GPL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 77e7a5cca888..1a03bc3059e8 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -58,7 +58,7 @@ nlm_fclose(struct file *filp) fput(filp); } -static struct nlmsvc_binding nfsd_nlm_ops = { +static const struct nlmsvc_binding nfsd_nlm_ops = { .fopen = nlm_fopen, /* open file for locking */ .fclose = nlm_fclose, /* close file */ }; diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 4d24d64578c4..140edab64446 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -29,7 +29,7 @@ struct nlmsvc_binding { void (*fclose)(struct file *); }; -extern struct nlmsvc_binding * nlmsvc_ops; +extern const struct nlmsvc_binding *nlmsvc_ops; /* * Similar to nfs_client_initdata, but without the NFS-specific -- cgit v1.2.3 From 691412b4438da1b77ff3078de0546023d244d841 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 4 Jan 2016 11:15:21 +0800 Subject: nfsd: Fix nfsd leaks sunrpc module references Stefan Hajnoczi reports, nfsd leaks 3 references to the sunrpc module here: # echo -n "asdf 1234" >/proc/fs/nfsd/portlist bash: echo: write error: Protocol not supported Now stop nfsd and try unloading the kernel modules: # systemctl stop nfs-server # systemctl stop nfs # systemctl stop proc-fs-nfsd.mount # systemctl stop var-lib-nfs-rpc_pipefs.mount # rmmod nfsd # rmmod nfs_acl # rmmod lockd # rmmod auth_rpcgss # rmmod sunrpc rmmod: ERROR: Module sunrpc is in use # lsmod | grep rpc sunrpc 315392 3 It is caused by nfsd don't cleanup rpcb program for nfsd when destroying svc service after creating xprt fail. Reported-by: Stefan Hajnoczi Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 3779a5fbeb42..45007acaf364 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -378,14 +378,13 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are * started, then nfsd_last_thread will be run before any of this - * other initialization has been done. + * other initialization has been done except the rpcb information. */ + svc_rpcb_cleanup(serv, net); if (!nn->nfsd_net_up) return; - nfsd_shutdown_net(net); - - svc_rpcb_cleanup(serv, net); + nfsd_shutdown_net(net); printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); nfsd_export_flush(net); -- cgit v1.2.3 From 6b9b21073d3b250e17812cd562fffc9006962b39 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 8 Dec 2015 07:23:48 -0500 Subject: nfsd: give up on CB_LAYOUTRECALLs after two lease periods Have the CB_LAYOUTRECALL code treat NFS4_OK and NFS4ERR_DELAY returns equivalently. Change the code to periodically resend CB_LAYOUTRECALLS until the ls_layouts list is empty or the client returns a different error code. If we go for two lease periods without the list being emptied or the client sending a hard error, then we give up and clean out the list anyway. Signed-off-by: Jeff Layton Tested-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4layouts.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index fec49febb75b..76c13b0228b1 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -623,24 +623,39 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) { struct nfs4_layout_stateid *ls = container_of(cb, struct nfs4_layout_stateid, ls_recall); + struct nfsd_net *nn; + ktime_t now, cutoff; LIST_HEAD(reaplist); + switch (task->tk_status) { case 0: - return 1; + case -NFS4ERR_DELAY: + /* + * Anything left? If not, then call it done. Note that we don't + * take the spinlock since this is an optimization and nothing + * should get added until the cb counter goes to zero. + */ + if (list_empty(&ls->ls_layouts)) + return 1; + + /* Poll the client until it's done with the layout */ + now = ktime_get(); + nn = net_generic(ls->ls_stid.sc_client->net, nfsd_net_id); + + /* Client gets 2 lease periods to return it */ + cutoff = ktime_add_ns(task->tk_start, + nn->nfsd4_lease * NSEC_PER_SEC * 2); + + if (ktime_before(now, cutoff)) { + rpc_delay(task, HZ/100); /* 10 mili-seconds */ + return 0; + } + /* Fallthrough */ case -NFS4ERR_NOMATCHING_LAYOUT: trace_layout_recall_done(&ls->ls_stid.sc_stateid); task->tk_status = 0; return 1; - case -NFS4ERR_DELAY: - /* Poll the client until it's done with the layout */ - /* FIXME: cap number of retries. - * The pnfs standard states that we need to only expire - * the client after at-least "lease time" .eg lease-time * 2 - * when failing to communicate a recall - */ - rpc_delay(task, HZ/100); /* 10 mili-seconds */ - return 0; default: /* * Unknown error or non-responding client, we'll need to fence. -- cgit v1.2.3 From 6e8b50d16a757d53f8817acecba97c5d4aa1cf65 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 Nov 2015 06:52:23 -0500 Subject: nfsd: add new io class tracepoint Add some new tracepoints in the nfsd read/write codepaths. The idea is that this will give us the ability to measure how long each phase of a read or write operation takes. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.h | 23 +++++++++++++++++++++++ fs/nfsd/trace.h | 41 +++++++++++++++++++++++++++++++++++++++++ fs/nfsd/vfs.c | 15 +++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 2087bae17582..0770bcb543c8 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -7,6 +7,7 @@ #ifndef _LINUX_NFSD_NFSFH_H #define _LINUX_NFSD_NFSFH_H +#include #include #include @@ -205,6 +206,28 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) return true; } +#ifdef CONFIG_CRC32 +/** + * knfsd_fh_hash - calculate the crc32 hash for the filehandle + * @fh - pointer to filehandle + * + * returns a crc32 hash for the filehandle that is compatible with + * the one displayed by "wireshark". + */ + +static inline u32 +knfsd_fh_hash(struct knfsd_fh *fh) +{ + return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size); +} +#else +static inline u32 +knfsd_fh_hash(struct knfsd_fh *fh) +{ + return 0; +} +#endif + #ifdef CONFIG_NFSD_V3 /* * The wcc data stored in current_fh should be cleared diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 0befe762762b..3287041905da 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -8,6 +8,47 @@ #define _NFSD_TRACE_H #include +#include "nfsfh.h" + +DECLARE_EVENT_CLASS(nfsd_io_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *fhp, + loff_t offset, + int len), + TP_ARGS(rqstp, fhp, offset, len), + TP_STRUCT__entry( + __field(__be32, xid) + __field_struct(struct knfsd_fh, fh) + __field(loff_t, offset) + __field(int, len) + ), + TP_fast_assign( + __entry->xid = rqstp->rq_xid, + fh_copy_shallow(&__entry->fh, &fhp->fh_handle); + __entry->offset = offset; + __entry->len = len; + ), + TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d", + __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh), + __entry->offset, __entry->len) +) + +#define DEFINE_NFSD_IO_EVENT(name) \ +DEFINE_EVENT(nfsd_io_class, name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + loff_t offset, \ + int len), \ + TP_ARGS(rqstp, fhp, offset, len)) + +DEFINE_NFSD_IO_EVENT(read_start); +DEFINE_NFSD_IO_EVENT(read_opened); +DEFINE_NFSD_IO_EVENT(read_io_done); +DEFINE_NFSD_IO_EVENT(read_done); +DEFINE_NFSD_IO_EVENT(write_start); +DEFINE_NFSD_IO_EVENT(write_opened); +DEFINE_NFSD_IO_EVENT(write_io_done); +DEFINE_NFSD_IO_EVENT(write_done); #include "state.h" diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 994d66fbb446..3257c59dc860 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -42,6 +42,7 @@ #include "nfsd.h" #include "vfs.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_FILEOP @@ -983,16 +984,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct raparms *ra; __be32 err; + trace_read_start(rqstp, fhp, offset, vlen); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); if (err) return err; ra = nfsd_init_raparms(file); + + trace_read_opened(rqstp, fhp, offset, vlen); err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + trace_read_io_done(rqstp, fhp, offset, vlen); + if (ra) nfsd_put_raparams(file, ra); fput(file); + trace_read_done(rqstp, fhp, offset, vlen); + return err; } @@ -1008,24 +1016,31 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, { __be32 err = 0; + trace_write_start(rqstp, fhp, offset, vlen); + if (file) { err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; + trace_write_opened(rqstp, fhp, offset, vlen); err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); + trace_write_io_done(rqstp, fhp, offset, vlen); } else { err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); if (err) goto out; + trace_write_opened(rqstp, fhp, offset, vlen); if (cnt) err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); + trace_write_io_done(rqstp, fhp, offset, vlen); fput(file); } out: + trace_write_done(rqstp, fhp, offset, vlen); return err; } -- cgit v1.2.3