diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-09 14:31:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-09 14:31:18 -0700 |
commit | 0d10c2c170e3384dd63f40216d7af4673d5ebb50 (patch) | |
tree | 2ce6760501b92ab279677edc3c8d981183ad97f6 /net/sunrpc | |
parent | 023f78b02c729070116fa3a7ebd4107a032d3f5c (diff) | |
parent | d1e458fe671baf1e60afafc88bda090202a412f1 (diff) |
Merge branch 'for-3.17' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
"This includes a major rewrite of the NFSv4 state code, which has
always depended on a single mutex. As an example, open creates are no
longer serialized, fixing a performance regression on NFSv3->NFSv4
upgrades. Thanks to Jeff, Trond, and Benny, and to Christoph for
review.
Also some RDMA fixes from Chuck Lever and Steve Wise, and
miscellaneous fixes from Kinglong Mee and others"
* 'for-3.17' of git://linux-nfs.org/~bfields/linux: (167 commits)
svcrdma: remove rdma_create_qp() failure recovery logic
nfsd: add some comments to the nfsd4 object definitions
nfsd: remove the client_mutex and the nfs4_lock/unlock_state wrappers
nfsd: remove nfs4_lock_state: nfs4_state_shutdown_net
nfsd: remove nfs4_lock_state: nfs4_laundromat
nfsd: Remove nfs4_lock_state(): reclaim_complete()
nfsd: Remove nfs4_lock_state(): setclientid, setclientid_confirm, renew
nfsd: Remove nfs4_lock_state(): exchange_id, create/destroy_session()
nfsd: Remove nfs4_lock_state(): nfsd4_open and nfsd4_open_confirm
nfsd: Remove nfs4_lock_state(): nfsd4_delegreturn()
nfsd: Remove nfs4_lock_state(): nfsd4_open_downgrade + nfsd4_close
nfsd: Remove nfs4_lock_state(): nfsd4_lock/locku/lockt()
nfsd: Remove nfs4_lock_state(): nfsd4_release_lockowner
nfsd: Remove nfs4_lock_state(): nfsd4_test_stateid/nfsd4_free_stateid
nfsd: Remove nfs4_lock_state(): nfs4_preprocess_stateid_op()
nfsd: remove old fault injection infrastructure
nfsd: add more granular locking to *_delegations fault injectors
nfsd: add more granular locking to forget_openowners fault injector
nfsd: add more granular locking to forget_locks fault injector
nfsd: add a list_head arg to nfsd_foreach_client_lock
...
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/auth_gss/svcauth_gss.c | 2 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 4 | ||||
-rw-r--r-- | net/sunrpc/svc_xprt.c | 27 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 50 | ||||
-rw-r--r-- | net/sunrpc/xdr.c | 3 | ||||
-rw-r--r-- | net/sunrpc/xprt.c | 2 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 28 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 39 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 20 |
9 files changed, 103 insertions, 72 deletions
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 4ce5eccec1f6..c548ab213f76 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs u32 priv_len, maj_stat; int pad, saved_len, remaining_len, offset; - rqstp->rq_splice_ok = 0; + rqstp->rq_splice_ok = false; priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5de6801cd924..1db5007ddbce 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1086,9 +1086,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off only in gss privacy case: */ - rqstp->rq_splice_ok = 1; + rqstp->rq_splice_ok = true; /* Will be turned off only when NFSv4 Sessions are used */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; rqstp->rq_dropme = false; /* Setup reply header */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b4737fbdec13..6666c6745858 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -23,6 +23,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); static void svc_delete_xprt(struct svc_xprt *xprt); +static void svc_xprt_do_enqueue(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt) if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) return; /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_enqueue with: + * 'put', so we need a reference to call svc_xprt_do_enqueue with: */ svc_xprt_get(xprt); + smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + svc_xprt_do_enqueue(xprt); svc_xprt_put(xprt); } @@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) return false; } -/* - * Queue up a transport with data pending. If there are idle nfsd - * processes, wake 'em up. - * - */ -void svc_xprt_enqueue(struct svc_xprt *xprt) +static void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp; @@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) out_unlock: spin_unlock_bh(&pool->sp_lock); } + +/* + * Queue up a transport with data pending. If there are idle nfsd + * processes, wake 'em up. + * + */ +void svc_xprt_enqueue(struct svc_xprt *xprt) +{ + if (test_bit(XPT_BUSY, &xprt->xpt_flags)) + return; + svc_xprt_do_enqueue(xprt); +} EXPORT_SYMBOL_GPL(svc_xprt_enqueue); /* @@ -439,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space) atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; + if (xprt->xpt_ops->xpo_adjust_wspace) + xprt->xpt_ops->xpo_adjust_wspace(xprt); svc_xprt_enqueue(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b507cd327d9b..c24a8ff33f8f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -446,15 +446,43 @@ static void svc_write_space(struct sock *sk) } } +static int svc_tcp_has_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct svc_serv *serv = svsk->sk_xprt.xpt_server; + int required; + + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) + return 1; + required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; + if (sk_stream_wspace(svsk->sk_sk) >= required || + (sk_stream_min_wspace(svsk->sk_sk) == 0 && + atomic_read(&xprt->xpt_reserved) == 0)) + return 1; + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); + return 0; +} + static void svc_tcp_write_space(struct sock *sk) { + struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); struct socket *sock = sk->sk_socket; - if (sk_stream_is_writeable(sk) && sock) + if (!sk_stream_is_writeable(sk) || !sock) + return; + if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt)) clear_bit(SOCK_NOSPACE, &sock->flags); svc_write_space(sk); } +static void svc_tcp_adjust_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + if (svc_tcp_has_wspace(xprt)) + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -692,6 +720,7 @@ static struct svc_xprt_class svc_udp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_udp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, + .xcl_ident = XPRT_TRANSPORT_UDP, }; static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) @@ -1197,23 +1226,6 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) svc_putnl(resv, 0); } -static int svc_tcp_has_wspace(struct svc_xprt *xprt) -{ - struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int required; - - if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) - return 1; - required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; - if (sk_stream_wspace(svsk->sk_sk) >= required || - (sk_stream_min_wspace(svsk->sk_sk) == 0 && - atomic_read(&xprt->xpt_reserved) == 0)) - return 1; - set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - return 0; -} - static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -1285,6 +1297,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_adjust_wspace = svc_tcp_adjust_wspace, }; static struct svc_xprt_class svc_tcp_class = { @@ -1292,6 +1305,7 @@ static struct svc_xprt_class svc_tcp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_tcp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_TCP, }; void svc_init_xprt_sock(void) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 23fb4e75e245..290af97bf6f9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -509,7 +509,8 @@ void xdr_commit_encode(struct xdr_stream *xdr) } EXPORT_SYMBOL_GPL(xdr_commit_encode); -__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) +static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, + size_t nbytes) { static __be32 *p; int space_left; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c3b2b3369e52..51c63165073c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1306,7 +1306,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) } } spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + dprintk("RPC: transport (%d) not supported\n", args->ident); return ERR_PTR(-EIO); found: diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 8f92a61ee2df..e0110270d650 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -43,6 +43,7 @@ #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> #include <linux/spinlock.h> +#include <linux/highmem.h> #include <asm/unaligned.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -435,6 +436,32 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, return ret; } +/* + * To avoid a separate RDMA READ just for a handful of zero bytes, + * RFC 5666 section 3.7 allows the client to omit the XDR zero pad + * in chunk lists. + */ +static void +rdma_fix_xdr_pad(struct xdr_buf *buf) +{ + unsigned int page_len = buf->page_len; + unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len; + unsigned int offset, pg_no; + char *p; + + if (size == 0) + return; + + pg_no = page_len >> PAGE_SHIFT; + offset = page_len & ~PAGE_MASK; + p = page_address(buf->pages[pg_no]); + memset(p + offset, 0, size); + + buf->page_len += size; + buf->buflen += size; + buf->len += size; +} + static int rdma_read_complete(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { @@ -449,6 +476,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_pages[page_no] = head->pages[page_no]; } /* Point rq_arg.pages past header */ + rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 49fd21a5c215..9f1b50689c0f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -192,6 +192,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, xdr_sge_no++; BUG_ON(xdr_sge_no > vec->count); bc -= sge_bytes; + if (sge_no == xprt->sc_max_sge) + break; } /* Prepare WRITE WR */ @@ -209,7 +211,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, atomic_inc(&rdma_stat_write); if (svc_rdma_send(xprt, &write_wr)) goto err; - return 0; + return write_len - bc; err: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 0); @@ -225,7 +227,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, { u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; int write_len; - int max_write; u32 xdr_off; int chunk_off; int chunk_no; @@ -239,8 +240,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[1]; - max_write = xprt->sc_max_sge * PAGE_SIZE; - /* Write chunks start at the pagelist */ for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; xfer_len && chunk_no < arg_ary->wc_nchunks; @@ -260,23 +259,21 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, write_len); chunk_off = 0; while (write_len) { - int this_write; - this_write = min(write_len, max_write); ret = send_write(xprt, rqstp, ntohl(arg_ch->rs_handle), rs_offset + chunk_off, xdr_off, - this_write, + write_len, vec); - if (ret) { + if (ret <= 0) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); return -EIO; } - chunk_off += this_write; - xdr_off += this_write; - xfer_len -= this_write; - write_len -= this_write; + chunk_off += ret; + xdr_off += ret; + xfer_len -= ret; + write_len -= ret; } } /* Update the req with the number of chunks actually used */ @@ -293,7 +290,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, { u32 xfer_len = rqstp->rq_res.len; int write_len; - int max_write; u32 xdr_off; int chunk_no; int chunk_off; @@ -311,8 +307,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[2]; - max_write = xprt->sc_max_sge * PAGE_SIZE; - /* xdr offset starts at RPC message */ nchunks = ntohl(arg_ary->wc_nchunks); for (xdr_off = 0, chunk_no = 0; @@ -330,24 +324,21 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, write_len); chunk_off = 0; while (write_len) { - int this_write; - - this_write = min(write_len, max_write); ret = send_write(xprt, rqstp, ntohl(ch->rs_handle), rs_offset + chunk_off, xdr_off, - this_write, + write_len, vec); - if (ret) { + if (ret <= 0) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); return -EIO; } - chunk_off += this_write; - xdr_off += this_write; - xfer_len -= this_write; - write_len -= this_write; + chunk_off += ret; + xdr_off += ret; + xfer_len -= ret; + write_len -= ret; } } /* Update the req with the number of chunks actually used */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e7323fbbd348..374feb44afea 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -92,6 +92,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_RDMA, }; struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) @@ -942,23 +943,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); if (ret) { - /* - * XXX: This is a hack. We need a xx_request_qp interface - * that will adjust the qp_attr's with a best-effort - * number - */ - qp_attr.cap.max_send_sge -= 2; - qp_attr.cap.max_recv_sge -= 2; - ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, - &qp_attr); - if (ret) { - dprintk("svcrdma: failed to create QP, ret=%d\n", ret); - goto errout; - } - newxprt->sc_max_sge = qp_attr.cap.max_send_sge; - newxprt->sc_max_sge = qp_attr.cap.max_recv_sge; - newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; - newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; + dprintk("svcrdma: failed to create QP, ret=%d\n", ret); + goto errout; } newxprt->sc_qp = newxprt->sc_cm_id->qp; |