From a2268cfbf599e7f55d4ee68193f08b4f44535fac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:32 -0400 Subject: xprtrdma: Add proper SPDX tags for NetApp-contributed source Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 1 + include/linux/sunrpc/xprtrdma.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 8f144db73e38..92d182fd8e3b 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2015-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 5859563e3c1f..86fc38ff0355 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * -- cgit v1.2.3 From 37ac86c3a76c113619b7d9afe0251bbfc04cb80a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:53 -0400 Subject: SUNRPC: Initialize rpc_rqst outside of xprt->reserve_lock alloc_slot is a transport-specific op, but initializing an rpc_rqst is common to all transports. In addition, the only part of initializing an rpc_rqst that needs serialization is getting a fresh XID. Move rpc_rqst initialization to common code in preparation for adding a transport-specific alloc_slot to xprtrdma. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 5fea0fb420df..9784e2875e7e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -324,6 +324,7 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); +void xprt_request_init(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); -- cgit v1.2.3 From a9cde23ab7cdf5e4e93432dffd0e734267f2b745 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:59 -0400 Subject: SUNRPC: Add a ->free_slot transport callout Refactor: xprtrdma needs to have better control over when RPCs are awoken from the backlog queue, so replace xprt_free_slot with a transport op callout. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9784e2875e7e..706eef12bbc0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -127,6 +127,8 @@ struct rpc_xprt_ops { int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*free_slot)(struct rpc_xprt *xprt, + struct rpc_rqst *req); void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -329,6 +331,8 @@ void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); +void xprt_free_slot(struct rpc_xprt *xprt, + struct rpc_rqst *req); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); -- cgit v1.2.3 From edb41e61a54ee75fae31302775e0301fdcb0caaa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:09 -0400 Subject: xprtrdma: Make rpc_rqst part of rpcrdma_req This simplifies allocation of the generic RPC slot and xprtrdma specific per-RPC resources. It also makes xprtrdma more like the socket-based transports: ->buf_alloc and ->buf_free are now responsible only for send and receive buffers. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 706eef12bbc0..336fd1a19cca 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -84,7 +84,6 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; - void *rq_xprtdata; /* Per-xprt private data */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; void *rq_rbuffer; /* Reply XDR decode buffer */ -- cgit v1.2.3 From 0e0b854cfb3302b1907e9d3a927469b95710238f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:14 -0400 Subject: xprtrdma: Clean up Receive trace points For clarity, report the posting and completion of Receive CQEs. Also, the wc->byte_len field contains garbage if wc->status is non-zero, and the vendor error field contains garbage if wc->status is zero. For readability, don't save those fields in those cases. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 50ed3f8bf534..99c0049e51a5 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -528,24 +528,21 @@ TRACE_EVENT(xprtrdma_post_send, TRACE_EVENT(xprtrdma_post_recv, TP_PROTO( - const struct rpcrdma_rep *rep, - int status + const struct ib_cqe *cqe ), - TP_ARGS(rep, status), + TP_ARGS(cqe), TP_STRUCT__entry( - __field(const void *, rep) - __field(int, status) + __field(const void *, cqe) ), TP_fast_assign( - __entry->rep = rep; - __entry->status = status; + __entry->cqe = cqe; ), - TP_printk("rep=%p status=%d", - __entry->rep, __entry->status + TP_printk("cqe=%p", + __entry->cqe ) ); @@ -584,28 +581,32 @@ TRACE_EVENT(xprtrdma_wc_send, TRACE_EVENT(xprtrdma_wc_receive, TP_PROTO( - const struct rpcrdma_rep *rep, const struct ib_wc *wc ), - TP_ARGS(rep, wc), + TP_ARGS(wc), TP_STRUCT__entry( - __field(const void *, rep) - __field(unsigned int, byte_len) + __field(const void *, cqe) + __field(u32, byte_len) __field(unsigned int, status) - __field(unsigned int, vendor_err) + __field(u32, vendor_err) ), TP_fast_assign( - __entry->rep = rep; - __entry->byte_len = wc->byte_len; + __entry->cqe = wc->wr_cqe; __entry->status = wc->status; - __entry->vendor_err = __entry->status ? 
wc->vendor_err : 0; + if (wc->status) { + __entry->byte_len = 0; + __entry->vendor_err = wc->vendor_err; + } else { + __entry->byte_len = wc->byte_len; + __entry->vendor_err = 0; + } ), - TP_printk("rep=%p, %u bytes: %s (%u/0x%x)", - __entry->rep, __entry->byte_len, + TP_printk("cqe=%p %u bytes: %s (%u/0x%x)", + __entry->cqe, __entry->byte_len, rdma_show_wc_status(__entry->status), __entry->status, __entry->vendor_err ) -- cgit v1.2.3 From 7c8d9e7c8863905951d4eaa7a8d277150f3a37f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:20 -0400 Subject: xprtrdma: Move Receive posting to Receive handler Receive completion and Reply handling are done by a BOUND workqueue, meaning they run on only one CPU. Posting receives is currently done in the send_request path, which on large systems is typically done on a different CPU than the one handling Receive completions. This results in movement of Receive-related cachelines between the sending and receiving CPUs. More importantly, it means that currently Receives are posted while the transport's write lock is held, which is unnecessary and costly. Finally, allocation of Receive buffers is performed on-demand in the Receive completion handler. This helps guarantee that they are allocated on the same NUMA node as the CPU that handles Receive completions. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 99c0049e51a5..ad27e192cdf8 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -546,6 +546,39 @@ TRACE_EVENT(xprtrdma_post_recv, ) ); +TRACE_EVENT(xprtrdma_post_recvs, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + unsigned int count, + int status + ), + + TP_ARGS(r_xprt, count, status), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, count) + __field(int, status) + __field(int, posted) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->count = count; + __entry->status = status; + __entry->posted = r_xprt->rx_buf.rb_posted_receives; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->count, __entry->posted, __entry->status + ) +); + /** ** Completion events **/ @@ -800,7 +833,6 @@ TRACE_EVENT(xprtrdma_allocate, __field(unsigned int, task_id) __field(unsigned int, client_id) __field(const void *, req) - __field(const void *, rep) __field(size_t, callsize) __field(size_t, rcvsize) ), @@ -809,15 +841,13 @@ TRACE_EVENT(xprtrdma_allocate, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->req = req; - __entry->rep = req ? 
req->rl_reply : NULL; __entry->callsize = task->tk_rqstp->rq_callsize; __entry->rcvsize = task->tk_rqstp->rq_rcvsize; ), - TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)", + TP_printk("task:%u@%u req=%p (%zu, %zu)", __entry->task_id, __entry->client_id, - __entry->req, __entry->rep, - __entry->callsize, __entry->rcvsize + __entry->req, __entry->callsize, __entry->rcvsize ) ); -- cgit v1.2.3 From a7986f09986ac1befc85bcab30970312c476dbc7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:25 -0400 Subject: xprtrdma: Remove rpcrdma_ep_{post_recv, post_extra_recv} Clean up: These functions are no longer used. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ad27e192cdf8..ac82849954e4 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -879,8 +879,6 @@ TRACE_EVENT(xprtrdma_rpc_done, ) ); -DEFINE_RXPRT_EVENT(xprtrdma_noreps); - /** ** Callback events **/ -- cgit v1.2.3 From 8335640cf89faa0f4e39e73e314f3f3a22d776f3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:36:02 -0400 Subject: xprtrdma: Add trace_xprtrdma_dma_map(mr) Matches trace_xprtrdma_dma_unmap(mr). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ac82849954e4..c4494a2b3ecd 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -650,6 +650,7 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); DEFINE_MR_EVENT(xprtrdma_localinv); +DEFINE_MR_EVENT(xprtrdma_dma_map); DEFINE_MR_EVENT(xprtrdma_dma_unmap); DEFINE_MR_EVENT(xprtrdma_remoteinv); DEFINE_MR_EVENT(xprtrdma_recover_mr); -- cgit v1.2.3