From 97cc3264508f33783ba21573204d7e0bf5b197e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Mar 2018 17:05:15 -0400 Subject: svcrdma: Consult max_qp_init_rd_atom when accepting connections The target needs to return the lesser of the client's Inbound RDMA Read Queue Depth (IRD), provided in the connection parameters, and the local device's Outbound RDMA Read Queue Depth (ORD). The latter limit is max_qp_init_rd_atom, not max_qp_rd_atom. The svcrdma_ord value caps the ORD value for iWARP transports, which do not exchange ORD/IRD values at connection time. Since no other Linux kernel RDMA-enabled storage target sees fit to provide this cap, I'm removing it here too. initiator_depth is a u8, so ensure the computed ORD value does not overflow that field. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 4b731b046bcd..7337e1221590 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -132,9 +132,6 @@ struct svcxprt_rdma { #define RDMAXPRT_CONN_PENDING 3 #define RPCRDMA_LISTEN_BACKLOG 10 -/* The default ORD value is based on two outstanding full-size writes with a - * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ -#define RPCRDMA_ORD (64/4) #define RPCRDMA_MAX_REQUESTS 32 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our -- cgit v1.2.3 From 989f881ebf77d70e883dd0fbcfa04a058d97f771 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:49:38 -0400 Subject: svc: Simplify ->xpo_secure_port Clean up: Instead of returning a value that is used to set or clear a bit, just make ->xpo_secure_port mangle that bit, and return void. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 1caf7bc83306..19475acb68ea 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,7 +25,7 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); - int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; -- cgit v1.2.3 From 03edb90f57a5f7edaa13db3174e269331ef9a527 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:49:53 -0400 Subject: sunrpc: Update show_svc_xprt_flags() to include recently added flags XPT_KILL_TEMP was added by commit 546125d16142 ("sunrpc: don't call sleeping functions from the notifier block callbacks"), and XPT_CONG_CTRL was added by commit 362142b25843 ("sunrpc: flag transports as having congestion control") . Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 970c91a83173..a23144471b6b 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -591,7 +591,9 @@ DEFINE_EVENT(svc_rqst_status, svc_send, { (1UL << XPT_OLD), "XPT_OLD"}, \ { (1UL << XPT_LISTENER), "XPT_LISTENER"}, \ { (1UL << XPT_CACHE_AUTH), "XPT_CACHE_AUTH"}, \ - { (1UL << XPT_LOCAL), "XPT_LOCAL"}) + { (1UL << XPT_LOCAL), "XPT_LOCAL"}, \ + { (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \ + { (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"}) TRACE_EVENT(svc_xprt_do_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), -- cgit v1.2.3 From caa3e106dc623eb41542e6221abecf9956e8a0e6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:50:07 -0400 Subject: sunrpc: Move trace_svc_xprt_dequeue() Reduce the amount of noise generated by trace_svc_xprt_dequeue by moving it to the end of svc_get_next_xprt. This generates exactly one trace event when a ready xprt is found, rather than spurious events when there is no work to do. The empty events contain no information that can't be obtained simply by tracing function calls to svc_xprt_dequeue. A small additional benefit is simplification of the svc_xprt_event trace class, which no longer has to handle the case when the @xprt parameter is NULL. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index a23144471b6b..9bba3070f873 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -634,24 +634,18 @@ DECLARE_EVENT_CLASS(svc_xprt_event, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, xprt != NULL ? - xprt->xpt_remotelen : 0) + __dynamic_array(unsigned char, addr, xprt->xpt_remotelen) ), TP_fast_assign( __entry->xprt = xprt; - if (xprt) { - memcpy(__get_dynamic_array(addr), - &xprt->xpt_remote, - xprt->xpt_remotelen); - __entry->flags = xprt->xpt_flags; - } else - __entry->flags = 0; + __entry->flags = xprt->xpt_flags; + memcpy(__get_dynamic_array(addr), + &xprt->xpt_remote, xprt->xpt_remotelen); ), TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt, - __get_dynamic_array_len(addr) != 0 ? - (struct sockaddr *)__get_dynamic_array(addr) : NULL, + (struct sockaddr *)__get_dynamic_array(addr), show_svc_xprt_flags(__entry->flags)) ); -- cgit v1.2.3 From 41f306d0c287e0cc04054135f9f4ceb003ad6795 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:50:42 -0400 Subject: sunrpc: Simplify trace_svc_recv There doesn't seem to be a lot of value in calling trace_svc_recv in the failing case. 1. There are two very common cases: one is the transport is not ready, and the other is shutdown. Neither is terribly interesting. 2. The trace record for the failing case contains nothing but the status code. Therefore the trace point call site in the error exit is removed. Since the trace point is now recording a length instead of a status, rename the status field and remove the case that records a zero XID. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 9bba3070f873..5849bfb3ece2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -485,28 +485,28 @@ TRACE_EVENT(xs_tcp_data_recv, { (1UL << RQ_BUSY), "RQ_BUSY"}) TRACE_EVENT(svc_recv, - TP_PROTO(struct svc_rqst *rqst, int status), + TP_PROTO(struct svc_rqst *rqst, int len), - TP_ARGS(rqst, status), + TP_ARGS(rqst, len), TP_STRUCT__entry( __field(u32, xid) - __field(int, status) + __field(int, len) __field(unsigned long, flags) __dynamic_array(unsigned char, addr, rqst->rq_addrlen) ), TP_fast_assign( - __entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0; - __entry->status = status; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->len = len; __entry->flags = rqst->rq_flags; memcpy(__get_dynamic_array(addr), &rqst->rq_addr, rqst->rq_addrlen); ), - TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", + TP_printk("addr=%pIScp xid=0x%08x len=%d flags=%s", (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid, __entry->status, + __entry->xid, __entry->len, show_rqstp_flags(__entry->flags)) ); -- cgit v1.2.3 From ece200ddd54b9ce840cfee554fb812560c545c7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:51:00 -0400 Subject: sunrpc: Save remote presentation address in svc_xprt for trace events TP_printk defines a format string that is passed to user space for converting raw trace event records to something human-readable. My user space's printf (Oracle Linux 7), however, does not have a %pI format specifier. The result is that what is supposed to be an IP address in the output of "trace-cmd report" is just a string that says the field couldn't be displayed. To fix this, adopt the same approach as the client: maintain a pre- formated presentation address for occasions when %pI is not available. The location of the trace_svc_send trace point is adjusted so that rqst->rq_xprt is not NULL when the trace event is recorded. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 4 ++ include/trace/events/sunrpc.h | 89 +++++++++++++++-------------------------- 2 files changed, 37 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 19475acb68ea..c3d72066d4b1 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -83,6 +83,7 @@ struct svc_xprt { size_t xpt_locallen; /* length of address */ struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ + char xpt_remotebuf[INET6_ADDRSTRLEN + 10]; struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ @@ -152,7 +153,10 @@ static inline void svc_xprt_set_remote(struct svc_xprt *xprt, { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; + snprintf(xprt->xpt_remotebuf, sizeof(xprt->xpt_remotebuf) - 1, + "%pISpc", sa); } + static inline unsigned short svc_addr_port(const struct sockaddr *sa) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5849bfb3ece2..1ec8c4c45766 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -493,20 +493,18 @@ TRACE_EVENT(svc_recv, __field(u32, xid) __field(int, len) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, rqst->rq_addrlen) + __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->len = len; __entry->flags = rqst->rq_flags; - memcpy(__get_dynamic_array(addr), - &rqst->rq_addr, rqst->rq_addrlen); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp xid=0x%08x len=%d flags=%s", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid, __entry->len, + TP_printk("addr=%s xid=0x%08x len=%d flags=%s", + __get_str(addr), __entry->xid, __entry->len, show_rqstp_flags(__entry->flags)) ); @@ -519,20 +517,18 @@ DECLARE_EVENT_CLASS(svc_rqst_event, TP_STRUCT__entry( __field(u32, xid) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, rqst->rq_addrlen) + __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->flags = rqst->rq_flags; - memcpy(__get_dynamic_array(addr), - &rqst->rq_addr, rqst->rq_addrlen); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp rq_xid=0x%08x flags=%s", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid, - show_rqstp_flags(__entry->flags)) + TP_printk("addr=%s xid=0x%08x flags=%s", + __get_str(addr), __entry->xid, + show_rqstp_flags(__entry->flags)) ); DEFINE_EVENT(svc_rqst_event, svc_defer, @@ -553,21 +549,19 @@ DECLARE_EVENT_CLASS(svc_rqst_status, __field(u32, xid) __field(int, status) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, rqst->rq_addrlen) + __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->status = status; __entry->flags = rqst->rq_flags; - memcpy(__get_dynamic_array(addr), - &rqst->rq_addr, rqst->rq_addrlen); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid, - __entry->status, show_rqstp_flags(__entry->flags)) + TP_printk("addr=%s xid=0x%08x status=%d flags=%s", + __get_str(addr), __entry->xid, + __entry->status, show_rqstp_flags(__entry->flags)) ); DEFINE_EVENT(svc_rqst_status, svc_process, @@ -604,26 +598,19 @@ TRACE_EVENT(svc_xprt_do_enqueue, __field(struct svc_xprt *, xprt) __field(int, pid) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, xprt != NULL ? - xprt->xpt_remotelen : 0) + __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xprt = xprt; __entry->pid = rqst? rqst->rq_task->pid : 0; - if (xprt) { - memcpy(__get_dynamic_array(addr), - &xprt->xpt_remote, - xprt->xpt_remotelen); - __entry->flags = xprt->xpt_flags; - } else - __entry->flags = 0; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, - __get_dynamic_array_len(addr) != 0 ? - (struct sockaddr *)__get_dynamic_array(addr) : NULL, - __entry->pid, show_svc_xprt_flags(__entry->flags)) + TP_printk("xprt=%p addr=%s pid=%d flags=%s", + __entry->xprt, __get_str(addr), + __entry->pid, show_svc_xprt_flags(__entry->flags)) ); DECLARE_EVENT_CLASS(svc_xprt_event, @@ -634,19 +621,18 @@ DECLARE_EVENT_CLASS(svc_xprt_event, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, xprt->xpt_remotelen) + __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xprt = xprt; __entry->flags = xprt->xpt_flags; - memcpy(__get_dynamic_array(addr), - &xprt->xpt_remote, xprt->xpt_remotelen); + __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt, - (struct sockaddr *)__get_dynamic_array(addr), - show_svc_xprt_flags(__entry->flags)) + TP_printk("xprt=%p addr=%s flags=%s", + __entry->xprt, __get_str(addr), + show_svc_xprt_flags(__entry->flags)) ); DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue, @@ -682,25 +668,18 @@ TRACE_EVENT(svc_handle_xprt, __field(struct svc_xprt *, xprt) __field(int, len) __field(unsigned long, flags) - __dynamic_array(unsigned char, addr, xprt != NULL ? - xprt->xpt_remotelen : 0) + __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xprt = xprt; __entry->len = len; - if (xprt) { - memcpy(__get_dynamic_array(addr), - &xprt->xpt_remote, - xprt->xpt_remotelen); - __entry->flags = xprt->xpt_flags; - } else - __entry->flags = 0; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, - __get_dynamic_array_len(addr) != 0 ? - (struct sockaddr *)__get_dynamic_array(addr) : NULL, + TP_printk("xprt=%p addr=%s len=%d flags=%s", + __entry->xprt, __get_str(addr), __entry->len, show_svc_xprt_flags(__entry->flags)) ); @@ -712,18 +691,16 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_STRUCT__entry( __field(u32, xid) - __dynamic_array(unsigned char, addr, dr->addrlen) + __string(addr, dr->xprt->xpt_remotebuf) ), TP_fast_assign( __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); - memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen); + __assign_str(addr, dr->xprt->xpt_remotebuf); ), - TP_printk("addr=%pIScp xid=0x%08x", - (struct sockaddr *)__get_dynamic_array(addr), - __entry->xid) + TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid) ); DEFINE_EVENT(svc_deferred_event, svc_drop_deferred, -- cgit v1.2.3 From 0b9547bf6b94317b3f8e2496dc2b44cb6e599b01 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:51:22 -0400 Subject: sunrpc: Re-purpose trace_svc_process Currently, trace_svc_process has two call sites: 1. Just after a call to svc_send. svc_send already invokes trace_svc_send with the same arguments just before returning 2. Just before a call to svc_drop. svc_drop already invokes trace_svc_drop with the same arguments just after it is called Therefore trace_svc_process does not provide any additional information not already provided by these other trace points. However, it would be useful to record the incoming RPC procedure. So reuse trace_svc_process for this purpose. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1ec8c4c45766..5a8157c04900 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -508,6 +508,32 @@ TRACE_EVENT(svc_recv, show_rqstp_flags(__entry->flags)) ); +TRACE_EVENT(svc_process, + TP_PROTO(const struct svc_rqst *rqst, const char *name), + + TP_ARGS(rqst, name), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, vers) + __field(u32, proc) + __string(service, name) + __string(addr, rqst->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->vers = rqst->rq_vers; + __entry->proc = rqst->rq_proc; + __assign_str(service, name); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u", + __get_str(addr), __entry->xid, + __get_str(service), __entry->vers, __entry->proc) +); + DECLARE_EVENT_CLASS(svc_rqst_event, TP_PROTO(struct svc_rqst *rqst), @@ -564,10 +590,6 @@ DECLARE_EVENT_CLASS(svc_rqst_status, __entry->status, show_rqstp_flags(__entry->flags)) ); -DEFINE_EVENT(svc_rqst_status, svc_process, - TP_PROTO(struct svc_rqst *rqst, int status), - TP_ARGS(rqst, status)); - DEFINE_EVENT(svc_rqst_status, svc_send, TP_PROTO(struct svc_rqst *rqst, int status), TP_ARGS(rqst, status)); -- cgit v1.2.3 From aaba72cd4e793fbf1c04e06dee3d2c3710339678 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:51:39 -0400 Subject: sunrpc: Report per-RPC execution stats Introduce a mechanism to report the server-side execution latency of each RPC. The goal is to enable user space to filter the trace record for latency outliers, build histograms, etc. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + include/trace/events/sunrpc.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 786ae2255f05..3bd7504066e1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -283,6 +283,7 @@ struct svc_rqst { int rq_reserved; /* space on socket outq * reserved for this request */ + ktime_t rq_stime; /* start time */ struct cache_req rq_chandle; /* handle passed to caches for * request delaying diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5a8157c04900..1b383f71ccd7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -705,6 +705,27 @@ TRACE_EVENT(svc_handle_xprt, __entry->len, show_svc_xprt_flags(__entry->flags)) ); +TRACE_EVENT(svc_stats_latency, + TP_PROTO(const struct svc_rqst *rqst), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(u32, xid) + __field(unsigned long, execute) + __string(addr, rqst->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->execute = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_stime)); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s xid=0x%08x execute-us=%lu", + __get_str(addr), __entry->xid, __entry->execute) +); DECLARE_EVENT_CLASS(svc_deferred_event, TP_PROTO(struct svc_deferred_req *dr), -- cgit v1.2.3 From 55f5088c22cc83dbc64394abfbf76cd1ff5e7cd0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:52:27 -0400 Subject: svc: Report xprt dequeue latency Record the time between when a rqstp is enqueued on a transport and when it is dequeued. This includes how long the rqstp waits on the queue and how long it takes the kernel scheduler to wake a nfsd thread to service it. The svc_xprt_dequeue trace point is altered to include the number of microseconds between xprt_enqueue and xprt_dequeue. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + include/trace/events/sunrpc.h | 30 ++++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3bd7504066e1..dc4c009deec1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -272,6 +272,7 @@ struct svc_rqst { #define RQ_BUSY (6) /* request is busy */ #define RQ_DATA (7) /* request has data */ unsigned long rq_flags; /* flags field */ + ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1b383f71ccd7..922cb8968fb2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -657,14 +657,36 @@ DECLARE_EVENT_CLASS(svc_xprt_event, show_svc_xprt_flags(__entry->flags)) ); -DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue, - TP_PROTO(struct svc_xprt *xprt), - TP_ARGS(xprt)); - DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space, TP_PROTO(struct svc_xprt *xprt), TP_ARGS(xprt)); +TRACE_EVENT(svc_xprt_dequeue, + TP_PROTO(struct svc_rqst *rqst), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(struct svc_xprt *, xprt) + __field(unsigned long, flags) + __field(unsigned long, wakeup) + __string(addr, rqst->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xprt = rqst->rq_xprt; + __entry->flags = rqst->rq_xprt->xpt_flags; + __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), + rqst->rq_qtime)); + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + ), + + TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu", + __entry->xprt, __get_str(addr), + show_svc_xprt_flags(__entry->flags), + __entry->wakeup) +); + TRACE_EVENT(svc_wake_up, TP_PROTO(int pid), -- cgit v1.2.3 From 8154ef2776aa512a3eaa0e7db030dc4803354d61 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:54:07 -0400 Subject: NFSD: Clean up legacy NFS WRITE argument XDR decoders Move common code in NFSD's legacy NFS WRITE decoders into a helper. The immediate benefit is reduction of code duplication and some nice micro-optimizations (see below). In the long term, this helper can perform a per-transport call-out to fill the rq_vec (say, using RDMA Reads). The legacy WRITE decoders and procs are changed to work like NFSv4, which constructs the rq_vec just before it is about to call vfs_writev. Why? Calling a transport call-out from the proc instead of the XDR decoder means that the incoming FH can be resolved to a particular filesystem and file. This would allow pages from the backing file to be presented to the transport to be filled, rather than presenting anonymous pages and copying or flipping them into the file's page cache later. I also prefer using the pages in rq_arg.pages, instead of pulling the data pages directly out of the rqstp::rq_pages array. This is currently the way the NFSv3 write decoder works, but the other two do not seem to take this approach. Fixing this removes the only reference to rq_pages found in NFSD, eliminating an NFSD assumption about how transports use the pages in rq_pages. Lastly, avoid setting up the first element of rq_vec as a zero- length buffer. This happens with an RDMA transport when a normal Read chunk is present because the data payload is in rq_arg's page list (none of it is in the head buffer). Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dc4c009deec1..fb3fcacc1e98 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -495,6 +495,8 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, + struct kvec *first, size_t total); #define RPC_MAX_ADDRBUFLEN (63U) -- cgit v1.2.3 From 38a70315599dedacd9ff3bd1016f9048c9d0ad12 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:54:21 -0400 Subject: NFSD: Clean up legacy NFS SYMLINK argument XDR decoders Move common code in NFSD's legacy SYMLINK decoders into a helper. The immediate benefits include: - one fewer data copies on transports that support DDP - consistent error checking across all versions - reduction of code duplication - support for both legal forms of SYMLINK requests on RDMA transports for all versions of NFS (in particular, NFSv2, for completeness) In the long term, this helper is an appropriate spot to perform a per-transport call-out to fill the pathname argument using, say, RDMA Reads. Filling the pathname in the proc function also means that eventually the incoming filehandle can be interpreted so that filesystem- specific memory can be allocated as a sink for the pathname argument, rather than using anonymous pages. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fb3fcacc1e98..574368e8a16f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -497,6 +497,8 @@ struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, size_t total); +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, + struct kvec *first, size_t total); #define RPC_MAX_ADDRBUFLEN (63U) -- cgit v1.2.3