From f7733625ec2f7bd628e4fd5014f72a2983830a74 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Feb 2021 08:24:34 +0200 Subject: scsi: ufs: Add exception event tracepoint Currently, exception event status can be read from wExceptionEventStatus attribute (sysfs file attributes/exception_event_status under the UFS host controller device directory). Polling that attribute to track UFS exception events is impractical, so add a tracepoint to track exception events for testing and debugging purposes. Note, by the time the exception event status is read, the exception event may have cleared, so the value can be zero - see example below. Note also, only enabled exception events can be reported. A subsequent patch adds the ability for users to enable selected exception events via debugfs. Example with driver instrumented to enable all exception events: # echo 1 > /sys/kernel/debug/tracing/events/ufs/ufshcd_exception_event/enable ... do some I/O ... # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 3/3 #P:5 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | kworker/2:2-173 [002] .... 731.486419: ufshcd_exception_event: 0000:00:12.5: status 0x0 kworker/2:2-173 [002] .... 732.608918: ufshcd_exception_event: 0000:00:12.5: status 0x4 kworker/2:2-173 [002] .... 732.609312: ufshcd_exception_event: 0000:00:12.5: status 0x4 Link: https://lore.kernel.org/r/20210209062437.6954-2-adrian.hunter@intel.com Reviewed-by: Avri Altman Reviewed-by: Bean Huo Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- include/trace/events/ufs.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/ufs.h b/include/trace/events/ufs.h index e151477d645c..1cb6f1afba0e 100644 --- a/include/trace/events/ufs.h +++ b/include/trace/events/ufs.h @@ -349,6 +349,27 @@ TRACE_EVENT(ufshcd_upiu, ) ); +TRACE_EVENT(ufshcd_exception_event, + + TP_PROTO(const char *dev_name, u16 status), + + TP_ARGS(dev_name, status), + + TP_STRUCT__entry( + __string(dev_name, dev_name) + __field(u16, status) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + ), + + TP_printk("%s: status 0x%x", + __get_str(dev_name), __entry->status + ) +); + #endif /* if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ -- cgit v1.2.3 From 4ce94eabac16b1d2c95762b40f49e5654ab288d7 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 20 Feb 2021 15:17:07 -0800 Subject: x86/mm/tlb: Flush remote and local TLBs concurrently To improve TLB shootdown performance, flush the remote and local TLBs concurrently. Introduce flush_tlb_multi() that does so. Introduce paravirtual versions of flush_tlb_multi() for KVM, Xen and hyper-v (Xen and hyper-v are only compile-tested). While the updated smp infrastructure is capable of running a function on a single local core, it is not optimized for this case. The multiple function calls and the indirect branch introduce some overhead, and might make local TLB flushes slower than they were before the recent changes. Before calling the SMP infrastructure, check if only a local TLB flush is needed to restore the lost performance in this common case. This requires to check mm_cpumask() one more time, but unless this mask is updated very frequently, this should impact performance negatively. Signed-off-by: Nadav Amit Signed-off-by: Ingo Molnar Reviewed-by: Michael Kelley # Hyper-v parts Reviewed-by: Juergen Gross # Xen and paravirt parts Reviewed-by: Dave Hansen Link: https://lore.kernel.org/r/20210220231712.2475218-5-namit@vmware.com --- include/trace/events/xen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 3b61b587e137..44a3f565264d 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -346,7 +346,7 @@ TRACE_EVENT(xen_mmu_flush_tlb_one_user, TP_printk("addr %lx", __entry->addr) ); -TRACE_EVENT(xen_mmu_flush_tlb_others, +TRACE_EVENT(xen_mmu_flush_tlb_multi, TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm, unsigned long addr, unsigned long end), TP_ARGS(cpus, mm, addr, end), -- cgit v1.2.3 From ee75aef23afe6e88497151c127c13ed69f41aaa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 8 Mar 2021 12:29:07 +0100 Subject: bpf, xdp: Restructure redirect actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XDP_REDIRECT implementations for maps and non-maps are fairly similar, but obviously need to take different code paths depending on if the target is using a map or not. Today, the redirect targets for XDP either uses a map, or is based on ifindex. Here, the map type and id are added to bpf_redirect_info, instead of the actual map. Map type, map item/ifindex, and the map_id (if any) is passed to xdp_do_redirect(). For ifindex-based redirect, used by the bpf_redirect() XDP BFP helper, a special map type/id are used. Map type of UNSPEC together with map id equal to INT_MAX has the special meaning of an ifindex based redirect. Note that valid map ids are 1 inclusive, INT_MAX exclusive ([1,INT_MAX[). In addition to making the code easier to follow, using explicit type and id in bpf_redirect_info has a slight positive performance impact by avoiding a pointer indirection for the map type lookup, and instead use the cacheline for bpf_redirect_info. Since the actual map is not passed via bpf_redirect_info anymore, the map lookup is only done in the BPF helper. This means that the bpf_clear_redirect_map() function can be removed. The actual map item is RCU protected. The bpf_redirect_info flags member is not used by XDP, and not read/written any more. The map member is only written to when required/used, and not unconditionally. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Acked-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210308112907.559576-3-bjorn.topel@gmail.com --- include/trace/events/xdp.h | 62 ++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 27 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 76a97176ab81..fcad3645a70b 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -86,19 +86,15 @@ struct _bpf_dtab_netdev { }; #endif /* __DEVMAP_OBJ_TYPE */ -#define devmap_ifindex(tgt, map) \ - (((map->map_type == BPF_MAP_TYPE_DEVMAP || \ - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \ - ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0) - DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), + enum bpf_map_type map_type, + u32 map_id, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index), TP_STRUCT__entry( __field(int, prog_id) @@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, ), TP_fast_assign( + u32 ifindex = 0, map_index = index; + + if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex; + } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { + ifindex = index; + map_index = 0; + } + __entry->prog_id = xdp->aux->id; __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->err = err; - __entry->to_ifindex = map ? devmap_ifindex(tgt, map) : - index; - __entry->map_id = map ? map->id : 0; - __entry->map_index = map ? index : 0; + __entry->to_ifindex = ifindex; + __entry->map_id = map_id; + __entry->map_index = map_index; ), TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" @@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); -#define _trace_xdp_redirect(dev, xdp, to) \ - trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to) +#define _trace_xdp_redirect(dev, xdp, to) \ + trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) -#define _trace_xdp_redirect_err(dev, xdp, to, err) \ - trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to) +#define _trace_xdp_redirect_err(dev, xdp, to, err) \ + trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) -#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \ - trace_xdp_redirect(dev, xdp, to, 0, map, index) +#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \ + trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index) -#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \ - trace_xdp_redirect_err(dev, xdp, to, err, map, index) +#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \ + trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index) /* not used anymore, but kept around so as not to break old programs */ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); TRACE_EVENT(xdp_cpumap_kthread, -- cgit v1.2.3 From 565cfb9e64dac1aadf7e2130fcda19a1c018df66 Mon Sep 17 00:00:00 2001 From: Sangmoon Kim Date: Tue, 2 Mar 2021 20:55:15 +0900 Subject: rcu/tree: Add a trace event for RCU CPU stall warnings This commit adds a trace event which allows tracing the beginnings of RCU CPU stall warnings on systems where sysctl_panic_on_rcu_stall is disabled. The first parameter is the name of RCU flavor like other trace events. The second parameter indicates whether this is a stall of an expedited grace period, a self-detected stall of a normal grace period, or a stall of a normal grace period detected by some CPU other than the one that is stalled. RCU CPU stall warnings are often caused by external-to-RCU issues, for example, in interrupt handling or task scheduling. Therefore, this event uses TRACE_EVENT, not TRACE_EVENT_RCU, to avoid requiring those interested in tracing RCU CPU stalls to rebuild their kernels with CONFIG_RCU_TRACE=y. Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Neeraj Upadhyay Signed-off-by: Sangmoon Kim Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 5fc29400e1a2..c7711e9b6900 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -432,6 +432,34 @@ TRACE_EVENT_RCU(rcu_fqs, __entry->cpu, __entry->qsevent) ); +/* + * Tracepoint for RCU stall events. Takes a string identifying the RCU flavor + * and a string identifying which function detected the RCU stall as follows: + * + * "StallDetected": Scheduler-tick detects other CPU's stalls. + * "SelfDetected": Scheduler-tick detects a current CPU's stall. + * "ExpeditedStall": Expedited grace period detects stalls. + */ +TRACE_EVENT(rcu_stall_warning, + + TP_PROTO(const char *rcuname, const char *msg), + + TP_ARGS(rcuname, msg), + + TP_STRUCT__entry( + __field(const char *, rcuname) + __field(const char *, msg) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->msg = msg; + ), + + TP_printk("%s %s", + __entry->rcuname, __entry->msg) +); + #endif /* #if defined(CONFIG_TREE_RCU) */ /* -- cgit v1.2.3 From 7dcfbd86adc45f6d6b37278efd22530cf80ab474 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 29 Jan 2021 13:04:04 -0500 Subject: SUNRPC: Export svc_xprt_received() Prepare svc_xprt_received() to be called from transport code instead of from generic RPC server code. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 036eb1f5c133..bda16e9e6ba7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1781,6 +1781,7 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ), \ TP_ARGS(xprt)) +DEFINE_SVC_XPRT_EVENT(received); DEFINE_SVC_XPRT_EVENT(no_write_space); DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); -- cgit v1.2.3 From f2cc020d7876de7583feb52ec939a32419cf9468 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Mar 2021 18:49:35 +0100 Subject: tracing: Fix various typos in comments Fix ~59 single-word typos in the tracing code comments, and fix the grammar in a handful of places. Link: https://lore.kernel.org/r/20210322224546.GA1981273@gmail.com Link: https://lkml.kernel.org/r/20210323174935.GA4176821@gmail.com Reviewed-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/io_uring.h | 2 +- include/trace/events/rcu.h | 2 +- include/trace/events/sched.h | 2 +- include/trace/events/timer.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 9f0d3b7d56b0..ba78a5602cd1 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -49,7 +49,7 @@ TRACE_EVENT(io_uring_create, ); /** - * io_uring_register - called after a buffer/file/eventfd was succesfully + * io_uring_register - called after a buffer/file/eventfd was successfully * registered for a ring * * @ctx: pointer to a ring context structure diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 5fc29400e1a2..97177c10bf64 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -48,7 +48,7 @@ TRACE_EVENT(rcu_utilization, * RCU flavor, the grace-period number, and a string identifying the * grace-period-related event as follows: * - * "AccReadyCB": CPU acclerates new callbacks to RCU_NEXT_READY_TAIL. + * "AccReadyCB": CPU accelerates new callbacks to RCU_NEXT_READY_TAIL. * "AccWaitCB": CPU accelerates new callbacks to RCU_WAIT_TAIL. * "newreq": Request a new grace period. * "start": Start a grace period. diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index cbe3e152d24c..1eca2305ca42 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -174,7 +174,7 @@ DEFINE_EVENT(sched_wakeup_template, sched_waking, TP_ARGS(p)); /* - * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. * It is not always called from the waking context. */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 19abb6c3eb73..6ad031c71be7 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -119,7 +119,7 @@ TRACE_EVENT(timer_expire_entry, * When used in combination with the timer_expire_entry tracepoint we can * determine the runtime of the timer callback function. * - * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might + * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might * be invalid. We solely track the pointer. */ DEFINE_EVENT(timer_class, timer_expire_exit, -- cgit v1.2.3 From 118a4417e14348b2e46f5e467da8444ec4757a45 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 21 Mar 2021 22:14:00 -0700 Subject: random: remove dead code left over from blocking pool Remove some dead code that was left over following commit 90ea1c6436d2 ("random: remove the blocking pool"). Cc: linux-crypto@vger.kernel.org Cc: Andy Lutomirski Cc: Jann Horn Cc: Theodore Ts'o Reviewed-by: Andy Lutomirski Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/trace/events/random.h | 83 ------------------------------------------- 1 file changed, 83 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 9570a10cb949..3d7b432ca5f3 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -85,28 +85,6 @@ TRACE_EVENT(credit_entropy_bits, __entry->entropy_count, (void *)__entry->IP) ); -TRACE_EVENT(push_to_pool, - TP_PROTO(const char *pool_name, int pool_bits, int input_bits), - - TP_ARGS(pool_name, pool_bits, input_bits), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, pool_bits ) - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->pool_bits = pool_bits; - __entry->input_bits = input_bits; - ), - - TP_printk("%s: pool_bits %d input_pool_bits %d", - __entry->pool_name, __entry->pool_bits, - __entry->input_bits) -); - TRACE_EVENT(debit_entropy, TP_PROTO(const char *pool_name, int debit_bits), @@ -161,35 +139,6 @@ TRACE_EVENT(add_disk_randomness, MINOR(__entry->dev), __entry->input_bits) ); -TRACE_EVENT(xfer_secondary_pool, - TP_PROTO(const char *pool_name, int xfer_bits, int request_bits, - int pool_entropy, int input_entropy), - - TP_ARGS(pool_name, xfer_bits, request_bits, pool_entropy, - input_entropy), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, xfer_bits ) - __field( int, request_bits ) - __field( int, pool_entropy ) - __field( int, input_entropy ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->xfer_bits = xfer_bits; - __entry->request_bits = request_bits; - __entry->pool_entropy = pool_entropy; - __entry->input_entropy = input_entropy; - ), - - TP_printk("pool %s xfer_bits %d request_bits %d pool_entropy %d " - "input_entropy %d", __entry->pool_name, __entry->xfer_bits, - __entry->request_bits, __entry->pool_entropy, - __entry->input_entropy) -); - DECLARE_EVENT_CLASS(random__get_random_bytes, TP_PROTO(int nbytes, unsigned long IP), @@ -253,38 +202,6 @@ DEFINE_EVENT(random__extract_entropy, extract_entropy, TP_ARGS(pool_name, nbytes, entropy_count, IP) ); -DEFINE_EVENT(random__extract_entropy, extract_entropy_user, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, nbytes, entropy_count, IP) -); - -TRACE_EVENT(random_read, - TP_PROTO(int got_bits, int need_bits, int pool_left, int input_left), - - TP_ARGS(got_bits, need_bits, pool_left, input_left), - - TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, need_bits ) - __field( int, pool_left ) - __field( int, input_left ) - ), - - TP_fast_assign( - __entry->got_bits = got_bits; - __entry->need_bits = need_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; - ), - - TP_printk("got_bits %d still_needed_bits %d " - "blocking_pool_entropy_left %d input_entropy_left %d", - __entry->got_bits, __entry->got_bits, __entry->pool_left, - __entry->input_left) -); - TRACE_EVENT(urandom_read, TP_PROTO(int got_bits, int pool_left, int input_left), -- cgit v1.2.3 From 6b3caab4ba9b2d290162e610810a946a33c65117 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Tue, 16 Feb 2021 14:16:34 -0500 Subject: ext4: delete some unused tracepoint definitions A number of tracepoint instances have been removed from ext4 by past patches but the definitions of those tracepoints have not. All instances of ext4_ext_in_cache and ext4_ext_put_in_cache were removed by commit 69eb33dc24dc ("ext4: remove single extent cache"). ext4_get_reserved_cluster_alloc was removed by commit b6bf9171ef5c ("ext4: reduce reserved cluster count by number of allocated clusters"). ext4_find_delalloc_range was removed by commit 7d1b1fbc95eb ("ext4: reimplement ext4_find_delay_alloc_range on extent status tree"). All instances of ext4_direct_IO_enter and ext4_direct_IO_exit were removed by commit 378f32bab371 ("ext4: introduce direct I/O write using iomap infrastructure"). Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20210216191634.20957-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 176 -------------------------------------------- 1 file changed, 176 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 70ae5497b73a..0ea36b2b0662 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1358,64 +1358,6 @@ TRACE_EVENT(ext4_read_block_bitmap_load, __entry->group, __entry->prefetch) ); -TRACE_EVENT(ext4_direct_IO_enter, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), - - TP_ARGS(inode, offset, len, rw), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned long, len ) - __field( int, rw ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = offset; - __entry->len = len; - __entry->rw = rw; - ), - - TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->rw) -); - -TRACE_EVENT(ext4_direct_IO_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, - int rw, int ret), - - TP_ARGS(inode, offset, len, rw, ret), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned long, len ) - __field( int, rw ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = offset; - __entry->len = len; - __entry->rw = rw; - __entry->ret = ret; - ), - - TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->pos, __entry->len, - __entry->rw, __entry->ret) -); - DECLARE_EVENT_CLASS(ext4__fallocate_mode, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), @@ -1962,124 +1904,6 @@ TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, __entry->len, show_mflags(__entry->flags), __entry->ret) ); -TRACE_EVENT(ext4_ext_put_in_cache, - TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, - ext4_fsblk_t start), - - TP_ARGS(inode, lblk, len, start), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( ext4_lblk_t, lblk ) - __field( unsigned int, len ) - __field( ext4_fsblk_t, start ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->lblk = lblk; - __entry->len = len; - __entry->start = start; - ), - - TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned) __entry->lblk, - __entry->len, - (unsigned long long) __entry->start) -); - -TRACE_EVENT(ext4_ext_in_cache, - TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret), - - TP_ARGS(inode, lblk, ret), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( ext4_lblk_t, lblk ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->lblk = lblk; - __entry->ret = ret; - ), - - TP_printk("dev %d,%d ino %lu lblk %u ret %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned) __entry->lblk, - __entry->ret) - -); - -TRACE_EVENT(ext4_find_delalloc_range, - TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to, - int reverse, int found, ext4_lblk_t found_blk), - - TP_ARGS(inode, from, to, reverse, found, found_blk), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( ext4_lblk_t, from ) - __field( ext4_lblk_t, to ) - __field( int, reverse ) - __field( int, found ) - __field( ext4_lblk_t, found_blk ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->from = from; - __entry->to = to; - __entry->reverse = reverse; - __entry->found = found; - __entry->found_blk = found_blk; - ), - - TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d " - "(blk = %u)", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned) __entry->from, (unsigned) __entry->to, - __entry->reverse, __entry->found, - (unsigned) __entry->found_blk) -); - -TRACE_EVENT(ext4_get_reserved_cluster_alloc, - TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len), - - TP_ARGS(inode, lblk, len), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( ext4_lblk_t, lblk ) - __field( unsigned int, len ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->lblk = lblk; - __entry->len = len; - ), - - TP_printk("dev %d,%d ino %lu lblk %u len %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned) __entry->lblk, - __entry->len) -); - TRACE_EVENT(ext4_ext_show_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, unsigned short len), -- cgit v1.2.3 From 117bfa8d5d4cb50556a59381d0f10fe762c1cd28 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:56 +0800 Subject: iommu/vt-d: Remove unused dma map/unmap trace events With commit c588072bba6b5 ("iommu/vt-d: Convert intel iommu driver to the iommu ops"), the trace events for dma map/unmap have no users any more. Cleanup them to make the code neat. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/trace/events/intel_iommu.h | 120 ------------------------------------- 1 file changed, 120 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index e801f4910522..d233f2916584 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -15,126 +15,6 @@ #include #include -DECLARE_EVENT_CLASS(dma_map, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, - size_t size), - - TP_ARGS(dev, dev_addr, phys_addr, size), - - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(dma_addr_t, dev_addr) - __field(phys_addr_t, phys_addr) - __field(size_t, size) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->dev_addr = dev_addr; - __entry->phys_addr = phys_addr; - __entry->size = size; - ), - - TP_printk("dev=%s dev_addr=0x%llx phys_addr=0x%llx size=%zu", - __get_str(dev_name), - (unsigned long long)__entry->dev_addr, - (unsigned long long)__entry->phys_addr, - __entry->size) -); - -DEFINE_EVENT(dma_map, map_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, - size_t size), - TP_ARGS(dev, dev_addr, phys_addr, size) -); - -DEFINE_EVENT(dma_map, bounce_map_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, - size_t size), - TP_ARGS(dev, dev_addr, phys_addr, size) -); - -DECLARE_EVENT_CLASS(dma_unmap, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - - TP_ARGS(dev, dev_addr, size), - - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(dma_addr_t, dev_addr) - __field(size_t, size) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->dev_addr = dev_addr; - __entry->size = size; - ), - - TP_printk("dev=%s dev_addr=0x%llx size=%zu", - __get_str(dev_name), - (unsigned long long)__entry->dev_addr, - __entry->size) -); - -DEFINE_EVENT(dma_unmap, unmap_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - TP_ARGS(dev, dev_addr, size) -); - -DEFINE_EVENT(dma_unmap, unmap_sg, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - TP_ARGS(dev, dev_addr, size) -); - -DEFINE_EVENT(dma_unmap, bounce_unmap_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - TP_ARGS(dev, dev_addr, size) -); - -DECLARE_EVENT_CLASS(dma_map_sg, - TP_PROTO(struct device *dev, int index, int total, - struct scatterlist *sg), - - TP_ARGS(dev, index, total, sg), - - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(dma_addr_t, dev_addr) - __field(phys_addr_t, phys_addr) - __field(size_t, size) - __field(int, index) - __field(int, total) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->dev_addr = sg->dma_address; - __entry->phys_addr = sg_phys(sg); - __entry->size = sg->dma_length; - __entry->index = index; - __entry->total = total; - ), - - TP_printk("dev=%s [%d/%d] dev_addr=0x%llx phys_addr=0x%llx size=%zu", - __get_str(dev_name), __entry->index, __entry->total, - (unsigned long long)__entry->dev_addr, - (unsigned long long)__entry->phys_addr, - __entry->size) -); - -DEFINE_EVENT(dma_map_sg, map_sg, - TP_PROTO(struct device *dev, int index, int total, - struct scatterlist *sg), - TP_ARGS(dev, index, total, sg) -); - -DEFINE_EVENT(dma_map_sg, bounce_map_sg, - TP_PROTO(struct device *dev, int index, int total, - struct scatterlist *sg), - TP_ARGS(dev, index, total, sg) -); - TRACE_EVENT(qi_submit, TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), -- cgit v1.2.3 From 7471e1afabf8a9adcb4659170f4e198c05f5b5a6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 22 Feb 2021 22:05:00 -0700 Subject: io_uring: include cflags in completion trace event We should be including the completion flags for better introspection on exactly what completion event was logged. Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 9f0d3b7d56b0..bd528176a3d5 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -290,29 +290,32 @@ TRACE_EVENT(io_uring_fail_link, * @ctx: pointer to a ring context structure * @user_data: user data associated with the request * @res: result of the request + * @cflags: completion flags * */ TRACE_EVENT(io_uring_complete, - TP_PROTO(void *ctx, u64 user_data, long res), + TP_PROTO(void *ctx, u64 user_data, long res, unsigned cflags), - TP_ARGS(ctx, user_data, res), + TP_ARGS(ctx, user_data, res, cflags), TP_STRUCT__entry ( __field( void *, ctx ) __field( u64, user_data ) __field( long, res ) + __field( unsigned, cflags ) ), TP_fast_assign( __entry->ctx = ctx; __entry->user_data = user_data; __entry->res = res; + __entry->cflags = cflags; ), - TP_printk("ring %p, user_data 0x%llx, result %ld", + TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x", __entry->ctx, (unsigned long long)__entry->user_data, - __entry->res) + __entry->res, __entry->cflags) ); -- cgit v1.2.3 From 907d52310024fae6632aabfc7e833decaf185e5f Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Thu, 1 Apr 2021 10:19:25 +0800 Subject: block: add queue_to_disk() to get gendisk from request_queue Sometimes we need to get the corresponding gendisk from request_queue. It is preferred that block drivers store private data in gendisk->private_data rather than request_queue->queuedata, e.g. see: commit c4a59c4e5db3 ("dm: stop using ->queuedata"). So if only request_queue is given, we need to get its corresponding gendisk to get the private data stored in that gendisk. Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer Signed-off-by: Jeffle Xu Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/trace/events/kyber.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h index c0e7d24ca256..f9802562edf6 100644 --- a/include/trace/events/kyber.h +++ b/include/trace/events/kyber.h @@ -30,7 +30,7 @@ TRACE_EVENT(kyber_latency, ), TP_fast_assign( - __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + __entry->dev = disk_devt(queue_to_disk(q)); strlcpy(__entry->domain, domain, sizeof(__entry->domain)); strlcpy(__entry->type, type, sizeof(__entry->type)); __entry->percentile = percentile; @@ -59,7 +59,7 @@ TRACE_EVENT(kyber_adjust, ), TP_fast_assign( - __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + __entry->dev = disk_devt(queue_to_disk(q)); strlcpy(__entry->domain, domain, sizeof(__entry->domain)); __entry->depth = depth; ), @@ -81,7 +81,7 @@ TRACE_EVENT(kyber_throttled, ), TP_fast_assign( - __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + __entry->dev = disk_devt(queue_to_disk(q)); strlcpy(__entry->domain, domain, sizeof(__entry->domain)); ), -- cgit v1.2.3 From e936a5970ef596ff48fca72aa8200955753c543f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 Mar 2021 13:22:27 -0400 Subject: SUNRPC: Add tracepoint that fires when an RPC is retransmitted A separate tracepoint can be left enabled all the time to capture rare but important retransmission events. So for example: kworker/u26:3-568 [009] 156.967933: xprt_retransmit: task:44093@5 xid=0xa25dbc79 nfsv3 WRITE ntrans=2 Or, for example, enable all nfs and nfs4 tracepoints, and set up a trigger to disable tracing when xprt_retransmit fires to capture everything that leads up to it. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 036eb1f5c133..430b42f2351f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1079,6 +1079,46 @@ TRACE_EVENT(xprt_transmit, __entry->seqno, __entry->status) ); +TRACE_EVENT(xprt_retransmit, + TP_PROTO( + const struct rpc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(int, ntrans) + __field(int, version) + __string(progname, + rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + rqst->rq_task->tk_msg.rpc_proc->p_name) + ), + + TP_fast_assign( + struct rpc_task *task = rqst->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->ntrans = rqst->rq_ntrans; + __assign_str(progname, + task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + ), + + TP_printk( + "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __get_str(progname), __entry->version, __get_str(procedure), + __entry->ntrans) +); + TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), -- cgit v1.2.3 From 6cf23783f750634e10daeede48b0f5f5d64ebf3a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 Mar 2021 16:03:08 -0400 Subject: SUNRPC: Remove trace_xprt_transmit_queued This tracepoint can crash when dereferencing snd_task because when some transports connect, they put a cookie in that field instead of a pointer to an rpc_task. BUG: KASAN: use-after-free in trace_event_raw_event_xprt_writelock_event+0x141/0x18e [sunrpc] Read of size 2 at addr ffff8881a83bd3a0 by task git/331872 CPU: 11 PID: 331872 Comm: git Tainted: G S 5.12.0-rc2-00007-g3ab6e585a7f9 #1453 Hardware name: Supermicro SYS-6028R-T/X10DRi, BIOS 1.1a 10/16/2015 Call Trace: dump_stack+0x9c/0xcf print_address_description.constprop.0+0x18/0x239 kasan_report+0x174/0x1b0 trace_event_raw_event_xprt_writelock_event+0x141/0x18e [sunrpc] xprt_prepare_transmit+0x8e/0xc1 [sunrpc] call_transmit+0x4d/0xc6 [sunrpc] Fixes: 9ce07ae5eb1d ("SUNRPC: Replace dprintk() call site in xprt_prepare_transmit") Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 430b42f2351f..3c3c4c843c62 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1181,7 +1181,6 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, DEFINE_WRITELOCK_EVENT(reserve_xprt); DEFINE_WRITELOCK_EVENT(release_xprt); -DEFINE_WRITELOCK_EVENT(transmit_queued); DECLARE_EVENT_CLASS(xprt_cong_event, TP_PROTO( -- cgit v1.2.3 From e10a9892097672b62be4ea265a9eb48f698ca3b8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 16 Apr 2021 15:38:04 -0700 Subject: mptcp: add tracepoint in mptcp_subflow_get_send This patch added a tracepoint in the packet scheduler function mptcp_subflow_get_send(). Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/trace/events/mptcp.h | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 include/trace/events/mptcp.h (limited to 'include/trace') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h new file mode 100644 index 000000000000..b1617a0162da --- /dev/null +++ b/include/trace/events/mptcp.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mptcp + +#if !defined(_TRACE_MPTCP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MPTCP_H + +#include + +TRACE_EVENT(mptcp_subflow_get_send, + + TP_PROTO(struct mptcp_subflow_context *subflow), + + TP_ARGS(subflow), + + TP_STRUCT__entry( + __field(bool, active) + __field(bool, free) + __field(u32, snd_wnd) + __field(u32, pace) + __field(u8, backup) + __field(u64, ratio) + ), + + TP_fast_assign( + struct sock *ssk; + + __entry->active = mptcp_subflow_active(subflow); + __entry->backup = subflow->backup; + + if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) + __entry->free = sk_stream_memory_free(subflow->tcp_sock); + else + __entry->free = 0; + + ssk = mptcp_subflow_tcp_sock(subflow); + if (ssk && sk_fullsock(ssk)) { + __entry->snd_wnd = tcp_sk(ssk)->snd_wnd; + __entry->pace = ssk->sk_pacing_rate; + } else { + __entry->snd_wnd = 0; + __entry->pace = 0; + } + + if (ssk && sk_fullsock(ssk) && __entry->pace) + __entry->ratio = div_u64((u64)ssk->sk_wmem_queued << 32, __entry->pace); + else + __entry->ratio = 0; + ), + + TP_printk("active=%d free=%d snd_wnd=%u pace=%u backup=%u ratio=%llu", + __entry->active, __entry->free, + __entry->snd_wnd, __entry->pace, + __entry->backup, __entry->ratio) +); + +#endif /* _TRACE_MPTCP_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 0918e34b85c7e125f531caaf3d2918baf2b1a5f9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 16 Apr 2021 15:38:05 -0700 Subject: mptcp: add tracepoint in get_mapping_status This patch added a tracepoint in the mapping status function get_mapping_status() to dump every mpext field. Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/trace/events/mptcp.h | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index b1617a0162da..ec20350d82eb 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -54,6 +54,58 @@ TRACE_EVENT(mptcp_subflow_get_send, __entry->backup, __entry->ratio) ); +DECLARE_EVENT_CLASS(mptcp_dump_mpext, + + TP_PROTO(struct mptcp_ext *mpext), + + TP_ARGS(mpext), + + TP_STRUCT__entry( + __field(u64, data_ack) + __field(u64, data_seq) + __field(u32, subflow_seq) + __field(u16, data_len) + __field(u8, use_map) + __field(u8, dsn64) + __field(u8, data_fin) + __field(u8, use_ack) + __field(u8, ack64) + __field(u8, mpc_map) + __field(u8, frozen) + __field(u8, reset_transient) + __field(u8, reset_reason) + ), + + TP_fast_assign( + __entry->data_ack = mpext->ack64 ? mpext->data_ack : mpext->data_ack32; + __entry->data_seq = mpext->data_seq; + __entry->subflow_seq = mpext->subflow_seq; + __entry->data_len = mpext->data_len; + __entry->use_map = mpext->use_map; + __entry->dsn64 = mpext->dsn64; + __entry->data_fin = mpext->data_fin; + __entry->use_ack = mpext->use_ack; + __entry->ack64 = mpext->ack64; + __entry->mpc_map = mpext->mpc_map; + __entry->frozen = mpext->frozen; + __entry->reset_transient = mpext->reset_transient; + __entry->reset_reason = mpext->reset_reason; + ), + + TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u", + __entry->data_ack, __entry->data_seq, + __entry->subflow_seq, __entry->data_len, + __entry->use_map, __entry->dsn64, + __entry->data_fin, __entry->use_ack, + __entry->ack64, __entry->mpc_map, + __entry->frozen, __entry->reset_transient, + __entry->reset_reason) +); + +DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status, + TP_PROTO(struct mptcp_ext *mpext), + TP_ARGS(mpext)); + #endif /* _TRACE_MPTCP_H */ /* This part must be outside protection */ -- cgit v1.2.3 From ed66bfb4ce34a94174bb755eeaca85d1661d36ad Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 16 Apr 2021 15:38:06 -0700 Subject: mptcp: add tracepoint in ack_update_msk This patch added a tracepoint in ack_update_msk() to track the incoming data_ack and window/snd_una updates. Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/trace/events/mptcp.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index ec20350d82eb..b90bfe45d995 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -106,6 +106,38 @@ DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status, TP_PROTO(struct mptcp_ext *mpext), TP_ARGS(mpext)); +TRACE_EVENT(ack_update_msk, + + TP_PROTO(u64 data_ack, u64 old_snd_una, + u64 new_snd_una, u64 new_wnd_end, + u64 msk_wnd_end), + + TP_ARGS(data_ack, old_snd_una, + new_snd_una, new_wnd_end, + msk_wnd_end), + + TP_STRUCT__entry( + __field(u64, data_ack) + __field(u64, old_snd_una) + __field(u64, new_snd_una) + __field(u64, new_wnd_end) + __field(u64, msk_wnd_end) + ), + + TP_fast_assign( + __entry->data_ack = data_ack; + __entry->old_snd_una = old_snd_una; + __entry->new_snd_una = new_snd_una; + __entry->new_wnd_end = new_wnd_end; + __entry->msk_wnd_end = msk_wnd_end; + ), + + TP_printk("data_ack=%llu old_snd_una=%llu new_snd_una=%llu new_wnd_end=%llu msk_wnd_end=%llu", + __entry->data_ack, __entry->old_snd_una, + __entry->new_snd_una, __entry->new_wnd_end, + __entry->msk_wnd_end) +); + #endif /* _TRACE_MPTCP_H */ /* This part must be outside protection */ -- cgit v1.2.3 From d96a838a7ce2772ed181f89becd79b72d267f93a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 16 Apr 2021 15:38:07 -0700 Subject: mptcp: add tracepoint in subflow_check_data_avail This patch added a tracepoint in subflow_check_data_avail() to show the mapping status. Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/trace/events/mptcp.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index b90bfe45d995..775a46d0b0f0 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -7,6 +7,14 @@ #include +#define show_mapping_status(status) \ + __print_symbolic(status, \ + { 0, "MAPPING_OK" }, \ + { 1, "MAPPING_INVALID" }, \ + { 2, "MAPPING_EMPTY" }, \ + { 3, "MAPPING_DATA_FIN" }, \ + { 4, "MAPPING_DUMMY" }) + TRACE_EVENT(mptcp_subflow_get_send, TP_PROTO(struct mptcp_subflow_context *subflow), @@ -138,6 +146,27 @@ TRACE_EVENT(ack_update_msk, __entry->msk_wnd_end) ); +TRACE_EVENT(subflow_check_data_avail, + + TP_PROTO(__u8 status, struct sk_buff *skb), + + TP_ARGS(status, skb), + + TP_STRUCT__entry( + __field(u8, status) + __field(const void *, skb) + ), + + TP_fast_assign( + __entry->status = status; + __entry->skb = skb; + ), + + TP_printk("mapping_status=%s, skb=%p", + show_mapping_status(__entry->status), + __entry->skb) +); + #endif /* _TRACE_MPTCP_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 501b918525efec2e701e806f04d474d7da350962 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 25 Mar 2021 19:19:48 -0700 Subject: KVM: Move arm64's MMU notifier trace events to generic code Move arm64's MMU notifier trace events into common code in preparation for doing the hva->gfn lookup in common code. The alternative would be to trace the gfn instead of hva, but that's not obviously better and could also be done in common code. Tracing the notifiers is also quite handy for debug regardless of architecture. Remove a completely redundant tracepoint from PPC e500. Signed-off-by: Sean Christopherson Message-Id: <20210326021957.1424875-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/trace/events/kvm.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 49d7d0fe29f6..200eb7465530 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -462,6 +462,72 @@ TRACE_EVENT(kvm_dirty_ring_exit, TP_printk("vcpu %d", __entry->vcpu_id) ); +TRACE_EVENT(kvm_unmap_hva_range, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier unmap range: %#016lx -- %#016lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_set_spte_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva) +); + +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#016lx -- %#016lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#016lx", __entry->hva) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 6dfbd6b5d5de19bad36f44710359200f21191134 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 25 Mar 2021 19:19:57 -0700 Subject: KVM: x86/mmu: Drop trace_kvm_age_page() tracepoint Remove x86's trace_kvm_age_page() tracepoint. It's mostly redundant with the common trace_kvm_age_hva() tracepoint, and if there is a need for the extra details, e.g. gfn, referenced, etc... those details should be added to the common tracepoint so that all architectures and MMUs benefit from the info. Signed-off-by: Sean Christopherson Message-Id: <20210326021957.1424875-19-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/trace/events/kvm.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 200eb7465530..37e1e1a2d67d 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -255,30 +255,6 @@ TRACE_EVENT(kvm_fpu, TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol)) ); -TRACE_EVENT(kvm_age_page, - TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref), - TP_ARGS(gfn, level, slot, ref), - - TP_STRUCT__entry( - __field( u64, hva ) - __field( u64, gfn ) - __field( u8, level ) - __field( u8, referenced ) - ), - - TP_fast_assign( - __entry->gfn = gfn; - __entry->level = level; - __entry->hva = ((gfn - slot->base_gfn) << - PAGE_SHIFT) + slot->userspace_addr; - __entry->referenced = ref; - ), - - TP_printk("hva %llx gfn %llx level %u %s", - __entry->hva, __entry->gfn, __entry->level, - __entry->referenced ? "YOUNG" : "OLD") -); - #ifdef CONFIG_KVM_ASYNC_PF DECLARE_EVENT_CLASS(kvm_async_get_page_class, -- cgit v1.2.3 From 18bb8bbf13c1839b43c9e09e76d397b753989af2 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 19 Apr 2021 16:41:02 +0900 Subject: btrfs: zoned: automatically reclaim zones When a file gets deleted on a zoned file system, the space freed is not returned back into the block group's free space, but is migrated to zone_unusable. As this zone_unusable space is behind the current write pointer it is not possible to use it for new allocations. In the current implementation a zone is reset once all of the block group's space is accounted as zone unusable. This behaviour can lead to premature ENOSPC errors on a busy file system. Instead of only reclaiming the zone once it is completely unusable, kick off a reclaim job once the amount of unusable bytes exceeds a user configurable threshold between 51% and 100%. It can be set per mounted filesystem via the sysfs tunable bg_reclaim_threshold which is set to 75% by default. Similar to reclaiming unused block groups, these dirty block groups are added to a to_reclaim list and then on a transaction commit, the reclaim process is triggered but after we deleted unused block groups, which will free space for the relocation process. Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0551ea65374f..a41dd8a0c730 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1903,6 +1903,18 @@ DEFINE_EVENT(btrfs__block_group, btrfs_add_unused_block_group, TP_ARGS(bg_cache) ); +DEFINE_EVENT(btrfs__block_group, btrfs_add_reclaim_block_group, + TP_PROTO(const struct btrfs_block_group *bg_cache), + + TP_ARGS(bg_cache) +); + +DEFINE_EVENT(btrfs__block_group, btrfs_reclaim_block_group, + TP_PROTO(const struct btrfs_block_group *bg_cache), + + TP_ARGS(bg_cache) +); + DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group, TP_PROTO(const struct btrfs_block_group *bg_cache), -- cgit v1.2.3 From 77b4d2c6316ab096e3f77eea240144941434f2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Sep 2020 09:25:13 +0100 Subject: netfs: Add tracepoints Add three tracepoints to track the activity of the read helpers: (1) netfs/netfs_read This logs entry to the read helpers and also expansion of the range in a readahead request. (2) netfs/netfs_rreq This logs the progress of netfs_read_request objects which track read requests. A read request may be a compound of multiple subrequests. (3) netfs/netfs_sreq This logs the progress of netfs_read_subrequest objects, which track the contributions from various sources to a read request. Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/161118138060.1232039.5353374588021776217.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161033468.2537118.14021843889844001905.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340395843.1303470.7355519662919639648.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539538693.286939.10171713520419106334.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653796447.2770958.1870655382450862155.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789078003.6155.17814844411672989942.stgit@warthog.procyon.org.uk/ # v6 --- include/trace/events/netfs.h | 199 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 include/trace/events/netfs.h (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h new file mode 100644 index 000000000000..12ad382764c5 --- /dev/null +++ b/include/trace/events/netfs.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Network filesystem support module tracepoints + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM netfs + +#if !defined(_TRACE_NETFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NETFS_H + +#include + +/* + * Define enums for tracing information. + */ +#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY + +enum netfs_read_trace { + netfs_read_trace_expanded, + netfs_read_trace_readahead, + netfs_read_trace_readpage, +}; + +enum netfs_rreq_trace { + netfs_rreq_trace_assess, + netfs_rreq_trace_done, + netfs_rreq_trace_free, + netfs_rreq_trace_resubmit, + netfs_rreq_trace_unlock, + netfs_rreq_trace_unmark, + netfs_rreq_trace_write, +}; + +enum netfs_sreq_trace { + netfs_sreq_trace_download_instead, + netfs_sreq_trace_free, + netfs_sreq_trace_prepare, + netfs_sreq_trace_resubmit_short, + netfs_sreq_trace_submit, + netfs_sreq_trace_terminated, + netfs_sreq_trace_write, + netfs_sreq_trace_write_term, +}; + +#endif + +#define netfs_read_traces \ + EM(netfs_read_trace_expanded, "EXPANDED ") \ + EM(netfs_read_trace_readahead, "READAHEAD") \ + E_(netfs_read_trace_readpage, "READPAGE ") + +#define netfs_rreq_traces \ + EM(netfs_rreq_trace_assess, "ASSESS") \ + EM(netfs_rreq_trace_done, "DONE ") \ + EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_resubmit, "RESUBM") \ + EM(netfs_rreq_trace_unlock, "UNLOCK") \ + EM(netfs_rreq_trace_unmark, "UNMARK") \ + E_(netfs_rreq_trace_write, "WRITE ") + +#define netfs_sreq_sources \ + EM(NETFS_FILL_WITH_ZEROES, "ZERO") \ + EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \ + EM(NETFS_READ_FROM_CACHE, "READ") \ + E_(NETFS_INVALID_READ, "INVL") \ + +#define netfs_sreq_traces \ + EM(netfs_sreq_trace_download_instead, "RDOWN") \ + EM(netfs_sreq_trace_free, "FREE ") \ + EM(netfs_sreq_trace_prepare, "PREP ") \ + EM(netfs_sreq_trace_resubmit_short, "SHORT") \ + EM(netfs_sreq_trace_submit, "SUBMT") \ + EM(netfs_sreq_trace_terminated, "TERM ") \ + EM(netfs_sreq_trace_write, "WRITE") \ + E_(netfs_sreq_trace_write_term, "WTERM") + + +/* + * Export enum symbols via userspace. + */ +#undef EM +#undef E_ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +netfs_read_traces; +netfs_rreq_traces; +netfs_sreq_sources; +netfs_sreq_traces; + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef E_ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } + +TRACE_EVENT(netfs_read, + TP_PROTO(struct netfs_read_request *rreq, + loff_t start, size_t len, + enum netfs_read_trace what), + + TP_ARGS(rreq, start, len, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned int, cookie ) + __field(loff_t, start ) + __field(size_t, len ) + __field(enum netfs_read_trace, what ) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->cookie = rreq->cookie_debug_id; + __entry->start = start; + __entry->len = len; + __entry->what = what; + ), + + TP_printk("R=%08x %s c=%08x s=%llx %zx", + __entry->rreq, + __print_symbolic(__entry->what, netfs_read_traces), + __entry->cookie, + __entry->start, __entry->len) + ); + +TRACE_EVENT(netfs_rreq, + TP_PROTO(struct netfs_read_request *rreq, + enum netfs_rreq_trace what), + + TP_ARGS(rreq, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned short, flags ) + __field(enum netfs_rreq_trace, what ) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->flags = rreq->flags; + __entry->what = what; + ), + + TP_printk("R=%08x %s f=%02x", + __entry->rreq, + __print_symbolic(__entry->what, netfs_rreq_traces), + __entry->flags) + ); + +TRACE_EVENT(netfs_sreq, + TP_PROTO(struct netfs_read_subrequest *sreq, + enum netfs_sreq_trace what), + + TP_ARGS(sreq, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned short, index ) + __field(short, error ) + __field(unsigned short, flags ) + __field(enum netfs_read_source, source ) + __field(enum netfs_sreq_trace, what ) + __field(size_t, len ) + __field(size_t, transferred ) + __field(loff_t, start ) + ), + + TP_fast_assign( + __entry->rreq = sreq->rreq->debug_id; + __entry->index = sreq->debug_index; + __entry->error = sreq->error; + __entry->flags = sreq->flags; + __entry->source = sreq->source; + __entry->what = what; + __entry->len = sreq->len; + __entry->transferred = sreq->transferred; + __entry->start = sreq->start; + ), + + TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx/%zx e=%d", + __entry->rreq, __entry->index, + __print_symbolic(__entry->what, netfs_sreq_traces), + __print_symbolic(__entry->source, netfs_sreq_sources), + __entry->flags, + __entry->start, __entry->transferred, __entry->len, + __entry->error) + ); + +#endif /* _TRACE_NETFS_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From e1b1240c1ff5f8bfba797f14996d8bac8a9ec437 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Sep 2020 11:06:07 +0100 Subject: netfs: Add write_begin helper Add a helper to do the pre-reading work for the netfs write_begin address space op. Changes v6: - Fixed a missing rreq put in netfs_write_begin()[3]. - Use DEFINE_READAHEAD()[4]. v5: - Made the wait for PG_fscache in netfs_write_begin() killable[2]. v4: - Added flag to netfs_subreq_terminated() to indicate that the caller may have been running async and stuff that might sleep needs punting to a workqueue (can't use in_softirq()[1]). Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1] Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/161781042127.463527.9154479794406046987.stgit@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/1234933.1617886271@warthog.procyon.org.uk/ [4] Link: https://lore.kernel.org/r/160588543960.3465195.2792938973035886168.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118140165.1232039.16418853874312234477.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161035539.2537118.15674887534950908530.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340398368.1303470.11242918276563276090.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539541541.286939.1889738674057013729.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653798616.2770958.17213315845968485563.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789080530.6155.1011847312392330491.stgit@warthog.procyon.org.uk/ # v6 --- include/trace/events/netfs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 12ad382764c5..a2bf6cd84bd4 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -22,6 +22,7 @@ enum netfs_read_trace { netfs_read_trace_expanded, netfs_read_trace_readahead, netfs_read_trace_readpage, + netfs_read_trace_write_begin, }; enum netfs_rreq_trace { @@ -50,7 +51,8 @@ enum netfs_sreq_trace { #define netfs_read_traces \ EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_readahead, "READAHEAD") \ - E_(netfs_read_trace_readpage, "READPAGE ") + EM(netfs_read_trace_readpage, "READPAGE ") \ + E_(netfs_read_trace_write_begin, "WRITEBEGN") #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS") \ -- cgit v1.2.3 From 726218fdc22c9b52f16e1228499a804bbf262a20 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Feb 2020 14:22:24 +0000 Subject: netfs: Define an interface to talk to a cache Add an interface to the netfs helper library for reading data from the cache instead of downloading it from the server and support for writing data just downloaded or cleared to the cache. The API passes an iov_iter to the cache read/write routines to indicate the data/buffer to be used. This is done using the ITER_XARRAY type to provide direct access to the netfs inode's pagecache. When the netfs's ->begin_cache_operation() method is called, this must fill in the cache_resources in the netfs_read_request struct, including the netfs_cache_ops used by the helper lib to talk to the cache. The helper lib does not directly access the cache. Changes: v6: - Call trace_netfs_read() after beginning the cache op so that the cookie debug ID can be logged[3]. - Don't record the error from writing to the cache. We don't want to pass it back to the netfs[4]. - Fix copy-to-cache subreq amalgamation to not round up as it goes along otherwise it overcalculates the length of the write[5]. v5: - Use end_page_fscache() rather than unlock_page_fscache()[2]. v4: - Added flag to netfs_subreq_terminated() to indicate that the caller may have been running async and stuff that might sleep needs punting to a workqueue (can't use in_softirq()[1]). - Add missing inc of netfs_n_rh_read stat. - Move initial definition of fscache_begin_read_operation() elsewhere. - Need to call op->begin_cache_operation() from netfs_write_begin(). Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1] Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/161781045123.463527.14533348855710902201.stgit@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/161781046256.463527.18158681600085556192.stgit@warthog.procyon.org.uk/ [4] Link: https://lore.kernel.org/r/161781047695.463527.7463536103593997492.stgit@warthog.procyon.org.uk/ [5] Link: https://lore.kernel.org/r/161118141321.1232039.8296910406755622458.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161036700.2537118.11170748455436854978.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340399569.1303470.1138884774643385730.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539542874.286939.13337898213448136687.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653799826.2770958.9015430297426331950.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789081462.6155.3853904866933313256.stgit@warthog.procyon.org.uk/ # v6 --- include/trace/events/netfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index a2bf6cd84bd4..e3ebeabd3852 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -43,6 +43,7 @@ enum netfs_sreq_trace { netfs_sreq_trace_submit, netfs_sreq_trace_terminated, netfs_sreq_trace_write, + netfs_sreq_trace_write_skip, netfs_sreq_trace_write_term, }; @@ -77,6 +78,7 @@ enum netfs_sreq_trace { EM(netfs_sreq_trace_submit, "SUBMT") \ EM(netfs_sreq_trace_terminated, "TERM ") \ EM(netfs_sreq_trace_write, "WRITE") \ + EM(netfs_sreq_trace_write_skip, "SKIP ") \ E_(netfs_sreq_trace_write_term, "WTERM") -- cgit v1.2.3 From 0246f3e5737d0b083baefa552fecedd90832dad0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Apr 2021 17:31:54 +0100 Subject: netfs: Add a tracepoint to log failures that would be otherwise unseen Add a tracepoint to log internal failures (such as cache errors) that we don't otherwise want to pass back to the netfs. Signed-off-by: David Howells Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/161781048813.463527.1557000804674707986.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161789082749.6155.15498680577213140870.stgit@warthog.procyon.org.uk/ # v6 --- include/trace/events/netfs.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index e3ebeabd3852..de1c64635e42 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -47,6 +47,15 @@ enum netfs_sreq_trace { netfs_sreq_trace_write_term, }; +enum netfs_failure { + netfs_fail_check_write_begin, + netfs_fail_copy_to_cache, + netfs_fail_read, + netfs_fail_short_readpage, + netfs_fail_short_write_begin, + netfs_fail_prepare_write, +}; + #endif #define netfs_read_traces \ @@ -81,6 +90,14 @@ enum netfs_sreq_trace { EM(netfs_sreq_trace_write_skip, "SKIP ") \ E_(netfs_sreq_trace_write_term, "WTERM") +#define netfs_failures \ + EM(netfs_fail_check_write_begin, "check-write-begin") \ + EM(netfs_fail_copy_to_cache, "copy-to-cache") \ + EM(netfs_fail_read, "read") \ + EM(netfs_fail_short_readpage, "short-readpage") \ + EM(netfs_fail_short_write_begin, "short-write-begin") \ + E_(netfs_fail_prepare_write, "prep-write") + /* * Export enum symbols via userspace. @@ -94,6 +111,7 @@ netfs_read_traces; netfs_rreq_traces; netfs_sreq_sources; netfs_sreq_traces; +netfs_failures; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -197,6 +215,46 @@ TRACE_EVENT(netfs_sreq, __entry->error) ); +TRACE_EVENT(netfs_failure, + TP_PROTO(struct netfs_read_request *rreq, + struct netfs_read_subrequest *sreq, + int error, enum netfs_failure what), + + TP_ARGS(rreq, sreq, error, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned short, index ) + __field(short, error ) + __field(unsigned short, flags ) + __field(enum netfs_read_source, source ) + __field(enum netfs_failure, what ) + __field(size_t, len ) + __field(size_t, transferred ) + __field(loff_t, start ) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->index = sreq ? sreq->debug_index : 0; + __entry->error = error; + __entry->flags = sreq ? sreq->flags : 0; + __entry->source = sreq ? sreq->source : NETFS_INVALID_READ; + __entry->what = what; + __entry->len = sreq ? sreq->len : 0; + __entry->transferred = sreq ? sreq->transferred : 0; + __entry->start = sreq ? sreq->start : 0; + ), + + TP_printk("R=%08x[%u] %s f=%02x s=%llx %zx/%zx %s e=%d", + __entry->rreq, __entry->index, + __print_symbolic(__entry->source, netfs_sreq_sources), + __entry->flags, + __entry->start, __entry->transferred, __entry->len, + __print_symbolic(__entry->what, netfs_failures), + __entry->error) + ); + #endif /* _TRACE_NETFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 67d78a6f6e7b38c1beb7d8c09c6d40f8682e60b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Oct 2020 14:23:46 +0000 Subject: afs: Pass page into dirty region helpers to provide THP size Pass a pointer to the page being accessed into the dirty region helpers so that the size of the page can be determined in case it's a transparent huge page. This also required the page to be passed into the afs_page_dirty trace point - so there's no need to specifically pass in the index or private data as these can be retrieved directly from the page struct. Signed-off-by: David Howells Tested-By: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/160588527183.3465195.16107942526481976308.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118144921.1232039.11377711180492625929.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161040747.2537118.11435394902674511430.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340404553.1303470.11414163641767769882.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539548385.286939.8864598314493255313.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653804285.2770958.3497360004849598038.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789087043.6155.16922142208140170528.stgit@warthog.procyon.org.uk/ # v6 --- include/trace/events/afs.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 4a5cc8c64be3..9203cf6a8c53 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -969,30 +969,33 @@ TRACE_EVENT(afs_dir_check_failed, ); TRACE_EVENT(afs_page_dirty, - TP_PROTO(struct afs_vnode *vnode, const char *where, - pgoff_t page, unsigned long priv), + TP_PROTO(struct afs_vnode *vnode, const char *where, struct page *page), - TP_ARGS(vnode, where, page, priv), + TP_ARGS(vnode, where, page), TP_STRUCT__entry( __field(struct afs_vnode *, vnode ) __field(const char *, where ) __field(pgoff_t, page ) - __field(unsigned long, priv ) + __field(unsigned long, from ) + __field(unsigned long, to ) ), TP_fast_assign( __entry->vnode = vnode; __entry->where = where; - __entry->page = page; - __entry->priv = priv; + __entry->page = page->index; + __entry->from = afs_page_dirty_from(page, page->private); + __entry->to = afs_page_dirty_to(page, page->private); + __entry->to |= (afs_is_page_dirty_mmapped(page->private) ? + (1UL << (BITS_PER_LONG - 1)) : 0); ), - TP_printk("vn=%p %lx %s %zx-%zx%s", + TP_printk("vn=%p %lx %s %lx-%lx%s", __entry->vnode, __entry->page, __entry->where, - afs_page_dirty_from(__entry->priv), - afs_page_dirty_to(__entry->priv), - afs_is_page_dirty_mmapped(__entry->priv) ? " M" : "") + __entry->from, + __entry->to & ~(1UL << (BITS_PER_LONG - 1)), + __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "") ); TRACE_EVENT(afs_call_state, -- cgit v1.2.3 From bd80d8a80e12895e56a1bb7862b2379942e46167 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Feb 2020 14:22:28 +0000 Subject: afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells Tested-By: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6 --- include/trace/events/afs.h | 51 +++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 9203cf6a8c53..3ccf591b2374 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -886,65 +886,52 @@ TRACE_EVENT(afs_call_done, __entry->rx_call) ); -TRACE_EVENT(afs_send_pages, - TP_PROTO(struct afs_call *call, struct msghdr *msg, - pgoff_t first, pgoff_t last, unsigned int offset), +TRACE_EVENT(afs_send_data, + TP_PROTO(struct afs_call *call, struct msghdr *msg), - TP_ARGS(call, msg, first, last, offset), + TP_ARGS(call, msg), TP_STRUCT__entry( __field(unsigned int, call ) - __field(pgoff_t, first ) - __field(pgoff_t, last ) - __field(unsigned int, nr ) - __field(unsigned int, bytes ) - __field(unsigned int, offset ) __field(unsigned int, flags ) + __field(loff_t, offset ) + __field(loff_t, count ) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->first = first; - __entry->last = last; - __entry->nr = msg->msg_iter.nr_segs; - __entry->bytes = msg->msg_iter.count; - __entry->offset = offset; __entry->flags = msg->msg_flags; + __entry->offset = msg->msg_iter.xarray_start + msg->msg_iter.iov_offset; + __entry->count = iov_iter_count(&msg->msg_iter); ), - TP_printk(" c=%08x %lx-%lx-%lx b=%x o=%x f=%x", - __entry->call, - __entry->first, __entry->first + __entry->nr - 1, __entry->last, - __entry->bytes, __entry->offset, + TP_printk(" c=%08x o=%llx n=%llx f=%x", + __entry->call, __entry->offset, __entry->count, __entry->flags) ); -TRACE_EVENT(afs_sent_pages, - TP_PROTO(struct afs_call *call, pgoff_t first, pgoff_t last, - pgoff_t cursor, int ret), +TRACE_EVENT(afs_sent_data, + TP_PROTO(struct afs_call *call, struct msghdr *msg, int ret), - TP_ARGS(call, first, last, cursor, ret), + TP_ARGS(call, msg, ret), TP_STRUCT__entry( __field(unsigned int, call ) - __field(pgoff_t, first ) - __field(pgoff_t, last ) - __field(pgoff_t, cursor ) __field(int, ret ) + __field(loff_t, offset ) + __field(loff_t, count ) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->first = first; - __entry->last = last; - __entry->cursor = cursor; __entry->ret = ret; + __entry->offset = msg->msg_iter.xarray_start + msg->msg_iter.iov_offset; + __entry->count = iov_iter_count(&msg->msg_iter); ), - TP_printk(" c=%08x %lx-%lx c=%lx r=%d", - __entry->call, - __entry->first, __entry->last, - __entry->cursor, __entry->ret) + TP_printk(" c=%08x o=%llx n=%llx r=%x", + __entry->call, __entry->offset, __entry->count, + __entry->ret) ); TRACE_EVENT(afs_dir_check_failed, -- cgit v1.2.3 From e4b52ca01315ad53df41877708428c1c41c1444d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:12 -0400 Subject: xprtrdma: Do not recycle MR after FastReg/LocalInv flushes Better not to touch MRs involved in a flush or post error until the Send and Receive Queues are drained and the transport is fully quiescent. Simply don't insert such MRs back onto the free list. They remain on mr_all and will be released when the connection is torn down. I had thought that recycling would prevent hardware resources from being tied up for a long time. However, since v5.7, a transport disconnect destroys the QP and other hardware-owned resources. The MRs get cleaned up nicely at that point. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index c838e7ac1c2d..e38e745d13b0 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1014,7 +1014,6 @@ DEFINE_MR_EVENT(localinv); DEFINE_MR_EVENT(map); DEFINE_ANON_MR_EVENT(unmap); -DEFINE_ANON_MR_EVENT(recycle); TRACE_EVENT(xprtrdma_dma_maperr, TP_PROTO( -- cgit v1.2.3 From 4ddd0fc32c94fbb77a8c0728dc507b2bdcc67edc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:31 -0400 Subject: xprtrdma: Add tracepoints showing FastReg WRs and remote invalidation The Send signaling logic is a little subtle, so add some observability around it. For every xprtrdma_mr_fastreg event, there should be an xprtrdma_mr_localinv or xprtrdma_mr_reminv event. When these tracepoints are enabled, we can see exactly when an MR is DMA-mapped, registered, invalidated (either locally or remotely) and then DMA-unmapped. kworker/u25:2-190 [000] 787.979512: xprtrdma_mr_map: task:351@5 mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) kworker/u25:2-190 [000] 787.979515: xprtrdma_chunk_read: task:351@5 pos=148 5608@0x8679e0c8f6f56000:0x00000503 (last) kworker/u25:2-190 [000] 787.979519: xprtrdma_marshal: task:351@5 xid=0x8679e0c8: hdr=52 xdr=148/5608/0 read list/inline kworker/u25:2-190 [000] 787.979525: xprtrdma_mr_fastreg: task:351@5 mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) kworker/u25:2-190 [000] 787.979526: xprtrdma_post_send: task:351@5 cq.id=0 cid=73 (2 SGEs) ... kworker/5:1H-219 [005] 787.980567: xprtrdma_wc_receive: cq.id=1 cid=161 status=SUCCESS (0/0x0) received=164 kworker/5:1H-219 [005] 787.980571: xprtrdma_post_recvs: peer=[192.168.100.55]:20049 r_xprt=0xffff8884974d4000: 0 new recvs, 70 active (rc 0) kworker/5:1H-219 [005] 787.980573: xprtrdma_reply: task:351@5 xid=0x8679e0c8 credits=64 kworker/5:1H-219 [005] 787.980576: xprtrdma_mr_reminv: task:351@5 mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) kworker/5:1H-219 [005] 787.980577: xprtrdma_mr_unmap: mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) Note that I've moved the xprtrdma_post_send tracepoint so that event always appears after the xprtrdma_mr_fastreg tracepoint. Otherwise the event log looks counterintuitive (FastReg is always supposed to happen before Send). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index e38e745d13b0..9462326b3535 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1010,7 +1010,9 @@ TRACE_EVENT(xprtrdma_frwr_maperr, ) ); +DEFINE_MR_EVENT(fastreg); DEFINE_MR_EVENT(localinv); +DEFINE_MR_EVENT(reminv); DEFINE_MR_EVENT(map); DEFINE_ANON_MR_EVENT(unmap); -- cgit v1.2.3 From 6b147ea7f442e1fb31dfa25e25b7a8ca3fb817f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:38 -0400 Subject: xprtrdma: Add an rpcrdma_mr_completion_class I found it confusing that the MR_EVENT class displays the mr.id but the associated COMPLETION_EVENT class displays a cid (that happens to contain the mr.id!). To make it a little easier on humans who have to read and interpret these events, create an MR_COMPLETION class that displays the mr.id in the same way as the MR_EVENT class. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 48 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 9462326b3535..3e6e4c69b533 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -60,6 +60,46 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class, ), \ TP_ARGS(wc, cid)) +DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(unsigned long, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->status = wc->status; + if (wc->status) + __entry->vendor_err = wc->vendor_err; + else + __entry->vendor_err = 0; + ), + + TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)", + __entry->cq_id, __entry->completion_id, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_MR_COMPLETION_EVENT(name) \ + DEFINE_EVENT(rpcrdma_mr_completion_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class, TP_PROTO( const struct ib_wc *wc, @@ -886,10 +926,10 @@ TRACE_EVENT(xprtrdma_post_linv_err, DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive); DEFINE_COMPLETION_EVENT(xprtrdma_wc_send); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done); TRACE_EVENT(xprtrdma_frwr_alloc, TP_PROTO( -- cgit v1.2.3 From 83189d15115467061295c0b75334b39fc64c6142 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:44 -0400 Subject: xprtrdma: Don't display r_xprt memory addresses in tracepoints The remote peer's IP address is sufficient, and does not expose details of the kernel's memory layout. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 51 ++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 3e6e4c69b533..e38b8e33be2d 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -190,19 +190,17 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt, TP_ARGS(r_xprt), TP_STRUCT__entry( - __field(const void *, r_xprt) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p", - __get_str(addr), __get_str(port), __entry->r_xprt + TP_printk("peer=[%s]:%s", + __get_str(addr), __get_str(port) ) ); @@ -222,7 +220,6 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, TP_ARGS(r_xprt, rc), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(int, rc) __field(int, connect_status) __string(addr, rpcrdma_addrstr(r_xprt)) @@ -230,15 +227,14 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->rc = rc; __entry->connect_status = r_xprt->rx_ep->re_connect_status; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s rc=%d connection status=%d", + __get_str(addr), __get_str(port), __entry->rc, __entry->connect_status ) ); @@ -535,22 +531,19 @@ TRACE_EVENT(xprtrdma_op_connect, TP_ARGS(r_xprt, delay), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned long, delay) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->delay = delay; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", - __get_str(addr), __get_str(port), __entry->r_xprt, - __entry->delay + TP_printk("peer=[%s]:%s delay=%lu", + __get_str(addr), __get_str(port), __entry->delay ) ); @@ -565,7 +558,6 @@ TRACE_EVENT(xprtrdma_op_set_cto, TP_ARGS(r_xprt, connect, reconnect), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned long, connect) __field(unsigned long, reconnect) __string(addr, rpcrdma_addrstr(r_xprt)) @@ -573,15 +565,14 @@ TRACE_EVENT(xprtrdma_op_set_cto, ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->connect = connect; __entry->reconnect = reconnect; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu", + __get_str(addr), __get_str(port), __entry->connect / HZ, __entry->reconnect / HZ ) ); @@ -631,22 +622,19 @@ TRACE_EVENT(xprtrdma_createmrs, TP_ARGS(r_xprt, count), TP_STRUCT__entry( - __field(const void *, r_xprt) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) __field(unsigned int, count) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->count = count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs", - __get_str(addr), __get_str(port), __entry->r_xprt, - __entry->count + TP_printk("peer=[%s]:%s created %u MRs", + __get_str(addr), __get_str(port), __entry->count ) ); @@ -869,7 +857,7 @@ TRACE_EVENT(xprtrdma_post_recvs, TP_ARGS(r_xprt, count, status), TP_STRUCT__entry( - __field(const void *, r_xprt) + __field(u32, cq_id) __field(unsigned int, count) __field(int, status) __field(int, posted) @@ -878,16 +866,18 @@ TRACE_EVENT(xprtrdma_post_recvs, ), TP_fast_assign( - __entry->r_xprt = r_xprt; + const struct rpcrdma_ep *ep = r_xprt->rx_ep; + + __entry->cq_id = ep->re_attr.recv_cq->res.id; __entry->count = count; __entry->status = status; - __entry->posted = r_xprt->rx_ep->re_receive_count; + __entry->posted = ep->re_receive_count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)", + __get_str(addr), __get_str(port), __entry->cq_id, __entry->count, __entry->posted, __entry->status ) ); @@ -1289,22 +1279,19 @@ TRACE_EVENT(xprtrdma_cb_setup, TP_ARGS(r_xprt, reqs), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned int, reqs) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->reqs = reqs; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs", - __get_str(addr), __get_str(port), - __entry->r_xprt, __entry->reqs + TP_printk("peer=[%s]:%s %u reqs", + __get_str(addr), __get_str(port), __entry->reqs ) ); -- cgit v1.2.3 From e1648eb23d839bd4b9f2999296d5e81dcd93311f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:50 -0400 Subject: xprtrdma: Remove the RPC/RDMA QP event handler Clean up: The handler only recorded a trace event. If indeed no action is needed by the RPC/RDMA consumer, then the event can be ignored. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index e38b8e33be2d..ef6166b840e7 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -577,38 +577,6 @@ TRACE_EVENT(xprtrdma_op_set_cto, ) ); -TRACE_EVENT(xprtrdma_qp_event, - TP_PROTO( - const struct rpcrdma_ep *ep, - const struct ib_event *event - ), - - TP_ARGS(ep, event), - - TP_STRUCT__entry( - __field(unsigned long, event) - __string(name, event->device->name) - __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) - __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) - ), - - TP_fast_assign( - const struct rdma_cm_id *id = ep->re_id; - - __entry->event = event->event; - __assign_str(name, event->device->name); - memcpy(__entry->srcaddr, &id->route.addr.src_addr, - sizeof(struct sockaddr_in6)); - memcpy(__entry->dstaddr, &id->route.addr.dst_addr, - sizeof(struct sockaddr_in6)); - ), - - TP_printk("%pISpc -> %pISpc device=%s %s (%lu)", - __entry->srcaddr, __entry->dstaddr, __get_str(name), - rdma_show_ib_event(__entry->event), __entry->event - ) -); - /** ** Call events **/ -- cgit v1.2.3 From 13bcf7e32a0181095cd62010579869e87aacb332 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:04:21 -0400 Subject: xprtrdma: Move fr_mr field to struct rpcrdma_mr Clean up: The last remaining field in struct rpcrdma_frwr has been removed, so the struct can be eliminated. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ef6166b840e7..bd55908c1bef 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -379,7 +379,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -420,7 +420,7 @@ DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -903,7 +903,7 @@ TRACE_EVENT(xprtrdma_frwr_alloc, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->rc = rc; ), @@ -931,7 +931,7 @@ TRACE_EVENT(xprtrdma_frwr_dereg, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -964,7 +964,7 @@ TRACE_EVENT(xprtrdma_frwr_sgerr, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->addr = mr->mr_sg->dma_address; __entry->dir = mr->mr_dir; __entry->nents = sg_nents; @@ -994,7 +994,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->addr = mr->mr_sg->dma_address; __entry->dir = mr->mr_dir; __entry->num_mapped = num_mapped; -- cgit v1.2.3 From f9001107820c647f65b57fb9c1ca2c0908b5fede Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Thu, 29 Apr 2021 22:57:26 -0700 Subject: mm, tracing: improve rss_stat tracepoint message Adjust the rss_stat tracepoint to print the name of the resident page type that got updated (e.g. MM_ANONPAGES/MM_FILEPAGES), rather than the numeric index corresponding to it (the __entry->member value): Before this patch: ------------------ rss_stat: mm_id=1216113068 curr=0 member=1 size=28672B rss_stat: mm_id=1216113068 curr=0 member=1 size=0B rss_stat: mm_id=534402304 curr=1 member=0 size=188416B rss_stat: mm_id=534402304 curr=1 member=1 size=40960B After this patch: ----------------- rss_stat: mm_id=1726253524 curr=1 type=MM_ANONPAGES size=40960B rss_stat: mm_id=1726253524 curr=1 type=MM_FILEPAGES size=663552B rss_stat: mm_id=1726253524 curr=1 type=MM_ANONPAGES size=65536B rss_stat: mm_id=1726253524 curr=1 type=MM_FILEPAGES size=647168B Use TRACE_DEFINE_ENUM()/__print_symbolic() logic to map the enum values to the strings they represent, so that userspace tools can also parse the raw data correctly. Link: https://lkml.kernel.org/r/20210310162305.4862-1-ovidiu.panait@windriver.com Signed-off-by: Ovidiu Panait Suggested-by: Steven Rostedt (VMware) Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 3a60b6b6db32..829a75692cc0 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -343,6 +343,26 @@ static unsigned int __maybe_unused mm_ptr_to_hash(const void *ptr) #define __PTR_TO_HASHVAL #endif +#define TRACE_MM_PAGES \ + EM(MM_FILEPAGES) \ + EM(MM_ANONPAGES) \ + EM(MM_SWAPENTS) \ + EMe(MM_SHMEMPAGES) + +#undef EM +#undef EMe + +#define EM(a) TRACE_DEFINE_ENUM(a); +#define EMe(a) TRACE_DEFINE_ENUM(a); + +TRACE_MM_PAGES + +#undef EM +#undef EMe + +#define EM(a) { a, #a }, +#define EMe(a) { a, #a } + TRACE_EVENT(rss_stat, TP_PROTO(struct mm_struct *mm, @@ -365,10 +385,10 @@ TRACE_EVENT(rss_stat, __entry->size = (count << PAGE_SHIFT); ), - TP_printk("mm_id=%u curr=%d member=%d size=%ldB", + TP_printk("mm_id=%u curr=%d type=%s size=%ldB", __entry->mm_id, __entry->curr, - __entry->member, + __print_symbolic(__entry->member, TRACE_MM_PAGES), __entry->size) ); #endif /* _TRACE_KMEM_H */ -- cgit v1.2.3 From 7677f7fd8be76659cd2d0db8ff4093bbb51c20e5 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 4 May 2021 18:35:36 -0700 Subject: userfaultfd: add minor fault registration mode Patch series "userfaultfd: add minor fault handling", v9. Overview ======== This series adds a new userfaultfd feature, UFFD_FEATURE_MINOR_HUGETLBFS. When enabled (via the UFFDIO_API ioctl), this feature means that any hugetlbfs VMAs registered with UFFDIO_REGISTER_MODE_MISSING will *also* get events for "minor" faults. By "minor" fault, I mean the following situation: Let there exist two mappings (i.e., VMAs) to the same page(s) (shared memory). One of the mappings is registered with userfaultfd (in minor mode), and the other is not. Via the non-UFFD mapping, the underlying pages have already been allocated & filled with some contents. The UFFD mapping has not yet been faulted in; when it is touched for the first time, this results in what I'm calling a "minor" fault. As a concrete example, when working with hugetlbfs, we have huge_pte_none(), but find_lock_page() finds an existing page. We also add a new ioctl to resolve such faults: UFFDIO_CONTINUE. The idea is, userspace resolves the fault by either a) doing nothing if the contents are already correct, or b) updating the underlying contents using the second, non-UFFD mapping (via memcpy/memset or similar, or something fancier like RDMA, or etc...). In either case, userspace issues UFFDIO_CONTINUE to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". Use Case ======== Consider the use case of VM live migration (e.g. under QEMU/KVM): 1. While a VM is still running, we copy the contents of its memory to a target machine. The pages are populated on the target by writing to the non-UFFD mapping, using the setup described above. The VM is still running (and therefore its memory is likely changing), so this may be repeated several times, until we decide the target is "up to date enough". 2. We pause the VM on the source, and start executing on the target machine. During this gap, the VM's user(s) will *see* a pause, so it is desirable to minimize this window. 3. Between the last time any page was copied from the source to the target, and when the VM was paused, the contents of that page may have changed - and therefore the copy we have on the target machine is out of date. Although we can keep track of which pages are out of date, for VMs with large amounts of memory, it is "slow" to transfer this information to the target machine. We want to resume execution before such a transfer would complete. 4. So, the guest begins executing on the target machine. The first time it touches its memory (via the UFFD-registered mapping), userspace wants to intercept this fault. Userspace checks whether or not the page is up to date, and if not, copies the updated page from the source machine, via the non-UFFD mapping. Finally, whether a copy was performed or not, userspace issues a UFFDIO_CONTINUE ioctl to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". We don't have to do all of the final updates on-demand. The userfaultfd manager can, in the background, also copy over updated pages once it receives the map of which pages are up-to-date or not. Interaction with Existing APIs ============================== Because this is a feature, a registered VMA could potentially receive both missing and minor faults. I spent some time thinking through how the existing API interacts with the new feature: UFFDIO_CONTINUE cannot be used to resolve non-minor faults, as it does not allocate a new page. If UFFDIO_CONTINUE is used on a non-minor fault: - For non-shared memory or shmem, -EINVAL is returned. - For hugetlb, -EFAULT is returned. UFFDIO_COPY and UFFDIO_ZEROPAGE cannot be used to resolve minor faults. Without modifications, the existing codepath assumes a new page needs to be allocated. This is okay, since userspace must have a second non-UFFD-registered mapping anyway, thus there isn't much reason to want to use these in any case (just memcpy or memset or similar). - If UFFDIO_COPY is used on a minor fault, -EEXIST is returned. - If UFFDIO_ZEROPAGE is used on a minor fault, -EEXIST is returned (or -EINVAL in the case of hugetlb, as UFFDIO_ZEROPAGE is unsupported in any case). - UFFDIO_WRITEPROTECT simply doesn't work with shared memory, and returns -ENOENT in that case (regardless of the kind of fault). Future Work =========== This series only supports hugetlbfs. I have a second series in flight to support shmem as well, extending the functionality. This series is more mature than the shmem support at this point, and the functionality works fully on hugetlbfs, so this series can be merged first and then shmem support will follow. This patch (of 6): This feature allows userspace to intercept "minor" faults. By "minor" faults, I mean the following situation: Let there exist two mappings (i.e., VMAs) to the same page(s). One of the mappings is registered with userfaultfd (in minor mode), and the other is not. Via the non-UFFD mapping, the underlying pages have already been allocated & filled with some contents. The UFFD mapping has not yet been faulted in; when it is touched for the first time, this results in what I'm calling a "minor" fault. As a concrete example, when working with hugetlbfs, we have huge_pte_none(), but find_lock_page() finds an existing page. This commit adds the new registration mode, and sets the relevant flag on the VMAs being registered. In the hugetlb fault path, if we find that we have huge_pte_none(), but find_lock_page() does indeed find an existing page, then we have a "minor" fault, and if the VMA has the userfaultfd registration flag, we call into userfaultfd to handle it. This is implemented as a new registration mode, instead of an API feature. This is because the alternative implementation has significant drawbacks [1]. However, doing it this was requires we allocate a VM_* flag for the new registration mode. On 32-bit systems, there are no unused bits, so this feature is only supported on architectures with CONFIG_ARCH_USES_HIGH_VMA_FLAGS. When attempting to register a VMA in MINOR mode on 32-bit architectures, we return -EINVAL. [1] https://lore.kernel.org/patchwork/patch/1380226/ [peterx@redhat.com: fix minor fault page leak] Link: https://lkml.kernel.org/r/20210322175132.36659-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210301222728.176417-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20210301222728.176417-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Chinwen Chang Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Peter Xu Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Rostedt Cc: Steven Price Cc: Vlastimil Babka Cc: Adam Ruprecht Cc: Axel Rasmussen Cc: Cannon Matthews Cc: "Dr . David Alan Gilbert" Cc: David Rientjes Cc: Mina Almasry Cc: Oliver Upton Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/mmflags.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 67018d367b9f..629c7a0eaff2 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -137,6 +137,12 @@ IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) #define IF_HAVE_VM_SOFTDIRTY(flag,name) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR +# define IF_HAVE_UFFD_MINOR(flag, name) {flag, name}, +#else +# define IF_HAVE_UFFD_MINOR(flag, name) +#endif + #define __def_vmaflag_names \ {VM_READ, "read" }, \ {VM_WRITE, "write" }, \ @@ -148,6 +154,7 @@ IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) {VM_MAYSHARE, "mayshare" }, \ {VM_GROWSDOWN, "growsdown" }, \ {VM_UFFD_MISSING, "uffd_missing" }, \ +IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \ {VM_PFNMAP, "pfnmap" }, \ {VM_DENYWRITE, "denywrite" }, \ {VM_UFFD_WP, "uffd_wp" }, \ -- cgit v1.2.3 From 7bc1aec5e28765ad18742824b3b972471807a632 Mon Sep 17 00:00:00 2001 From: Liam Mark Date: Tue, 4 May 2021 18:37:25 -0700 Subject: mm: cma: add trace events for CMA alloc perf testing Add cma and migrate trace events to enable CMA allocation performance to be measured via ftrace. [georgi.djakov@linaro.org: add the CMA instance name to the cma_alloc_start trace event] Link: https://lkml.kernel.org/r/20210326155414.25006-1-georgi.djakov@linaro.org Link: https://lkml.kernel.org/r/20210324160740.15901-1-georgi.djakov@linaro.org Signed-off-by: Liam Mark Signed-off-by: Georgi Djakov Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/cma.h | 42 +++++++++++++++++++++++++++++++++++++++++- include/trace/events/migrate.h | 22 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h index 5017a8829270..be1525a10457 100644 --- a/include/trace/events/cma.h +++ b/include/trace/events/cma.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(cma_alloc, +DECLARE_EVENT_CLASS(cma_alloc_class, TP_PROTO(unsigned long pfn, const struct page *page, unsigned int count, unsigned int align), @@ -61,6 +61,46 @@ TRACE_EVENT(cma_release, __entry->count) ); +TRACE_EVENT(cma_alloc_start, + + TP_PROTO(const char *name, unsigned int count, unsigned int align), + + TP_ARGS(name, count, align), + + TP_STRUCT__entry( + __string(name, name) + __field(unsigned int, count) + __field(unsigned int, align) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->count = count; + __entry->align = align; + ), + + TP_printk("name=%s count=%u align=%u", + __get_str(name), + __entry->count, + __entry->align) +); + +DEFINE_EVENT(cma_alloc_class, cma_alloc, + + TP_PROTO(unsigned long pfn, const struct page *page, + unsigned int count, unsigned int align), + + TP_ARGS(pfn, page, count, align) +); + +DEFINE_EVENT(cma_alloc_class, cma_alloc_busy_retry, + + TP_PROTO(unsigned long pfn, const struct page *page, + unsigned int count, unsigned int align), + + TP_ARGS(pfn, page, count, align) +); + #endif /* _TRACE_CMA_H */ /* This part must be outside protection */ diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 4d434398d64d..f2c990603888 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -81,6 +81,28 @@ TRACE_EVENT(mm_migrate_pages, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); + +TRACE_EVENT(mm_migrate_pages_start, + + TP_PROTO(enum migrate_mode mode, int reason), + + TP_ARGS(mode, reason), + + TP_STRUCT__entry( + __field(enum migrate_mode, mode) + __field(int, reason) + ), + + TP_fast_assign( + __entry->mode = mode; + __entry->reason = reason; + ), + + TP_printk("mode=%s reason=%s", + __print_symbolic(__entry->mode, MIGRATE_MODE), + __print_symbolic(__entry->reason, MIGRATE_REASON)) +); + #endif /* _TRACE_MIGRATE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 3aab8ae7aace3388da319a233edf48f0f5d26a44 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 4 May 2021 18:37:31 -0700 Subject: mm: cma: add the CMA instance name to cma trace events There were missing places to add cma instance name. To identify each CMA instance, let's add the name for every cma trace. This patch also changes the existing cma_trace_alloc to cma_trace_finish since we have cma_alloc_start[1]. [1] https://lore.kernel.org/linux-mm/20210324160740.15901-1-georgi.djakov@linaro.org Link: https://lkml.kernel.org/r/20210330220237.748899-1-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Liam Mark Cc: Georgi Djakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/cma.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h index be1525a10457..5cf385ae7c08 100644 --- a/include/trace/events/cma.h +++ b/include/trace/events/cma.h @@ -10,12 +10,13 @@ DECLARE_EVENT_CLASS(cma_alloc_class, - TP_PROTO(unsigned long pfn, const struct page *page, + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, unsigned int count, unsigned int align), - TP_ARGS(pfn, page, count, align), + TP_ARGS(name, pfn, page, count, align), TP_STRUCT__entry( + __string(name, name) __field(unsigned long, pfn) __field(const struct page *, page) __field(unsigned int, count) @@ -23,13 +24,15 @@ DECLARE_EVENT_CLASS(cma_alloc_class, ), TP_fast_assign( + __assign_str(name, name); __entry->pfn = pfn; __entry->page = page; __entry->count = count; __entry->align = align; ), - TP_printk("pfn=%lx page=%p count=%u align=%u", + TP_printk("name=%s pfn=%lx page=%p count=%u align=%u", + __get_str(name), __entry->pfn, __entry->page, __entry->count, @@ -38,24 +41,27 @@ DECLARE_EVENT_CLASS(cma_alloc_class, TRACE_EVENT(cma_release, - TP_PROTO(unsigned long pfn, const struct page *page, + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, unsigned int count), - TP_ARGS(pfn, page, count), + TP_ARGS(name, pfn, page, count), TP_STRUCT__entry( + __string(name, name) __field(unsigned long, pfn) __field(const struct page *, page) __field(unsigned int, count) ), TP_fast_assign( + __assign_str(name, name); __entry->pfn = pfn; __entry->page = page; __entry->count = count; ), - TP_printk("pfn=%lx page=%p count=%u", + TP_printk("name=%s pfn=%lx page=%p count=%u", + __get_str(name), __entry->pfn, __entry->page, __entry->count) @@ -85,20 +91,20 @@ TRACE_EVENT(cma_alloc_start, __entry->align) ); -DEFINE_EVENT(cma_alloc_class, cma_alloc, +DEFINE_EVENT(cma_alloc_class, cma_alloc_finish, - TP_PROTO(unsigned long pfn, const struct page *page, + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, unsigned int count, unsigned int align), - TP_ARGS(pfn, page, count, align) + TP_ARGS(name, pfn, page, count, align) ); DEFINE_EVENT(cma_alloc_class, cma_alloc_busy_retry, - TP_PROTO(unsigned long pfn, const struct page *page, + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, unsigned int count, unsigned int align), - TP_ARGS(pfn, page, count, align) + TP_ARGS(name, pfn, page, count, align) ); #endif /* _TRACE_CMA_H */ -- cgit v1.2.3 From 78fa51503fdbe463c96eef4c3cf69ca54032647a Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 4 May 2021 18:37:34 -0700 Subject: mm: use proper type for cma_[alloc|release] size_t in cma_alloc is confusing since it makes people think it's byte count, not pages. Change it to unsigned long[1]. The unsigned int in cma_release is also not right so change it. Since we have unsigned long in cma_release, free_contig_range should also respect it. [1] 67a2e213e7e9, mm: cma: fix incorrect type conversion for size during dma allocation Link: https://lore.kernel.org/linux-mm/20210324043434.GP1719932@casper.infradead.org/ Link: https://lkml.kernel.org/r/20210331164018.710560-1-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: David Hildenbrand Cc: Matthew Wilcox Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/cma.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h index 5cf385ae7c08..c3d354702cb0 100644 --- a/include/trace/events/cma.h +++ b/include/trace/events/cma.h @@ -11,7 +11,7 @@ DECLARE_EVENT_CLASS(cma_alloc_class, TP_PROTO(const char *name, unsigned long pfn, const struct page *page, - unsigned int count, unsigned int align), + unsigned long count, unsigned int align), TP_ARGS(name, pfn, page, count, align), @@ -19,7 +19,7 @@ DECLARE_EVENT_CLASS(cma_alloc_class, __string(name, name) __field(unsigned long, pfn) __field(const struct page *, page) - __field(unsigned int, count) + __field(unsigned long, count) __field(unsigned int, align) ), @@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(cma_alloc_class, __entry->align = align; ), - TP_printk("name=%s pfn=%lx page=%p count=%u align=%u", + TP_printk("name=%s pfn=%lx page=%p count=%lu align=%u", __get_str(name), __entry->pfn, __entry->page, @@ -42,7 +42,7 @@ DECLARE_EVENT_CLASS(cma_alloc_class, TRACE_EVENT(cma_release, TP_PROTO(const char *name, unsigned long pfn, const struct page *page, - unsigned int count), + unsigned long count), TP_ARGS(name, pfn, page, count), @@ -50,7 +50,7 @@ TRACE_EVENT(cma_release, __string(name, name) __field(unsigned long, pfn) __field(const struct page *, page) - __field(unsigned int, count) + __field(unsigned long, count) ), TP_fast_assign( @@ -60,7 +60,7 @@ TRACE_EVENT(cma_release, __entry->count = count; ), - TP_printk("name=%s pfn=%lx page=%p count=%u", + TP_printk("name=%s pfn=%lx page=%p count=%lu", __get_str(name), __entry->pfn, __entry->page, @@ -69,13 +69,13 @@ TRACE_EVENT(cma_release, TRACE_EVENT(cma_alloc_start, - TP_PROTO(const char *name, unsigned int count, unsigned int align), + TP_PROTO(const char *name, unsigned long count, unsigned int align), TP_ARGS(name, count, align), TP_STRUCT__entry( __string(name, name) - __field(unsigned int, count) + __field(unsigned long, count) __field(unsigned int, align) ), @@ -85,7 +85,7 @@ TRACE_EVENT(cma_alloc_start, __entry->align = align; ), - TP_printk("name=%s count=%u align=%u", + TP_printk("name=%s count=%lu align=%u", __get_str(name), __entry->count, __entry->align) @@ -94,7 +94,7 @@ TRACE_EVENT(cma_alloc_start, DEFINE_EVENT(cma_alloc_class, cma_alloc_finish, TP_PROTO(const char *name, unsigned long pfn, const struct page *page, - unsigned int count, unsigned int align), + unsigned long count, unsigned int align), TP_ARGS(name, pfn, page, count, align) ); @@ -102,7 +102,7 @@ DEFINE_EVENT(cma_alloc_class, cma_alloc_finish, DEFINE_EVENT(cma_alloc_class, cma_alloc_busy_retry, TP_PROTO(const char *name, unsigned long pfn, const struct page *page, - unsigned int count, unsigned int align), + unsigned long count, unsigned int align), TP_ARGS(name, pfn, page, count, align) ); -- cgit v1.2.3 From d1e153fea2a8940273174fc17733c44323d35cd5 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 4 May 2021 18:39:08 -0700 Subject: mm/gup: migrate pinned pages out of movable zone We should not pin pages in ZONE_MOVABLE. Currently, we do not pin only movable CMA pages. Generalize the function that migrates CMA pages to migrate all movable pages. Use is_pinnable_page() to check which pages need to be migrated Link: https://lkml.kernel.org/r/20210215161349.246722-10-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Reviewed-by: John Hubbard Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Ingo Molnar Cc: Ira Weiny Cc: James Morris Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: Tyler Hicks Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/migrate.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index f2c990603888..9fb2a3bbcdfb 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -20,7 +20,8 @@ EM( MR_SYSCALL, "syscall_or_cpuset") \ EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ EM( MR_NUMA_MISPLACED, "numa_misplaced") \ - EMe(MR_CONTIG_RANGE, "contig_range") + EM( MR_CONTIG_RANGE, "contig_range") \ + EMe(MR_LONGTERM_PIN, "longterm_pin") /* * First define the enums in the above macros to be exported to userspace -- cgit v1.2.3