From def1ed0db2a66eed5de593748ffe131615edb45e Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 25 Jan 2024 00:22:51 -0800 Subject: platform/x86/intel/ifs: Trace on all HT threads when executing a test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable the trace function on all HT threads. Currently, the trace is called from some arbitrary CPU where the test was invoked. This change gives visibility to the exact errors as seen by each participating HT threads, and not just what was seen from the primary thread. Sample output below. # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | migration/0-18 [000] d..1. 527287.084668: start: 0000, stop: 007f, status: 0000000000007f80 migration/128-785 [128] d..1. 527287.084669: start: 0000, stop: 007f, status: 0000000000007f80 Signed-off-by: Ashok Raj Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20240125082254.424859-3-ashok.raj@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/trace/events/intel_ifs.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/intel_ifs.h b/include/trace/events/intel_ifs.h index af0af3f1d9b7..8462dfb7a020 100644 --- a/include/trace/events/intel_ifs.h +++ b/include/trace/events/intel_ifs.h @@ -10,26 +10,23 @@ TRACE_EVENT(ifs_status, - TP_PROTO(int cpu, int start, int stop, u64 status), + TP_PROTO(int start, int stop, u64 status), - TP_ARGS(cpu, start, stop, status), + TP_ARGS(start, stop, status), TP_STRUCT__entry( __field( u64, status ) - __field( int, cpu ) __field( u16, start ) __field( u16, stop ) ), TP_fast_assign( - __entry->cpu = cpu; __entry->start = start; __entry->stop = stop; __entry->status = status; ), - TP_printk("cpu: %d, start: %.4x, stop: %.4x, status: %.16llx", - __entry->cpu, + TP_printk("start: %.4x, stop: %.4x, status: %.16llx", __entry->start, __entry->stop, __entry->status) -- cgit v1.2.3 From e272d1e1188e55259dd0e3ba2f8f744a531fdd59 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 25 Jan 2024 00:22:52 -0800 Subject: platform/x86/intel/ifs: Add current batch number to trace output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the current batch number in the trace output. When there are failures, it's important to know which test content resulted in failure. # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | migration/0-18 [000] d..1. 527287.084668: ifs_status: batch: 02, start: 0000, stop: 007f, status: 0000000000007f80 migration/128-785 [128] d..1. 527287.084669: ifs_status: batch: 02, start: 0000, stop: 007f, status: 0000000000007f80 Signed-off-by: Ashok Raj Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20240125082254.424859-4-ashok.raj@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/trace/events/intel_ifs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/intel_ifs.h b/include/trace/events/intel_ifs.h index 8462dfb7a020..8ce2de120f2d 100644 --- a/include/trace/events/intel_ifs.h +++ b/include/trace/events/intel_ifs.h @@ -10,23 +10,26 @@ TRACE_EVENT(ifs_status, - TP_PROTO(int start, int stop, u64 status), + TP_PROTO(int batch, int start, int stop, u64 status), - TP_ARGS(start, stop, status), + TP_ARGS(batch, start, stop, status), TP_STRUCT__entry( + __field( int, batch ) __field( u64, status ) __field( u16, start ) __field( u16, stop ) ), TP_fast_assign( + __entry->batch = batch; __entry->start = start; __entry->stop = stop; __entry->status = status; ), - TP_printk("start: %.4x, stop: %.4x, status: %.16llx", + TP_printk("batch: %.2d, start: %.4x, stop: %.4x, status: %.16llx", + __entry->batch, __entry->start, __entry->stop, __entry->status) -- cgit v1.2.3 From 587a67b6830b56afce19dcb5d2f6c3d7f30492e0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 31 Jan 2024 18:01:43 -0500 Subject: filelock: rename some fields in tracepoints In later patches we're going to introduce some macros with names that clash with fields here. To prevent problems building, just rename the fields in the trace entry structures. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20240131-flsplit-v3-2-c6129007ee8d@kernel.org Reviewed-by: NeilBrown Signed-off-by: Christian Brauner --- include/trace/events/filelock.h | 76 ++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index 1646dadd7f37..8fb1d41b1c67 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -68,11 +68,11 @@ DECLARE_EVENT_CLASS(filelock_lock, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, fl_blocker) - __field(fl_owner_t, fl_owner) - __field(unsigned int, fl_pid) - __field(unsigned int, fl_flags) - __field(unsigned char, fl_type) + __field(struct file_lock *, blocker) + __field(fl_owner_t, owner) + __field(unsigned int, pid) + __field(unsigned int, flags) + __field(unsigned char, type) __field(loff_t, fl_start) __field(loff_t, fl_end) __field(int, ret) @@ -82,11 +82,11 @@ DECLARE_EVENT_CLASS(filelock_lock, __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->fl_blocker = fl ? fl->fl_blocker : NULL; - __entry->fl_owner = fl ? fl->fl_owner : NULL; - __entry->fl_pid = fl ? fl->fl_pid : 0; - __entry->fl_flags = fl ? fl->fl_flags : 0; - __entry->fl_type = fl ? fl->fl_type : 0; + __entry->blocker = fl ? fl->fl_blocker : NULL; + __entry->owner = fl ? fl->fl_owner : NULL; + __entry->pid = fl ? fl->fl_pid : 0; + __entry->flags = fl ? fl->fl_flags : 0; + __entry->type = fl ? fl->fl_type : 0; __entry->fl_start = fl ? fl->fl_start : 0; __entry->fl_end = fl ? fl->fl_end : 0; __entry->ret = ret; @@ -94,9 +94,9 @@ DECLARE_EVENT_CLASS(filelock_lock, TP_printk("fl=%p dev=0x%x:0x%x ino=0x%lx fl_blocker=%p fl_owner=%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), - __entry->i_ino, __entry->fl_blocker, __entry->fl_owner, - __entry->fl_pid, show_fl_flags(__entry->fl_flags), - show_fl_type(__entry->fl_type), + __entry->i_ino, __entry->blocker, __entry->owner, + __entry->pid, show_fl_flags(__entry->flags), + show_fl_type(__entry->type), __entry->fl_start, __entry->fl_end, __entry->ret) ); @@ -125,32 +125,32 @@ DECLARE_EVENT_CLASS(filelock_lease, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, fl_blocker) - __field(fl_owner_t, fl_owner) - __field(unsigned int, fl_flags) - __field(unsigned char, fl_type) - __field(unsigned long, fl_break_time) - __field(unsigned long, fl_downgrade_time) + __field(struct file_lock *, blocker) + __field(fl_owner_t, owner) + __field(unsigned int, flags) + __field(unsigned char, type) + __field(unsigned long, break_time) + __field(unsigned long, downgrade_time) ), TP_fast_assign( __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->fl_blocker = fl ? fl->fl_blocker : NULL; - __entry->fl_owner = fl ? fl->fl_owner : NULL; - __entry->fl_flags = fl ? fl->fl_flags : 0; - __entry->fl_type = fl ? fl->fl_type : 0; - __entry->fl_break_time = fl ? fl->fl_break_time : 0; - __entry->fl_downgrade_time = fl ? fl->fl_downgrade_time : 0; + __entry->blocker = fl ? fl->fl_blocker : NULL; + __entry->owner = fl ? fl->fl_owner : NULL; + __entry->flags = fl ? fl->fl_flags : 0; + __entry->type = fl ? fl->fl_type : 0; + __entry->break_time = fl ? fl->fl_break_time : 0; + __entry->downgrade_time = fl ? fl->fl_downgrade_time : 0; ), TP_printk("fl=%p dev=0x%x:0x%x ino=0x%lx fl_blocker=%p fl_owner=%p fl_flags=%s fl_type=%s fl_break_time=%lu fl_downgrade_time=%lu", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), - __entry->i_ino, __entry->fl_blocker, __entry->fl_owner, - show_fl_flags(__entry->fl_flags), - show_fl_type(__entry->fl_type), - __entry->fl_break_time, __entry->fl_downgrade_time) + __entry->i_ino, __entry->blocker, __entry->owner, + show_fl_flags(__entry->flags), + show_fl_type(__entry->type), + __entry->break_time, __entry->downgrade_time) ); DEFINE_EVENT(filelock_lease, break_lease_noblock, TP_PROTO(struct inode *inode, struct file_lock *fl), @@ -179,9 +179,9 @@ TRACE_EVENT(generic_add_lease, __field(int, rcount) __field(int, icount) __field(dev_t, s_dev) - __field(fl_owner_t, fl_owner) - __field(unsigned int, fl_flags) - __field(unsigned char, fl_type) + __field(fl_owner_t, owner) + __field(unsigned int, flags) + __field(unsigned char, type) ), TP_fast_assign( @@ -190,17 +190,17 @@ TRACE_EVENT(generic_add_lease, __entry->wcount = atomic_read(&inode->i_writecount); __entry->rcount = atomic_read(&inode->i_readcount); __entry->icount = atomic_read(&inode->i_count); - __entry->fl_owner = fl->fl_owner; - __entry->fl_flags = fl->fl_flags; - __entry->fl_type = fl->fl_type; + __entry->owner = fl->fl_owner; + __entry->flags = fl->fl_flags; + __entry->type = fl->fl_type; ), TP_printk("dev=0x%x:0x%x ino=0x%lx wcount=%d rcount=%d icount=%d fl_owner=%p fl_flags=%s fl_type=%s", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->wcount, __entry->rcount, - __entry->icount, __entry->fl_owner, - show_fl_flags(__entry->fl_flags), - show_fl_type(__entry->fl_type)) + __entry->icount, __entry->owner, + show_fl_flags(__entry->flags), + show_fl_type(__entry->type)) ); TRACE_EVENT(leases_conflict, -- cgit v1.2.3 From 4ca52f539865853e6ddbc393745cd0b305f0d810 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 31 Jan 2024 18:01:59 -0500 Subject: filelock: have fs/locks.c deal with file_lock_core directly Convert fs/locks.c to access fl_core fields direcly rather than using the backward-compatibility macros. Most of this was done with coccinelle, with a few by-hand fixups. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20240131-flsplit-v3-18-c6129007ee8d@kernel.org Reviewed-by: NeilBrown Signed-off-by: Christian Brauner --- include/trace/events/filelock.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index 8fb1d41b1c67..4be341b5ead0 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -82,11 +82,11 @@ DECLARE_EVENT_CLASS(filelock_lock, __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->blocker = fl ? fl->fl_blocker : NULL; - __entry->owner = fl ? fl->fl_owner : NULL; - __entry->pid = fl ? fl->fl_pid : 0; - __entry->flags = fl ? fl->fl_flags : 0; - __entry->type = fl ? fl->fl_type : 0; + __entry->blocker = fl ? fl->c.flc_blocker : NULL; + __entry->owner = fl ? fl->c.flc_owner : NULL; + __entry->pid = fl ? fl->c.flc_pid : 0; + __entry->flags = fl ? fl->c.flc_flags : 0; + __entry->type = fl ? fl->c.flc_type : 0; __entry->fl_start = fl ? fl->fl_start : 0; __entry->fl_end = fl ? fl->fl_end : 0; __entry->ret = ret; @@ -137,10 +137,10 @@ DECLARE_EVENT_CLASS(filelock_lease, __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->blocker = fl ? fl->fl_blocker : NULL; - __entry->owner = fl ? fl->fl_owner : NULL; - __entry->flags = fl ? fl->fl_flags : 0; - __entry->type = fl ? fl->fl_type : 0; + __entry->blocker = fl ? fl->c.flc_blocker : NULL; + __entry->owner = fl ? fl->c.flc_owner : NULL; + __entry->flags = fl ? fl->c.flc_flags : 0; + __entry->type = fl ? fl->c.flc_type : 0; __entry->break_time = fl ? fl->fl_break_time : 0; __entry->downgrade_time = fl ? fl->fl_downgrade_time : 0; ), @@ -190,9 +190,9 @@ TRACE_EVENT(generic_add_lease, __entry->wcount = atomic_read(&inode->i_writecount); __entry->rcount = atomic_read(&inode->i_readcount); __entry->icount = atomic_read(&inode->i_count); - __entry->owner = fl->fl_owner; - __entry->flags = fl->fl_flags; - __entry->type = fl->fl_type; + __entry->owner = fl->c.flc_owner; + __entry->flags = fl->c.flc_flags; + __entry->type = fl->c.flc_type; ), TP_printk("dev=0x%x:0x%x ino=0x%lx wcount=%d rcount=%d icount=%d fl_owner=%p fl_flags=%s fl_type=%s", @@ -220,11 +220,11 @@ TRACE_EVENT(leases_conflict, TP_fast_assign( __entry->lease = lease; - __entry->l_fl_flags = lease->fl_flags; - __entry->l_fl_type = lease->fl_type; + __entry->l_fl_flags = lease->c.flc_flags; + __entry->l_fl_type = lease->c.flc_type; __entry->breaker = breaker; - __entry->b_fl_flags = breaker->fl_flags; - __entry->b_fl_type = breaker->fl_type; + __entry->b_fl_flags = breaker->c.flc_flags; + __entry->b_fl_type = breaker->c.flc_type; __entry->conflict = conflict; ), -- cgit v1.2.3 From b6aaba5b76e9596cb4d62d081cca41e114becacc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 31 Jan 2024 18:02:07 -0500 Subject: filelock: convert fl_blocker to file_lock_core Both locks and leases deal with fl_blocker. Switch the fl_blocker pointer in struct file_lock_core to point to the file_lock_core of the blocker instead of a file_lock structure. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20240131-flsplit-v3-26-c6129007ee8d@kernel.org Reviewed-by: NeilBrown Signed-off-by: Christian Brauner --- include/trace/events/filelock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index 4be341b5ead0..c778061c6249 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(filelock_lock, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, blocker) + __field(struct file_lock_core *, blocker) __field(fl_owner_t, owner) __field(unsigned int, pid) __field(unsigned int, flags) @@ -125,7 +125,7 @@ DECLARE_EVENT_CLASS(filelock_lease, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, blocker) + __field(struct file_lock_core *, blocker) __field(fl_owner_t, owner) __field(unsigned int, flags) __field(unsigned char, type) -- cgit v1.2.3 From 82a8cb96b23244f40be56b9edcf085af0cc237a6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 31 Jan 2024 18:02:16 -0500 Subject: afs: adapt to breakup of struct file_lock Most of the existing APIs have remained the same, but subsystems that access file_lock fields directly need to reach into struct file_lock_core now. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20240131-flsplit-v3-35-c6129007ee8d@kernel.org Reviewed-by: NeilBrown Signed-off-by: Christian Brauner --- include/trace/events/afs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8d73171cb9f0..789931ea5e48 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -1164,8 +1164,8 @@ TRACE_EVENT(afs_flock_op, __entry->from = fl->fl_start; __entry->len = fl->fl_end - fl->fl_start + 1; __entry->op = op; - __entry->type = fl->fl_type; - __entry->flags = fl->fl_flags; + __entry->type = fl->c.flc_type; + __entry->flags = fl->c.flc_flags; __entry->debug_id = fl->fl_u.afs.debug_id; ), -- cgit v1.2.3 From c69ff4071935f946f1cddc59e1d36a03442ed015 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 31 Jan 2024 18:02:28 -0500 Subject: filelock: split leases out of struct file_lock Add a new struct file_lease and move the lease-specific fields from struct file_lock to it. Convert the appropriate API calls to take struct file_lease instead, and convert the callers to use them. There is zero overlap between the lock manager operations for file locks and the ones for file leases, so split the lease-related operations off into a new lease_manager_operations struct. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20240131-flsplit-v3-47-c6129007ee8d@kernel.org Reviewed-by: NeilBrown Signed-off-by: Christian Brauner --- include/trace/events/filelock.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index c778061c6249..b8d1e00a7982 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -117,12 +117,12 @@ DEFINE_EVENT(filelock_lock, flock_lock_inode, TP_ARGS(inode, fl, ret)); DECLARE_EVENT_CLASS(filelock_lease, - TP_PROTO(struct inode *inode, struct file_lock *fl), + TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl), TP_STRUCT__entry( - __field(struct file_lock *, fl) + __field(struct file_lease *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(struct file_lock_core *, blocker) @@ -153,23 +153,23 @@ DECLARE_EVENT_CLASS(filelock_lease, __entry->break_time, __entry->downgrade_time) ); -DEFINE_EVENT(filelock_lease, break_lease_noblock, TP_PROTO(struct inode *inode, struct file_lock *fl), +DEFINE_EVENT(filelock_lease, break_lease_noblock, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); -DEFINE_EVENT(filelock_lease, break_lease_block, TP_PROTO(struct inode *inode, struct file_lock *fl), +DEFINE_EVENT(filelock_lease, break_lease_block, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); -DEFINE_EVENT(filelock_lease, break_lease_unblock, TP_PROTO(struct inode *inode, struct file_lock *fl), +DEFINE_EVENT(filelock_lease, break_lease_unblock, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); -DEFINE_EVENT(filelock_lease, generic_delete_lease, TP_PROTO(struct inode *inode, struct file_lock *fl), +DEFINE_EVENT(filelock_lease, generic_delete_lease, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); -DEFINE_EVENT(filelock_lease, time_out_leases, TP_PROTO(struct inode *inode, struct file_lock *fl), +DEFINE_EVENT(filelock_lease, time_out_leases, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); TRACE_EVENT(generic_add_lease, - TP_PROTO(struct inode *inode, struct file_lock *fl), + TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl), @@ -204,7 +204,7 @@ TRACE_EVENT(generic_add_lease, ); TRACE_EVENT(leases_conflict, - TP_PROTO(bool conflict, struct file_lock *lease, struct file_lock *breaker), + TP_PROTO(bool conflict, struct file_lease *lease, struct file_lease *breaker), TP_ARGS(conflict, lease, breaker), -- cgit v1.2.3 From 41b7fa157ea1c8c3a575ca7f5f32034de9bee3ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Feb 2024 15:19:16 +0000 Subject: rxrpc: Fix counting of new acks and nacks Fix the counting of new acks and nacks when parsing a packet - something that is used in congestion control. As the code stands, it merely notes if there are any nacks whereas what we really should do is compare the previous SACK table to the new one, assuming we get two successive ACK packets with nacks in them. However, we really don't want to do that if we can avoid it as the tables might not correspond directly as one may be shifted from the other - something that will only get harder to deal with once extended ACK tables come into full use (with a capacity of up to 8192). Instead, count the number of nacks shifted out of the old SACK, the number of nacks retained in the portion still active and the number of new acks and nacks in the new table then calculate what we need. Note this ends up a bit of an estimate as the Rx protocol allows acks to be withdrawn by the receiver and packets requested to be retransmitted. Fixes: d57a3a151660 ("rxrpc: Save last ACK's SACK table rather than marking txbufs") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 4c1ef7b3705c..87b8de9b6c1c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -128,6 +128,7 @@ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ + EM(rxrpc_skb_get_last_nack, "GET last-nack") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ @@ -141,6 +142,7 @@ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ + EM(rxrpc_skb_put_last_nack, "PUT last-nack") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ @@ -1552,7 +1554,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), @@ -1560,9 +1562,9 @@ TRACE_EVENT(rxrpc_congest, __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), __entry->sum.cwnd, __entry->sum.ssthresh, - __entry->sum.nr_acks, __entry->sum.saw_nacks, + __entry->sum.nr_acks, __entry->sum.nr_retained_nacks, __entry->sum.nr_new_acks, - __entry->sum.nr_rot_new_acks, + __entry->sum.nr_new_nacks, __entry->top - __entry->hard_ack, __entry->sum.cumulative_acks, __entry->sum.dup_acks, -- cgit v1.2.3 From 4bcb982cce74e18155fba0d97394ca9634e0d8f0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 28 Jan 2024 20:05:47 -0700 Subject: io_uring: expand main struct io_kiocb flags to 64-bits We're out of space here, and none of the flags are easily reclaimable. Bump it to 64-bits and re-arrange the struct a bit to avoid gaps. Add a specific bitwise type for the request flags, io_request_flags_t. This will help catch violations of casting this value to a smaller type on 32-bit archs, like unsigned int. This creates a hole in the io_kiocb, so move nr_tw up and rsrc_node down to retain needing only cacheline 0 and 1 for non-polled opcodes. No functional changes intended in this patch. Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 69454f1f98b0..b241f2b12486 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -148,7 +148,7 @@ TRACE_EVENT(io_uring_queue_async_work, __field( void *, req ) __field( u64, user_data ) __field( u8, opcode ) - __field( unsigned int, flags ) + __field( unsigned long long, flags ) __field( struct io_wq_work *, work ) __field( int, rw ) @@ -159,7 +159,7 @@ TRACE_EVENT(io_uring_queue_async_work, __entry->ctx = req->ctx; __entry->req = req; __entry->user_data = req->cqe.user_data; - __entry->flags = req->flags; + __entry->flags = (__force unsigned long long) req->flags; __entry->opcode = req->opcode; __entry->work = &req->work; __entry->rw = rw; @@ -167,10 +167,10 @@ TRACE_EVENT(io_uring_queue_async_work, __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), - TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p", + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%llx, %s queue, work %p", __entry->ctx, __entry->req, __entry->user_data, - __get_str(op_str), - __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) + __get_str(op_str), __entry->flags, + __entry->rw ? "hashed" : "normal", __entry->work) ); /** @@ -378,7 +378,7 @@ TRACE_EVENT(io_uring_submit_req, __field( void *, req ) __field( unsigned long long, user_data ) __field( u8, opcode ) - __field( u32, flags ) + __field( unsigned long long, flags ) __field( bool, sq_thread ) __string( op_str, io_uring_get_opcode(req->opcode) ) @@ -389,16 +389,16 @@ TRACE_EVENT(io_uring_submit_req, __entry->req = req; __entry->user_data = req->cqe.user_data; __entry->opcode = req->opcode; - __entry->flags = req->flags; + __entry->flags = (__force unsigned long long) req->flags; __entry->sq_thread = req->ctx->flags & IORING_SETUP_SQPOLL; __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), - TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, " + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%llx, " "sq_thread %d", __entry->ctx, __entry->req, - __entry->user_data, __get_str(op_str), - __entry->flags, __entry->sq_thread) + __entry->user_data, __get_str(op_str), __entry->flags, + __entry->sq_thread) ); /* -- cgit v1.2.3 From 4c98b89175a229a1eb9e6db67b4b7c0d712c86a7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Jan 2024 07:03:46 -0700 Subject: io_uring: remove 'loops' argument from trace_io_uring_task_work_run() We no longer loop in task_work handling, hence delete the argument from the tracepoint as it's always 1 and hence not very informative. Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index b241f2b12486..e948df7ce625 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -602,29 +602,25 @@ TRACE_EVENT(io_uring_cqe_overflow, * * @tctx: pointer to a io_uring_task * @count: how many functions it ran - * @loops: how many loops it ran * */ TRACE_EVENT(io_uring_task_work_run, - TP_PROTO(void *tctx, unsigned int count, unsigned int loops), + TP_PROTO(void *tctx, unsigned int count), - TP_ARGS(tctx, count, loops), + TP_ARGS(tctx, count), TP_STRUCT__entry ( __field( void *, tctx ) __field( unsigned int, count ) - __field( unsigned int, loops ) ), TP_fast_assign( __entry->tctx = tctx; __entry->count = count; - __entry->loops = loops; ), - TP_printk("tctx %p, count %u, loops %u", - __entry->tctx, __entry->count, __entry->loops) + TP_printk("tctx %p, count %u", __entry->tctx, __entry->count) ); TRACE_EVENT(io_uring_short_write, -- cgit v1.2.3 From 36e40df35d2c1891fe58241640c7c95de4aa739b Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Thu, 22 Feb 2024 11:34:03 +0100 Subject: timer_migration: Add tracepoints The timer pull logic needs proper debugging aids. Add tracepoints so the hierarchical idle machinery can be diagnosed. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222103403.31923-1-anna-maria@linutronix.de --- include/trace/events/timer_migration.h | 298 +++++++++++++++++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 include/trace/events/timer_migration.h (limited to 'include/trace') diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h new file mode 100644 index 000000000000..79f19e76a80b --- /dev/null +++ b/include/trace/events/timer_migration.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timer_migration + +#if !defined(_TRACE_TIMER_MIGRATION_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMER_MIGRATION_H + +#include + +/* Group events */ +TRACE_EVENT(tmigr_group_set, + + TP_PROTO(struct tmigr_group *group), + + TP_ARGS(group), + + TP_STRUCT__entry( + __field( void *, group ) + __field( unsigned int, lvl ) + __field( unsigned int, numa_node ) + ), + + TP_fast_assign( + __entry->group = group; + __entry->lvl = group->level; + __entry->numa_node = group->numa_node; + ), + + TP_printk("group=%p lvl=%d numa=%d", + __entry->group, __entry->lvl, __entry->numa_node) +); + +TRACE_EVENT(tmigr_connect_child_parent, + + TP_PROTO(struct tmigr_group *child), + + TP_ARGS(child), + + TP_STRUCT__entry( + __field( void *, child ) + __field( void *, parent ) + __field( unsigned int, lvl ) + __field( unsigned int, numa_node ) + __field( unsigned int, num_children ) + __field( u32, childmask ) + ), + + TP_fast_assign( + __entry->child = child; + __entry->parent = child->parent; + __entry->lvl = child->parent->level; + __entry->numa_node = child->parent->numa_node; + __entry->num_children = child->parent->num_children; + __entry->childmask = child->childmask; + ), + + TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->child, __entry->childmask, __entry->parent, + __entry->lvl, __entry->numa_node, __entry->num_children) +); + +TRACE_EVENT(tmigr_connect_cpu_parent, + + TP_PROTO(struct tmigr_cpu *tmc), + + TP_ARGS(tmc), + + TP_STRUCT__entry( + __field( void *, parent ) + __field( unsigned int, cpu ) + __field( unsigned int, lvl ) + __field( unsigned int, numa_node ) + __field( unsigned int, num_children ) + __field( u32, childmask ) + ), + + TP_fast_assign( + __entry->parent = tmc->tmgroup; + __entry->cpu = tmc->cpuevt.cpu; + __entry->lvl = tmc->tmgroup->level; + __entry->numa_node = tmc->tmgroup->numa_node; + __entry->num_children = tmc->tmgroup->num_children; + __entry->childmask = tmc->childmask; + ), + + TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->cpu, __entry->childmask, __entry->parent, + __entry->lvl, __entry->numa_node, __entry->num_children) +); + +DECLARE_EVENT_CLASS(tmigr_group_and_cpu, + + TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask), + + TP_ARGS(group, state, childmask), + + TP_STRUCT__entry( + __field( void *, group ) + __field( void *, parent ) + __field( unsigned int, lvl ) + __field( unsigned int, numa_node ) + __field( u32, childmask ) + __field( u8, active ) + __field( u8, migrator ) + ), + + TP_fast_assign( + __entry->group = group; + __entry->parent = group->parent; + __entry->lvl = group->level; + __entry->numa_node = group->numa_node; + __entry->childmask = childmask; + __entry->active = state.active; + __entry->migrator = state.migrator; + ), + + TP_printk("group=%p lvl=%d numa=%d active=%0x migrator=%0x " + "parent=%p childmask=%0x", + __entry->group, __entry->lvl, __entry->numa_node, + __entry->active, __entry->migrator, + __entry->parent, __entry->childmask) +); + +DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_inactive, + + TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask), + + TP_ARGS(group, state, childmask) +); + +DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_active, + + TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask), + + TP_ARGS(group, state, childmask) +); + +/* CPU events*/ +DECLARE_EVENT_CLASS(tmigr_cpugroup, + + TP_PROTO(struct tmigr_cpu *tmc), + + TP_ARGS(tmc), + + TP_STRUCT__entry( + __field( u64, wakeup ) + __field( void *, parent ) + __field( unsigned int, cpu ) + + ), + + TP_fast_assign( + __entry->wakeup = tmc->wakeup; + __entry->parent = tmc->tmgroup; + __entry->cpu = tmc->cpuevt.cpu; + ), + + TP_printk("cpu=%d parent=%p wakeup=%llu", __entry->cpu, __entry->parent, __entry->wakeup) +); + +DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_new_timer, + + TP_PROTO(struct tmigr_cpu *tmc), + + TP_ARGS(tmc) +); + +DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_active, + + TP_PROTO(struct tmigr_cpu *tmc), + + TP_ARGS(tmc) +); + +DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online, + + TP_PROTO(struct tmigr_cpu *tmc), + + TP_ARGS(tmc) +); + +DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline, + + TP_PROTO(struct tmigr_cpu *tmc), + + TP_ARGS(tmc) +); + +DEFINE_EVENT(tmigr_cpugroup, tmigr_handle_remote_cpu, + + TP_PROTO(struct tmigr_cpu *tmc), + + TP_ARGS(tmc) +); + +DECLARE_EVENT_CLASS(tmigr_idle, + + TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt), + + TP_ARGS(tmc, nextevt), + + TP_STRUCT__entry( + __field( u64, nextevt) + __field( u64, wakeup) + __field( void *, parent) + __field( unsigned int, cpu) + ), + + TP_fast_assign( + __entry->nextevt = nextevt; + __entry->wakeup = tmc->wakeup; + __entry->parent = tmc->tmgroup; + __entry->cpu = tmc->cpuevt.cpu; + ), + + TP_printk("cpu=%d parent=%p nextevt=%llu wakeup=%llu", + __entry->cpu, __entry->parent, __entry->nextevt, __entry->wakeup) +); + +DEFINE_EVENT(tmigr_idle, tmigr_cpu_idle, + + TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt), + + TP_ARGS(tmc, nextevt) +); + +DEFINE_EVENT(tmigr_idle, tmigr_cpu_new_timer_idle, + + TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt), + + TP_ARGS(tmc, nextevt) +); + +TRACE_EVENT(tmigr_update_events, + + TP_PROTO(struct tmigr_group *child, struct tmigr_group *group, + union tmigr_state childstate, union tmigr_state groupstate, + u64 nextevt), + + TP_ARGS(child, group, childstate, groupstate, nextevt), + + TP_STRUCT__entry( + __field( void *, child ) + __field( void *, group ) + __field( u64, nextevt ) + __field( u64, group_next_expiry ) + __field( u64, child_evt_expiry ) + __field( unsigned int, group_lvl ) + __field( unsigned int, child_evtcpu ) + __field( u8, child_active ) + __field( u8, group_active ) + ), + + TP_fast_assign( + __entry->child = child; + __entry->group = group; + __entry->nextevt = nextevt; + __entry->group_next_expiry = group->next_expiry; + __entry->child_evt_expiry = child ? child->groupevt.nextevt.expires : 0; + __entry->group_lvl = group->level; + __entry->child_evtcpu = child ? child->groupevt.cpu : 0; + __entry->child_active = childstate.active; + __entry->group_active = groupstate.active; + ), + + TP_printk("child=%p group=%p group_lvl=%d child_active=%0x group_active=%0x " + "nextevt=%llu next_expiry=%llu child_evt_expiry=%llu child_evtcpu=%d", + __entry->child, __entry->group, __entry->group_lvl, __entry->child_active, + __entry->group_active, + __entry->nextevt, __entry->group_next_expiry, __entry->child_evt_expiry, + __entry->child_evtcpu) +); + +TRACE_EVENT(tmigr_handle_remote, + + TP_PROTO(struct tmigr_group *group), + + TP_ARGS(group), + + TP_STRUCT__entry( + __field( void * , group ) + __field( unsigned int , lvl ) + ), + + TP_fast_assign( + __entry->group = group; + __entry->lvl = group->level; + ), + + TP_printk("group=%p lvl=%d", + __entry->group, __entry->lvl) +); + +#endif /* _TRACE_TIMER_MIGRATION_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From ab755bf4249b992fc2140d615ab0a686d50765b4 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 20 Feb 2024 14:16:31 +0800 Subject: mm: compaction: update the cc->nr_migratepages when allocating or freeing the freepages Currently we will use 'cc->nr_freepages >= cc->nr_migratepages' comparison to ensure that enough freepages are isolated in isolate_freepages(), however it just decreases the cc->nr_freepages without updating cc->nr_migratepages in compaction_alloc(), which will waste more CPU cycles and cause too many freepages to be isolated. So we should also update the cc->nr_migratepages when allocating or freeing the freepages to avoid isolating excess freepages. And I can see fewer free pages are scanned and isolated when running thpcompact on my Arm64 server: k6.7 k6.7_patched Ops Compaction pages isolated 120692036.00 118160797.00 Ops Compaction migrate scanned 131210329.00 154093268.00 Ops Compaction free scanned 1090587971.00 1080632536.00 Ops Compact scan efficiency 12.03 14.26 Moreover, I did not see an obvious latency improvements, this is likely because isolating freepages is not the bottleneck in the thpcompact test case. k6.7 k6.7_patched Amean fault-both-1 1089.76 ( 0.00%) 1080.16 * 0.88%* Amean fault-both-3 1616.48 ( 0.00%) 1636.65 * -1.25%* Amean fault-both-5 2266.66 ( 0.00%) 2219.20 * 2.09%* Amean fault-both-7 2909.84 ( 0.00%) 2801.90 * 3.71%* Amean fault-both-12 4861.26 ( 0.00%) 4733.25 * 2.63%* Amean fault-both-18 7351.11 ( 0.00%) 6950.51 * 5.45%* Amean fault-both-24 9059.30 ( 0.00%) 9159.99 * -1.11%* Amean fault-both-30 10685.68 ( 0.00%) 11399.02 * -6.68%* Link: https://lkml.kernel.org/r/6440493f18da82298152b6305d6b41c2962a3ce6.1708409245.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: Mel Gorman Reviewed-by: Vlastimil Babka Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Zi Yan Signed-off-by: Andrew Morton --- include/trace/events/compaction.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 2b2a975efd20..d05759d18538 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -78,10 +78,10 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_fast_isolate_freepage #ifdef CONFIG_COMPACTION TRACE_EVENT(mm_compaction_migratepages, - TP_PROTO(struct compact_control *cc, + TP_PROTO(unsigned int nr_migratepages, unsigned int nr_succeeded), - TP_ARGS(cc, nr_succeeded), + TP_ARGS(nr_migratepages, nr_succeeded), TP_STRUCT__entry( __field(unsigned long, nr_migrated) @@ -90,7 +90,7 @@ TRACE_EVENT(mm_compaction_migratepages, TP_fast_assign( __entry->nr_migrated = nr_succeeded; - __entry->nr_failed = cc->nr_migratepages - nr_succeeded; + __entry->nr_failed = nr_migratepages - nr_succeeded; ), TP_printk("nr_migrated=%lu nr_failed=%lu", -- cgit v1.2.3 From 015abee404760249a5c968b9ce29216b94b8ced1 Mon Sep 17 00:00:00 2001 From: Vilas Bhat Date: Wed, 21 Feb 2024 19:47:53 +0000 Subject: PM: runtime: add tracepoint for runtime_status changes Existing runtime PM ftrace events (`rpm_suspend`, `rpm_resume`, `rpm_return_int`) offer limited visibility into the exact timing of device runtime power state transitions, particularly when asynchronous operations are involved. When the `rpm_suspend` or `rpm_resume` functions are invoked with the `RPM_ASYNC` flag, a return value of 0 i.e., success merely indicates that the device power state request has been queued, not that the device has yet transitioned. A new ftrace event, `rpm_status`, is introduced. This event directly logs the `power.runtime_status` value of a device whenever it changes providing granular tracking of runtime power state transitions regardless of synchronous or asynchronous `rpm_suspend` / `rpm_resume` usage. Signed-off-by: Vilas Bhat Signed-off-by: Rafael J. Wysocki --- include/trace/events/rpm.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rpm.h b/include/trace/events/rpm.h index 3c716214dab1..bd120e23ce12 100644 --- a/include/trace/events/rpm.h +++ b/include/trace/events/rpm.h @@ -101,6 +101,48 @@ TRACE_EVENT(rpm_return_int, __entry->ret) ); +#define RPM_STATUS_STRINGS \ + EM(RPM_INVALID, "RPM_INVALID") \ + EM(RPM_ACTIVE, "RPM_ACTIVE") \ + EM(RPM_RESUMING, "RPM_RESUMING") \ + EM(RPM_SUSPENDED, "RPM_SUSPENDED") \ + EMe(RPM_SUSPENDING, "RPM_SUSPENDING") + +/* Enums require being exported to userspace, for user tool parsing. */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +RPM_STATUS_STRINGS + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(rpm_status, + TP_PROTO(struct device *dev, enum rpm_status status), + TP_ARGS(dev, status), + + TP_STRUCT__entry( + __string(name, dev_name(dev)) + __field(int, status) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->status = status; + ), + + TP_printk("%s status=%s", __get_str(name), + __print_symbolic(__entry->status, RPM_STATUS_STRINGS)) +); + #endif /* _TRACE_RUNTIME_POWER_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 6e21eda471129b1b783c5212ec9a738fa7de204d Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 17 Jan 2024 14:59:12 -0500 Subject: SUNRPC: add xrpt id to rpc_stats_latency tracepoint In order to get the latency per xprt under the same clientid this patch adds xprt_id to the tracepoint output. Signed-off-by: Olga Kornievskaia Tested-by: Chen Hanxiao Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index cdd3a45e6003..4a2abf9daa46 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -639,6 +639,7 @@ TRACE_EVENT(rpc_stats_latency, __field(unsigned long, backlog) __field(unsigned long, rtt) __field(unsigned long, execute) + __field(u32, xprt_id) ), TP_fast_assign( @@ -651,13 +652,16 @@ TRACE_EVENT(rpc_stats_latency, __entry->backlog = ktime_to_us(backlog); __entry->rtt = ktime_to_us(rtt); __entry->execute = ktime_to_us(execute); + __entry->xprt_id = task->tk_xprt->id; ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", + " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu" + " xprt_id=%d", __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), - __entry->backlog, __entry->rtt, __entry->execute) + __entry->backlog, __entry->rtt, __entry->execute, + __entry->xprt_id) ); TRACE_EVENT(rpc_xdr_overflow, -- cgit v1.2.3 From ba132d841d5637655273c28d30751c8d18ac189b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 13:51:30 +0000 Subject: rxrpc: Record the Tx serial in the rxrpc_txbuf and retransmit trace Each Rx protocol packet contains a per-connection monotonically increasing serial number used to correlate outgoing messages with their replies - something that can be used for RTT calculation. Note this value in the rxrpc_txbuf struct in addition to the wire header and then log it in the rxrpc_retransmit trace for reference. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 87b8de9b6c1c..9add56980485 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1506,25 +1506,29 @@ TRACE_EVENT(rxrpc_drop_ack, ); TRACE_EVENT(rxrpc_retransmit, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, s64 expiry), + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, + rxrpc_serial_t serial, s64 expiry), - TP_ARGS(call, seq, expiry), + TP_ARGS(call, seq, serial, expiry), TP_STRUCT__entry( __field(unsigned int, call) __field(rxrpc_seq_t, seq) + __field(rxrpc_serial_t, serial) __field(s64, expiry) ), TP_fast_assign( __entry->call = call->debug_id; __entry->seq = seq; + __entry->serial = serial; __entry->expiry = expiry; ), - TP_printk("c=%08x q=%x xp=%lld", + TP_printk("c=%08x q=%x r=%x xp=%lld", __entry->call, __entry->seq, + __entry->serial, __entry->expiry) ); -- cgit v1.2.3 From 12bdff73a147aebcea8efae2b395ef0c27448909 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 15:01:10 +0000 Subject: rxrpc: Convert rxrpc_txbuf::flags into a mask and don't use atomics Convert the transmission buffer flags into a mask and use | and & rather than bitops functions (atomic ops are not required as only the I/O thread can manipulate them once submitted for transmission). The bottom byte can then correspond directly to the Rx protocol header flags. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9add56980485..33888f688325 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1084,9 +1084,9 @@ TRACE_EVENT(rxrpc_tx_packet, TRACE_EVENT(rxrpc_tx_data, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, u8 flags, bool retrans, bool lose), + rxrpc_serial_t serial, unsigned int flags, bool lose), - TP_ARGS(call, seq, serial, flags, retrans, lose), + TP_ARGS(call, seq, serial, flags, lose), TP_STRUCT__entry( __field(unsigned int, call) @@ -1094,8 +1094,7 @@ TRACE_EVENT(rxrpc_tx_data, __field(rxrpc_serial_t, serial) __field(u32, cid) __field(u32, call_id) - __field(u8, flags) - __field(bool, retrans) + __field(u16, flags) __field(bool, lose) ), @@ -1106,7 +1105,6 @@ TRACE_EVENT(rxrpc_tx_data, __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; - __entry->retrans = retrans; __entry->lose = lose; ), @@ -1116,8 +1114,8 @@ TRACE_EVENT(rxrpc_tx_data, __entry->call_id, __entry->serial, __entry->seq, - __entry->flags, - __entry->retrans ? " *RETRANS*" : "", + __entry->flags & RXRPC_TXBUF_WIRE_FLAGS, + __entry->flags & RXRPC_TXBUF_RESENT ? " *RETRANS*" : "", __entry->lose ? " *LOSE*" : "") ); -- cgit v1.2.3 From 17469ae0582aaacad36e8e858f58b86c369f21ef Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Jan 2024 16:17:03 +0000 Subject: rxrpc: Fix the names of the fields in the ACK trailer struct From AFS-3.3 a trailer containing extra info was added to the ACK packet format - but AF_RXRPC has the names of some of the fields mixed up compared to other AFS implementations. Rename the struct and the fields to make them match. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 33888f688325..c730cd732348 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -83,7 +83,7 @@ EM(rxrpc_badmsg_bad_abort, "bad-abort") \ EM(rxrpc_badmsg_bad_jumbo, "bad-jumbo") \ EM(rxrpc_badmsg_short_ack, "short-ack") \ - EM(rxrpc_badmsg_short_ack_info, "short-ack-info") \ + EM(rxrpc_badmsg_short_ack_trailer, "short-ack-trailer") \ EM(rxrpc_badmsg_short_hdr, "short-hdr") \ EM(rxrpc_badmsg_unsupported_packet, "unsup-pkt") \ EM(rxrpc_badmsg_zero_call, "zero-call") \ -- cgit v1.2.3 From 9a026aec88e8ef3d79e510195c5e17fcff76e4d9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 26 Jan 2024 12:46:01 -0500 Subject: NFSD: Add callback operation lifetime trace points Help observe the flow of callback operations. bc_shutdown() records exactly when the backchannel RPC client is destroyed and cl_cb_client is replaced with NULL. Examples include: nfsd-955 [004] 650.013997: nfsd_cb_queue: addr=192.168.122.6:0 client 65b3c5b8:f541f749 cb=0xffff8881134b02f8 (first try) kworker/u21:4-497 [004] 650.014050: nfsd_cb_seq_status: task:00000001@00000001 sessionid=65b3c5b8:f541f749:00000001:00000000 tk_status=-107 seq_status=1 kworker/u21:4-497 [004] 650.014051: nfsd_cb_restart: addr=192.168.122.6:0 client 65b3c5b8:f541f749 cb=0xffff88810e39f400 (first try) kworker/u21:4-497 [004] 650.014066: nfsd_cb_queue: addr=192.168.122.6:0 client 65b3c5b8:f541f749 cb=0xffff88810e39f400 (need restart) kworker/u16:0-10 [006] 650.065750: nfsd_cb_start: addr=192.168.122.6:0 client 65b3c5b8:f541f749 state=UNKNOWN kworker/u16:0-10 [006] 650.065752: nfsd_cb_bc_update: addr=192.168.122.6:0 client 65b3c5b8:f541f749 cb=0xffff8881134b02f8 (first try) kworker/u16:0-10 [006] 650.065754: nfsd_cb_bc_shutdown: addr=192.168.122.6:0 client 65b3c5b8:f541f749 cb=0xffff8881134b02f8 (first try) kworker/u16:0-10 [006] 650.065810: nfsd_cb_new_state: addr=192.168.122.6:0 client 65b3c5b8:f541f749 state=DOWN Reviewed-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Chuck Lever --- include/trace/misc/nfs.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/trace') diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 0d9d48dca38a..64ab5dac59ce 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -385,3 +385,37 @@ TRACE_DEFINE_ENUM(IOMODE_ANY); { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \ { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \ { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" }) + +TRACE_DEFINE_ENUM(OP_CB_GETATTR); +TRACE_DEFINE_ENUM(OP_CB_RECALL); +TRACE_DEFINE_ENUM(OP_CB_LAYOUTRECALL); +TRACE_DEFINE_ENUM(OP_CB_NOTIFY); +TRACE_DEFINE_ENUM(OP_CB_PUSH_DELEG); +TRACE_DEFINE_ENUM(OP_CB_RECALL_ANY); +TRACE_DEFINE_ENUM(OP_CB_RECALLABLE_OBJ_AVAIL); +TRACE_DEFINE_ENUM(OP_CB_RECALL_SLOT); +TRACE_DEFINE_ENUM(OP_CB_SEQUENCE); +TRACE_DEFINE_ENUM(OP_CB_WANTS_CANCELLED); +TRACE_DEFINE_ENUM(OP_CB_NOTIFY_LOCK); +TRACE_DEFINE_ENUM(OP_CB_NOTIFY_DEVICEID); +TRACE_DEFINE_ENUM(OP_CB_OFFLOAD); +TRACE_DEFINE_ENUM(OP_CB_ILLEGAL); + +#define show_nfs4_cb_op(x) \ + __print_symbolic(x, \ + { 0, "CB_NULL" }, \ + { 1, "CB_COMPOUND" }, \ + { OP_CB_GETATTR, "CB_GETATTR" }, \ + { OP_CB_RECALL, "CB_RECALL" }, \ + { OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \ + { OP_CB_NOTIFY, "CB_NOTIFY" }, \ + { OP_CB_PUSH_DELEG, "CB_PUSH_DELEG" }, \ + { OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \ + { OP_CB_RECALLABLE_OBJ_AVAIL, "CB_RECALLABLE_OBJ_AVAIL" }, \ + { OP_CB_RECALL_SLOT, "CB_RECALL_SLOT" }, \ + { OP_CB_SEQUENCE, "CB_SEQUENCE" }, \ + { OP_CB_WANTS_CANCELLED, "CB_WANTS_CANCELLED" }, \ + { OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \ + { OP_CB_NOTIFY_DEVICEID, "CB_NOTIFY_DEVICEID" }, \ + { OP_CB_OFFLOAD, "CB_OFFLOAD" }, \ + { OP_CB_ILLEGAL, "CB_ILLEGAL" }) -- cgit v1.2.3 From a1f5788a0c250c87d3007d59d11a00ab98e66f01 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 4 Feb 2024 18:17:28 -0500 Subject: svcrdma: Move write_info for Reply chunks into struct svc_rdma_send_ctxt Since the RPC transaction's svc_rdma_send_ctxt will stay around for the duration of the RDMA Write operation, the write_info structure for the Reply chunk can reside in the request's svc_rdma_send_ctxt instead of being allocated separately. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 110c1475c527..027ac3ab457d 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2118,6 +2118,10 @@ DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_write); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err); +DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_reply); +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_flush); +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_err); + TRACE_EVENT(svcrdma_qp_error, TP_PROTO( const struct ib_event *event, -- cgit v1.2.3 From 51270d573a8d9dd5afdc7934de97d66c0e14b5fd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 29 Feb 2024 14:34:44 -0500 Subject: tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm updating __assign_str() and will be removing the second parameter. To make sure that it does not break anything, I make sure that it matches the __string() field, as that is where the string is actually going to be saved in. To make sure there's nothing that breaks, I added a WARN_ON() to make sure that what was used in __string() is the same that is used in __assign_str(). In doing this change, an error was triggered as __assign_str() now expects the string passed in to be a char * value. I instead had the following warning: include/trace/events/qdisc.h: In function ‘trace_event_raw_event_qdisc_reset’: include/trace/events/qdisc.h:91:35: error: passing argument 1 of 'strcmp' from incompatible pointer type [-Werror=incompatible-pointer-types] 91 | __assign_str(dev, qdisc_dev(q)); That's because the qdisc_enqueue() and qdisc_reset() pass in qdisc_dev(q) to __assign_str() and to __string(). But that function returns a pointer to struct net_device and not a string. It appears that these events are just saving the pointer as a string and then reading it as a string as well. Use qdisc_dev(q)->name to save the device instead. Fixes: a34dac0b90552 ("net_sched: add tracepoints for qdisc_reset() and qdisc_destroy()") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/trace/events/qdisc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index a3995925cb05..1f4258308b96 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; -- cgit v1.2.3 From 72ba14deb40a9e9668ec5e66a341ed657e5215c2 Mon Sep 17 00:00:00 2001 From: Carlos Galo Date: Fri, 23 Feb 2024 17:32:49 +0000 Subject: mm: update mark_victim tracepoints fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of the mark_victim tracepoint provides only the process ID (pid) of the victim process. This limitation poses challenges for userspace tools requiring real-time OOM analysis and intervention. Although this information is available from the kernel logs, it’s not the appropriate format to provide OOM notifications. In Android, BPF programs are used with the mark_victim trace events to notify userspace of an OOM kill. For consistency, update the trace event to include the same information about the OOMed victim as the kernel logs. - UID In Android each installed application has a unique UID. Including the `uid` assists in correlating OOM events with specific apps. - Process Name (comm) Enables identification of the affected process. - OOM Score Will allow userspace to get additional insight of the relative kill priority of the OOM victim. In Android, the oom_score_adj is used to categorize app state (foreground, background, etc.), which aids in analyzing user-perceptible impacts of OOM events [1]. - Total VM, RSS Stats, and pgtables Amount of memory used by the victim that will, potentially, be freed up by killing it. [1] https://cs.android.com/android/platform/superproject/main/+/246dc8fc95b6d93afcba5c6d6c133307abb3ac2e:frameworks/base/services/core/java/com/android/server/am/ProcessList.java;l=188-283 Signed-off-by: Carlos Galo Reviewed-by: Steven Rostedt Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton --- include/trace/events/oom.h | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index 26a11e4a2c36..b799f3bcba82 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -7,6 +7,8 @@ #include #include +#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10)) + TRACE_EVENT(oom_score_adj_update, TP_PROTO(struct task_struct *task), @@ -72,19 +74,45 @@ TRACE_EVENT(reclaim_retry_zone, ); TRACE_EVENT(mark_victim, - TP_PROTO(int pid), + TP_PROTO(struct task_struct *task, uid_t uid), - TP_ARGS(pid), + TP_ARGS(task, uid), TP_STRUCT__entry( __field(int, pid) + __string(comm, task->comm) + __field(unsigned long, total_vm) + __field(unsigned long, anon_rss) + __field(unsigned long, file_rss) + __field(unsigned long, shmem_rss) + __field(uid_t, uid) + __field(unsigned long, pgtables) + __field(short, oom_score_adj) ), TP_fast_assign( - __entry->pid = pid; + __entry->pid = task->pid; + __assign_str(comm, task->comm); + __entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm); + __entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES)); + __entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES)); + __entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES)); + __entry->uid = uid; + __entry->pgtables = mm_pgtables_bytes(task->mm) >> 10; + __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d", __entry->pid) + TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd", + __entry->pid, + __get_str(comm), + __entry->total_vm, + __entry->anon_rss, + __entry->file_rss, + __entry->shmem_rss, + __entry->uid, + __entry->pgtables, + __entry->oom_score_adj + ) ); TRACE_EVENT(wake_reaper, -- cgit v1.2.3 From c8b36003121834cb77fcaf8a1ce0a454d7a97891 Mon Sep 17 00:00:00 2001 From: Richard Chang Date: Wed, 28 Feb 2024 05:11:17 +0000 Subject: mm: add alloc_contig_migrate_range allocation statistics alloc_contig_migrate_range has every information to be able to understand big contiguous allocation latency. For example, how many pages are migrated, how many times they were needed to unmap from page tables. This patch adds the trace event to collect the allocation statistics. In the field, it was quite useful to understand CMA allocation latency. [akpm@linux-foundation.org: a/trace_mm_alloc_config_migrate_range_info_enabled/trace_mm_alloc_contig_migrate_range_info_enabled] Link: https://lkml.kernel.org/r/20240228051127.2859472-1-richardycc@google.com Signed-off-by: Richard Chang Reviewed-by: Steven Rostedt (Google) Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Minchan Kim Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/trace/events/kmem.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 58688768ef0f..6e62cc64cd92 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -304,6 +304,44 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->change_ownership) ); +TRACE_EVENT(mm_alloc_contig_migrate_range_info, + + TP_PROTO(unsigned long start, + unsigned long end, + unsigned long nr_migrated, + unsigned long nr_reclaimed, + unsigned long nr_mapped, + int migratetype), + + TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, migratetype), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned long, nr_migrated) + __field(unsigned long, nr_reclaimed) + __field(unsigned long, nr_mapped) + __field(int, migratetype) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + __entry->nr_migrated = nr_migrated; + __entry->nr_reclaimed = nr_reclaimed; + __entry->nr_mapped = nr_mapped; + __entry->migratetype = migratetype; + ), + + TP_printk("start=0x%lx end=0x%lx migratetype=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu", + __entry->start, + __entry->end, + __entry->migratetype, + __entry->nr_migrated, + __entry->nr_reclaimed, + __entry->nr_mapped) +); + /* * Required for uniquely and securely identifying mm in rss_stat tracepoint. */ -- cgit v1.2.3 From 12a66e77c4999b97a2c2b8f5e4e7533c656cee12 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 20:30:03 +0000 Subject: rxrpc: Differentiate PING ACK transmission traces. There are three points that transmit PING ACKs and all of them use the same trace string. Change two of them to use different strings. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c730cd732348..3b726a6c8c42 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -364,11 +364,13 @@ #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ + EM(rxrpc_propose_ack_delayed_ack, "DlydAck") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ EM(rxrpc_propose_ack_input_data_hole, "DataInH") \ EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ + EM(rxrpc_propose_ack_ping_for_0_retrans, "0-Retrn") \ EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \ -- cgit v1.2.3 From 153f90a066dd4a91ef7edc0df3964dd097a5e2a5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 21:37:16 +0000 Subject: rxrpc: Use ktimes for call timeout tracking and set the timer lazily Track the call timeouts as ktimes rather than jiffies as the latter's granularity is too high and only set the timer at the end of the event handling function. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 174 +++++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 81 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 3b726a6c8c42..a1b126a6b0d7 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -119,6 +119,7 @@ EM(rxrpc_call_poke_complete, "Compl") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ + EM(rxrpc_call_poke_set_timeout, "Set-timo") \ EM(rxrpc_call_poke_start, "Start") \ EM(rxrpc_call_poke_timer, "Timer") \ E_(rxrpc_call_poke_timer_now, "Timer-now") @@ -340,27 +341,16 @@ E_(rxrpc_rtt_rx_requested_ack, "RACK") #define rxrpc_timer_traces \ - EM(rxrpc_timer_begin, "Begin ") \ - EM(rxrpc_timer_exp_ack, "ExpAck") \ - EM(rxrpc_timer_exp_hard, "ExpHrd") \ - EM(rxrpc_timer_exp_idle, "ExpIdl") \ - EM(rxrpc_timer_exp_keepalive, "ExpKA ") \ - EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ - EM(rxrpc_timer_exp_normal, "ExpNml") \ - EM(rxrpc_timer_exp_ping, "ExpPng") \ - EM(rxrpc_timer_exp_resend, "ExpRsn") \ - EM(rxrpc_timer_init_for_reply, "IniRpl") \ - EM(rxrpc_timer_init_for_send_reply, "SndRpl") \ - EM(rxrpc_timer_restart, "Restrt") \ - EM(rxrpc_timer_set_for_ack, "SetAck") \ - EM(rxrpc_timer_set_for_hard, "SetHrd") \ - EM(rxrpc_timer_set_for_idle, "SetIdl") \ - EM(rxrpc_timer_set_for_keepalive, "KeepAl") \ - EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ - EM(rxrpc_timer_set_for_normal, "SetNml") \ - EM(rxrpc_timer_set_for_ping, "SetPng") \ - EM(rxrpc_timer_set_for_resend, "SetRTx") \ - E_(rxrpc_timer_set_for_send, "SetSnd") + EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \ + EM(rxrpc_timer_trace_expect_rx, "ExpectRx ") \ + EM(rxrpc_timer_trace_hard, "HardLimit") \ + EM(rxrpc_timer_trace_idle, "IdleLimit") \ + EM(rxrpc_timer_trace_keepalive, "KeepAlive") \ + EM(rxrpc_timer_trace_lost_ack, "LostAck ") \ + EM(rxrpc_timer_trace_ping, "DelayPing") \ + EM(rxrpc_timer_trace_resend, "Resend ") \ + EM(rxrpc_timer_trace_resend_reset, "ResendRst") \ + E_(rxrpc_timer_trace_resend_tx, "ResendTx ") #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ @@ -1314,90 +1304,112 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->rto) ); -TRACE_EVENT(rxrpc_timer, - TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, - unsigned long now), +TRACE_EVENT(rxrpc_timer_set, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, + enum rxrpc_timer_trace why), - TP_ARGS(call, why, now), + TP_ARGS(call, delay, why), TP_STRUCT__entry( __field(unsigned int, call) __field(enum rxrpc_timer_trace, why) - __field(long, now) - __field(long, ack_at) - __field(long, ack_lost_at) - __field(long, resend_at) - __field(long, ping_at) - __field(long, expect_rx_by) - __field(long, expect_req_by) - __field(long, expect_term_by) - __field(long, timer) + __field(ktime_t, delay) ), TP_fast_assign( __entry->call = call->debug_id; __entry->why = why; - __entry->now = now; - __entry->ack_at = call->delay_ack_at; - __entry->ack_lost_at = call->ack_lost_at; - __entry->resend_at = call->resend_at; - __entry->expect_rx_by = call->expect_rx_by; - __entry->expect_req_by = call->expect_req_by; - __entry->expect_term_by = call->expect_term_by; - __entry->timer = call->timer.expires; + __entry->delay = delay; ), - TP_printk("c=%08x %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", + TP_printk("c=%08x %s to=%lld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), - __entry->ack_at - __entry->now, - __entry->ack_lost_at - __entry->now, - __entry->resend_at - __entry->now, - __entry->expect_rx_by - __entry->now, - __entry->expect_req_by - __entry->now, - __entry->expect_term_by - __entry->now, - __entry->timer - __entry->now) + ktime_to_us(__entry->delay)) + ); + +TRACE_EVENT(rxrpc_timer_exp, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, + enum rxrpc_timer_trace why), + + TP_ARGS(call, delay, why), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(enum rxrpc_timer_trace, why) + __field(ktime_t, delay) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->why = why; + __entry->delay = delay; + ), + + TP_printk("c=%08x %s to=%lld", + __entry->call, + __print_symbolic(__entry->why, rxrpc_timer_traces), + ktime_to_us(__entry->delay)) + ); + +TRACE_EVENT(rxrpc_timer_can, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why), + + TP_ARGS(call, why), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(enum rxrpc_timer_trace, why) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->why = why; + ), + + TP_printk("c=%08x %s", + __entry->call, + __print_symbolic(__entry->why, rxrpc_timer_traces)) + ); + +TRACE_EVENT(rxrpc_timer_restart, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, unsigned long delayj), + + TP_ARGS(call, delay, delayj), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(unsigned long, delayj) + __field(ktime_t, delay) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->delayj = delayj; + __entry->delay = delay; + ), + + TP_printk("c=%08x to=%lld j=%ld", + __entry->call, + ktime_to_us(__entry->delay), + __entry->delayj) ); TRACE_EVENT(rxrpc_timer_expired, - TP_PROTO(struct rxrpc_call *call, unsigned long now), + TP_PROTO(struct rxrpc_call *call), - TP_ARGS(call, now), + TP_ARGS(call), TP_STRUCT__entry( __field(unsigned int, call) - __field(long, now) - __field(long, ack_at) - __field(long, ack_lost_at) - __field(long, resend_at) - __field(long, ping_at) - __field(long, expect_rx_by) - __field(long, expect_req_by) - __field(long, expect_term_by) - __field(long, timer) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->now = now; - __entry->ack_at = call->delay_ack_at; - __entry->ack_lost_at = call->ack_lost_at; - __entry->resend_at = call->resend_at; - __entry->expect_rx_by = call->expect_rx_by; - __entry->expect_req_by = call->expect_req_by; - __entry->expect_term_by = call->expect_term_by; - __entry->timer = call->timer.expires; ), - TP_printk("c=%08x EXPIRED a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", - __entry->call, - __entry->ack_at - __entry->now, - __entry->ack_lost_at - __entry->now, - __entry->resend_at - __entry->now, - __entry->expect_rx_by - __entry->now, - __entry->expect_req_by - __entry->now, - __entry->expect_term_by - __entry->now, - __entry->timer - __entry->now) + TP_printk("c=%08x EXPIRED", + __entry->call) ); TRACE_EVENT(rxrpc_rx_lose, @@ -1507,7 +1519,7 @@ TRACE_EVENT(rxrpc_drop_ack, TRACE_EVENT(rxrpc_retransmit, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, s64 expiry), + rxrpc_serial_t serial, ktime_t expiry), TP_ARGS(call, seq, serial, expiry), @@ -1515,7 +1527,7 @@ TRACE_EVENT(rxrpc_retransmit, __field(unsigned int, call) __field(rxrpc_seq_t, seq) __field(rxrpc_serial_t, serial) - __field(s64, expiry) + __field(ktime_t, expiry) ), TP_fast_assign( @@ -1529,7 +1541,7 @@ TRACE_EVENT(rxrpc_retransmit, __entry->call, __entry->seq, __entry->serial, - __entry->expiry) + ktime_to_us(__entry->expiry)) ); TRACE_EVENT(rxrpc_congest, -- cgit v1.2.3 From 6ef46a69ec32fe1cf56de67742fcd01af4bf48af Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 6 Mar 2024 10:30:00 +0100 Subject: ASoC: trace: add component to set_bias_level trace events The snd_soc_bias_level_start and snd_soc_bias_level_done trace events currently look like: aplay-229 [000] 1250.140778: snd_soc_bias_level_start: card=vscn-2046 val=1 aplay-229 [000] 1250.140784: snd_soc_bias_level_done: card=vscn-2046 val=1 aplay-229 [000] 1250.140786: snd_soc_bias_level_start: card=vscn-2046 val=2 aplay-229 [000] 1250.140788: snd_soc_bias_level_done: card=vscn-2046 val=2 kworker/u8:1-21 [000] 1250.140871: snd_soc_bias_level_start: card=vscn-2046 val=1 kworker/u8:0-11 [000] 1250.140951: snd_soc_bias_level_start: card=vscn-2046 val=1 kworker/u8:0-11 [000] 1250.140956: snd_soc_bias_level_done: card=vscn-2046 val=1 kworker/u8:0-11 [000] 1250.140959: snd_soc_bias_level_start: card=vscn-2046 val=2 kworker/u8:0-11 [000] 1250.140961: snd_soc_bias_level_done: card=vscn-2046 val=2 kworker/u8:1-21 [000] 1250.167219: snd_soc_bias_level_done: card=vscn-2046 val=1 kworker/u8:1-21 [000] 1250.167222: snd_soc_bias_level_start: card=vscn-2046 val=2 kworker/u8:1-21 [000] 1250.167232: snd_soc_bias_level_done: card=vscn-2046 val=2 kworker/u8:0-11 [000] 1250.167440: snd_soc_bias_level_start: card=vscn-2046 val=3 kworker/u8:0-11 [000] 1250.167444: snd_soc_bias_level_done: card=vscn-2046 val=3 kworker/u8:1-21 [000] 1250.167497: snd_soc_bias_level_start: card=vscn-2046 val=3 kworker/u8:1-21 [000] 1250.167506: snd_soc_bias_level_done: card=vscn-2046 val=3 There are clearly multiple calls, one per component, but they cannot be discriminated from each other. Change the ftrace events to also print the component name, to make it clear which part of the code is involved. This requires changing the passed value from a struct snd_soc_card, where the DAPM context is not kwown, to a struct snd_soc_dapm_context where it is obviously known but the a card pointer is also available. With this change, the resulting trace becomes: aplay-247 [000] 1436.357332: snd_soc_bias_level_start: card=vscn-2046 component=(none) val=1 aplay-247 [000] 1436.357338: snd_soc_bias_level_done: card=vscn-2046 component=(none) val=1 aplay-247 [000] 1436.357340: snd_soc_bias_level_start: card=vscn-2046 component=(none) val=2 aplay-247 [000] 1436.357343: snd_soc_bias_level_done: card=vscn-2046 component=(none) val=2 kworker/u8:4-215 [000] 1436.357437: snd_soc_bias_level_start: card=vscn-2046 component=ff560000.codec val=1 kworker/u8:5-231 [000] 1436.357518: snd_soc_bias_level_start: card=vscn-2046 component=ff320000.i2s val=1 kworker/u8:5-231 [000] 1436.357523: snd_soc_bias_level_done: card=vscn-2046 component=ff320000.i2s val=1 kworker/u8:5-231 [000] 1436.357526: snd_soc_bias_level_start: card=vscn-2046 component=ff320000.i2s val=2 kworker/u8:5-231 [000] 1436.357528: snd_soc_bias_level_done: card=vscn-2046 component=ff320000.i2s val=2 kworker/u8:4-215 [000] 1436.383217: snd_soc_bias_level_done: card=vscn-2046 component=ff560000.codec val=1 kworker/u8:4-215 [000] 1436.383221: snd_soc_bias_level_start: card=vscn-2046 component=ff560000.codec val=2 kworker/u8:4-215 [000] 1436.383231: snd_soc_bias_level_done: card=vscn-2046 component=ff560000.codec val=2 kworker/u8:5-231 [000] 1436.383468: snd_soc_bias_level_start: card=vscn-2046 component=ff320000.i2s val=3 kworker/u8:5-231 [000] 1436.383472: snd_soc_bias_level_done: card=vscn-2046 component=ff320000.i2s val=3 kworker/u8:4-215 [000] 1436.383503: snd_soc_bias_level_start: card=vscn-2046 component=ff560000.codec val=3 kworker/u8:4-215 [000] 1436.383513: snd_soc_bias_level_done: card=vscn-2046 component=ff560000.codec val=3 Signed-off-by: Luca Ceresoli Link: https://msgid.link/r/20240306-improve-asoc-trace-events-v1-1-edb252bbeb10@bootlin.com Signed-off-by: Mark Brown --- include/trace/events/asoc.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index 4d8ef71090af..b7ac7f100bb4 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -17,38 +17,41 @@ struct snd_soc_card; struct snd_soc_dapm_widget; struct snd_soc_dapm_path; -DECLARE_EVENT_CLASS(snd_soc_card, +DECLARE_EVENT_CLASS(snd_soc_dapm, - TP_PROTO(struct snd_soc_card *card, int val), + TP_PROTO(struct snd_soc_dapm_context *dapm, int val), - TP_ARGS(card, val), + TP_ARGS(dapm, val), TP_STRUCT__entry( - __string( name, card->name ) - __field( int, val ) + __string( card_name, dapm->card->name) + __string( comp_name, dapm->component ? dapm->component->name : "(none)") + __field( int, val) ), TP_fast_assign( - __assign_str(name, card->name); + __assign_str(card_name, dapm->card->name); + __assign_str(comp_name, dapm->component ? dapm->component->name : "(none)"); __entry->val = val; ), - TP_printk("card=%s val=%d", __get_str(name), (int)__entry->val) + TP_printk("card=%s component=%s val=%d", + __get_str(card_name), __get_str(comp_name), (int)__entry->val) ); -DEFINE_EVENT(snd_soc_card, snd_soc_bias_level_start, +DEFINE_EVENT(snd_soc_dapm, snd_soc_bias_level_start, - TP_PROTO(struct snd_soc_card *card, int val), + TP_PROTO(struct snd_soc_dapm_context *dapm, int val), - TP_ARGS(card, val) + TP_ARGS(dapm, val) ); -DEFINE_EVENT(snd_soc_card, snd_soc_bias_level_done, +DEFINE_EVENT(snd_soc_dapm, snd_soc_bias_level_done, - TP_PROTO(struct snd_soc_card *card, int val), + TP_PROTO(struct snd_soc_dapm_context *dapm, int val), - TP_ARGS(card, val) + TP_ARGS(dapm, val) ); -- cgit v1.2.3 From 7df3eb4cdb6bbfa482f51548b9fd47c2723c68ba Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 6 Mar 2024 10:30:01 +0100 Subject: ASoC: trace: add event to snd_soc_dapm trace events Add the event value to the snd_soc_dapm_start and snd_soc_dapm_done trace events to make them more informative. Trace before: aplay-229 [000] 250.140309: snd_soc_dapm_start: card=vscn-2046 aplay-229 [000] 250.167531: snd_soc_dapm_done: card=vscn-2046 aplay-229 [000] 251.169588: snd_soc_dapm_start: card=vscn-2046 aplay-229 [000] 251.195245: snd_soc_dapm_done: card=vscn-2046 Trace after: aplay-214 [000] 693.290612: snd_soc_dapm_start: card=vscn-2046 event=1 aplay-214 [000] 693.315508: snd_soc_dapm_done: card=vscn-2046 event=1 aplay-214 [000] 694.537349: snd_soc_dapm_start: card=vscn-2046 event=2 aplay-214 [000] 694.563241: snd_soc_dapm_done: card=vscn-2046 event=2 Signed-off-by: Luca Ceresoli Link: https://msgid.link/r/20240306-improve-asoc-trace-events-v1-2-edb252bbeb10@bootlin.com Signed-off-by: Mark Brown --- include/trace/events/asoc.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index b7ac7f100bb4..4eed9028bb11 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -57,34 +57,36 @@ DEFINE_EVENT(snd_soc_dapm, snd_soc_bias_level_done, DECLARE_EVENT_CLASS(snd_soc_dapm_basic, - TP_PROTO(struct snd_soc_card *card), + TP_PROTO(struct snd_soc_card *card, int event), - TP_ARGS(card), + TP_ARGS(card, event), TP_STRUCT__entry( __string( name, card->name ) + __field( int, event ) ), TP_fast_assign( __assign_str(name, card->name); + __entry->event = event; ), - TP_printk("card=%s", __get_str(name)) + TP_printk("card=%s event=%d", __get_str(name), (int)__entry->event) ); DEFINE_EVENT(snd_soc_dapm_basic, snd_soc_dapm_start, - TP_PROTO(struct snd_soc_card *card), + TP_PROTO(struct snd_soc_card *card, int event), - TP_ARGS(card) + TP_ARGS(card, event) ); DEFINE_EVENT(snd_soc_dapm_basic, snd_soc_dapm_done, - TP_PROTO(struct snd_soc_card *card), + TP_PROTO(struct snd_soc_card *card, int event), - TP_ARGS(card) + TP_ARGS(card, event) ); -- cgit v1.2.3 From 4e441bb8aca1964460e0210bdfd9b89f18a0fd16 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 17:29:33 +0800 Subject: tcp: add tracing of skb/skaddr in tcp_event_sk_skb class Printing the addresses can help us identify the exact skb/sk for those system in which it's not that easy to run BPF program. As we can see, it already fetches those, then use it directly and it will print like below: ...tcp_retransmit_skb: skbaddr=XXX skaddr=XXX family=AF_INET... Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/trace/events/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 7b1ddffa3dfc..ac36067ae066 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -88,7 +88,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), - TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", + TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", + __entry->skbaddr, __entry->skaddr, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, -- cgit v1.2.3 From 0ab544b6f055f234f708f294f4a6e47262573a4c Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 17:29:34 +0800 Subject: tcp: add tracing of skbaddr in tcp_event_skb class Use the existing parameter and print the address of skbaddr as other trace functions do. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/trace/events/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index ac36067ae066..6ca3e0343666 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -362,7 +362,8 @@ DECLARE_EVENT_CLASS(tcp_event_skb, TP_STORE_ADDR_PORTS_SKB(__entry, skb); ), - TP_printk("src=%pISpc dest=%pISpc", __entry->saddr, __entry->daddr) + TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", + __entry->skbaddr, __entry->saddr, __entry->daddr) ); DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, -- cgit v1.2.3 From 6025b9135f7a8b46826a5fcf947259da43bac281 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 06:08:47 -0800 Subject: net: dqs: add NIC stall detector based on BQL softnet_data->time_squeeze is sometimes used as a proxy for host overload or indication of scheduling problems. In practice this statistic is very noisy and has hard to grasp units - e.g. is 10 squeezes a second to be expected, or high? Delaying network (NAPI) processing leads to drops on NIC queues but also RTT bloat, impacting pacing and CA decisions. Stalls are a little hard to detect on the Rx side, because there may simply have not been any packets received in given period of time. Packet timestamps help a little bit, but again we don't know if packets are stale because we're not keeping up or because someone (*cough* cgroups) disabled IRQs for a long time. We can, however, use Tx as a proxy for Rx stalls. Most drivers use combined Rx+Tx NAPIs so if Tx gets starved so will Rx. On the Tx side we know exactly when packets get queued, and completed, so there is no uncertainty. This patch adds stall checks to BQL. Why BQL? Because it's a convenient place to add such checks, already called by most drivers, and it has copious free space in its structures (this patch adds no extra cache references or dirtying to the fast path). The algorithm takes one parameter - max delay AKA stall threshold and increments a counter whenever NAPI got delayed for at least that amount of time. It also records the length of the longest stall. To be precise every time NAPI has not polled for at least stall thrs we check if there were any Tx packets queued between last NAPI run and now - stall_thrs/2. Unlike the classic Tx watchdog this mechanism does not ignore stalls caused by Tx being disabled, or loss of link. I don't think the check is worth the complexity, and stall is a stall, whether due to host overload, flow control, link down... doesn't matter much to the application. We have been running this detector in production at Meta for 2 years, with the threshold of 8ms. It's the lowest value where false positives become rare. There's still a constant stream of reported stalls (especially without the ksoftirqd deferral patches reverted), those who like their stall metrics to be 0 may prefer higher value. Signed-off-by: Jakub Kicinski Signed-off-by: Breno Leitao Signed-off-by: David S. Miller --- include/trace/events/napi.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 6678cf8b235b..dc03cf8e0369 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -36,6 +36,39 @@ TRACE_EVENT(napi_poll, __entry->work, __entry->budget) ); +TRACE_EVENT(dql_stall_detected, + + TP_PROTO(unsigned short thrs, unsigned int len, + unsigned long last_reap, unsigned long hist_head, + unsigned long now, unsigned long *hist), + + TP_ARGS(thrs, len, last_reap, hist_head, now, hist), + + TP_STRUCT__entry( + __field( unsigned short, thrs) + __field( unsigned int, len) + __field( unsigned long, last_reap) + __field( unsigned long, hist_head) + __field( unsigned long, now) + __array( unsigned long, hist, 4) + ), + + TP_fast_assign( + __entry->thrs = thrs; + __entry->len = len; + __entry->last_reap = last_reap; + __entry->hist_head = hist_head * BITS_PER_LONG; + __entry->now = now; + memcpy(__entry->hist, hist, sizeof(entry->hist)); + ), + + TP_printk("thrs %u len %u last_reap %lu hist_head %lu now %lu hist %016lx %016lx %016lx %016lx", + __entry->thrs, __entry->len, + __entry->last_reap, __entry->hist_head, __entry->now, + __entry->hist[0], __entry->hist[1], + __entry->hist[2], __entry->hist[3]) +); + #undef NO_DEV #endif /* _TRACE_NAPI_H */ -- cgit v1.2.3 From caabd859c41b50a571cfdf7747de9f245c5d531b Mon Sep 17 00:00:00 2001 From: fuyuanli Date: Tue, 5 Mar 2024 11:04:17 +0800 Subject: tcp: Add skb addr and sock addr to arguments of tracepoint tcp_probe. It is useful to expose skb addr and sock addr to user in tracepoint tcp_probe, so that we can get more information while monitoring receiving of tcp data, by ebpf or other ways. For example, we need to identify a packet by seq and end_seq when calculate transmit latency between layer 2 and layer 4 by ebpf, but which is not available in tcp_probe, so we can only use kprobe hooking tcp_rcv_established to get them. But we can use tcp_probe directly if skb addr and sock addr are available, which is more efficient. Signed-off-by: fuyuanli Reviewed-by: Jason Xing Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 6ca3e0343666..699dafd204ea 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -259,6 +259,8 @@ TRACE_EVENT(tcp_probe, __field(__u32, srtt) __field(__u32, rcv_wnd) __field(__u64, sock_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) ), TP_fast_assign( @@ -286,14 +288,18 @@ TRACE_EVENT(tcp_probe, __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; __entry->sock_cookie = sock_gen_cookie(sk); + + __entry->skbaddr = skb; + __entry->skaddr = sk; ), - TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", + TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx skbaddr=%p skaddr=%p", show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->mark, __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, - __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie) + __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, + __entry->skbaddr, __entry->skaddr) ); #define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb) \ -- cgit v1.2.3 From 7e5ae43b2d0eb89560bf7da7c9c745d31bf72ffe Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 2 Feb 2024 15:21:13 -0500 Subject: NFSv4.1: add tracepoint to trunked nfs4_exchange_id calls Add a tracepoint to track when the client sends EXCHANGE_ID to test a new transport for session trunking. nfs4_detect_session_trunking() tests for trunking and returns EINVAL if trunking can't be done, add EINVAL mapping to show_nfs4_status() in tracepoints. Signed-off-by: Olga Kornievskaia Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/trace/misc/nfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 0d9d48dca38a..5387eb0a6a08 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -239,6 +239,7 @@ TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); { EHOSTDOWN, "EHOSTDOWN" }, \ { EPIPE, "EPIPE" }, \ { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ + { EINVAL, "EINVAL" }, \ { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ { NFS4ERR_ACCESS, "ACCESS" }, \ { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ -- cgit v1.2.3 From c1fa617caeb005e7e3db60826cff6dddebb0363f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 22 Feb 2024 16:14:16 -0500 Subject: tracing: Rework __assign_str() and __string() to not duplicate getting the string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TRACE_EVENT() macro handles dynamic strings by having: TP_PROTO(struct some_struct *s), TP_ARGS(s), TP_STRUCT__entry( __string(my_string, s->string) ), TP_fast_assign( __assign_str(my_string, s->string); ) TP_printk("%s", __get_str(my_string)) There's even some code that may call a function helper to find the s->string value. The problem with the above is that the work to get the s->string is done twice. Once at the __string() and again in the __assign_str(). But the __string() uses dynamic_array() which has a helper structure that is created holding the offsets and length of the string fields. Instead of finding the string twice, just save it off in another field from that helper structure, and have __assign_str() use that instead. Note, this also means that the second parameter of __assign_str() isn't even used anymore, and may be removed in the future. Link: https://lore.kernel.org/linux-trace-kernel/20240222211442.634192653@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: Chuck Lever Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage2_data_offsets.h | 4 ++-- include/trace/stages/stage5_get_offsets.h | 15 ++++++++++----- include/trace/stages/stage6_event_callback.h | 12 ++++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h index 469b6a64293d..8b0cff06d346 100644 --- a/include/trace/stages/stage2_data_offsets.h +++ b/include/trace/stages/stage2_data_offsets.h @@ -24,7 +24,7 @@ #define __array(type, item, len) #undef __dynamic_array -#define __dynamic_array(type, item, len) u32 item; +#define __dynamic_array(type, item, len) u32 item; const void *item##_ptr_; #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -45,7 +45,7 @@ #define __sockaddr(field, len) __dynamic_array(u8, field, len) #undef __rel_dynamic_array -#define __rel_dynamic_array(type, item, len) u32 item; +#define __rel_dynamic_array(type, item, len) u32 item; const void *item##_ptr_; #undef __rel_string #define __rel_string(item, src) __rel_dynamic_array(char, item, -1) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index e30a13be46ba..45f8151cf622 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -47,10 +47,12 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) + strlen((src) ? (const char *)(src) : "(null)") + 1) \ + __data_offsets->item##_ptr_ = src; #undef __string_len -#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)\ + __data_offsets->item##_ptr_ = src; #undef __vstring #define __vstring(item, fmt, ap) __dynamic_array(char, item, \ @@ -67,11 +69,14 @@ __data_size += __item_length; #undef __rel_string -#define __rel_string(item, src) __rel_dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) +#define __rel_string(item, src) __rel_dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) \ + __data_offsets->item##_ptr_ = src; #undef __rel_string_len -#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1)\ + __data_offsets->item##_ptr_ = src; + /* * __bitmask_size_in_bytes_raw is the number of bytes needed to hold * num_possible_cpus(). diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 919b1a4da980..b3e2f321e787 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -32,12 +32,14 @@ #undef __assign_str #define __assign_str(dst, src) \ - strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); + strcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? \ + __data_offsets.dst##_ptr_ : "(null)") #undef __assign_str_len #define __assign_str_len(dst, src, len) \ do { \ - memcpy(__get_str(dst), (src), (len)); \ + memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? \ + __data_offsets.dst##_ptr_ : "(null)", len); \ __get_str(dst)[len] = '\0'; \ } while(0) @@ -92,12 +94,14 @@ #undef __assign_rel_str #define __assign_rel_str(dst, src) \ - strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); + strcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? \ + __data_offsets.dst##_ptr_ : "(null)") #undef __assign_rel_str_len #define __assign_rel_str_len(dst, src, len) \ do { \ - memcpy(__get_rel_str(dst), (src), (len)); \ + memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? \ + __data_offsets.dst##_ptr_ : "(null)", len); \ __get_rel_str(dst)[len] = '\0'; \ } while (0) -- cgit v1.2.3 From e8b737bfb16a0d540413173e8d1574e3bf8cc0e9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 22 Feb 2024 16:14:17 -0500 Subject: tracing: Do not calculate strlen() twice for __string() fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TRACE_EVENT() macro handles dynamic strings by having: TP_PROTO(struct some_struct *s), TP_ARGS(s), TP_STRUCT__entry( __string(my_string, s->string) ), TP_fast_assign( __assign_str(my_string, s->string); ) TP_printk("%s", __get_str(my_string)) There's even some code that may call a function helper to find the s->string value. The problem with the above is that the work to get the s->string is done twice. Once at the __string() and again in the __assign_str(). The length of the string is calculated via a strlen(), not once, but twice. Once during the __string() macro and again in __assign_str(). But the length is actually already recorded in the data location and here's no reason to call strlen() again. Just use the saved length that was saved in the __string() code for the __assign_str() code. Link: https://lore.kernel.org/linux-trace-kernel/20240222211442.793074999@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: Chuck Lever Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage6_event_callback.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index b3e2f321e787..c0e5d097324e 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -32,8 +32,9 @@ #undef __assign_str #define __assign_str(dst, src) \ - strcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? \ - __data_offsets.dst##_ptr_ : "(null)") + memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? \ + __data_offsets.dst##_ptr_ : "(null)", \ + __get_dynamic_array_len(dst)) #undef __assign_str_len #define __assign_str_len(dst, src, len) \ @@ -94,8 +95,9 @@ #undef __assign_rel_str #define __assign_rel_str(dst, src) \ - strcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? \ - __data_offsets.dst##_ptr_ : "(null)") + memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? \ + __data_offsets.dst##_ptr_ : "(null)", \ + __get_rel_dynamic_array_len(dst)) #undef __assign_rel_str_len #define __assign_rel_str_len(dst, src, len) \ -- cgit v1.2.3 From 916849860fa9c7d3caeb144cb5dec8831cf23bfc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 22 Feb 2024 16:14:18 -0500 Subject: tracing: Use ? : shortcut in trace macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of having: #define __assign_str(dst, src) \ memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? \ __data_offsets.dst##_ptr_ : "(null)", \ __get_dynamic_array_len(dst)) Use the ? : shortcut and compact it down to: #define __assign_str(dst, src) \ memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? : "(null)", \ __get_dynamic_array_len(dst)) Link: https://lore.kernel.org/linux-trace-kernel/20240222211442.949327725@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: Chuck Lever Suggested-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage5_get_offsets.h | 4 ++-- include/trace/stages/stage6_event_callback.h | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index 45f8151cf622..20b801ed3fd4 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -47,7 +47,7 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) \ + strlen((const char *)(src) ? : "(null)") + 1) \ __data_offsets->item##_ptr_ = src; #undef __string_len @@ -70,7 +70,7 @@ #undef __rel_string #define __rel_string(item, src) __rel_dynamic_array(char, item, \ - strlen((src) ? (const char *)(src) : "(null)") + 1) \ + strlen((const char *)(src) ? : "(null)") + 1) \ __data_offsets->item##_ptr_ = src; #undef __rel_string_len diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index c0e5d097324e..38732855eadb 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -32,15 +32,14 @@ #undef __assign_str #define __assign_str(dst, src) \ - memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? \ - __data_offsets.dst##_ptr_ : "(null)", \ + memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? : "(null)", \ __get_dynamic_array_len(dst)) #undef __assign_str_len #define __assign_str_len(dst, src, len) \ do { \ - memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? \ - __data_offsets.dst##_ptr_ : "(null)", len); \ + memcpy(__get_str(dst), \ + __data_offsets.dst##_ptr_ ? : "(null)", len); \ __get_str(dst)[len] = '\0'; \ } while(0) @@ -95,15 +94,14 @@ #undef __assign_rel_str #define __assign_rel_str(dst, src) \ - memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? \ - __data_offsets.dst##_ptr_ : "(null)", \ + memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? : "(null)", \ __get_rel_dynamic_array_len(dst)) #undef __assign_rel_str_len #define __assign_rel_str_len(dst, src, len) \ do { \ - memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? \ - __data_offsets.dst##_ptr_ : "(null)", len); \ + memcpy(__get_rel_str(dst), \ + __data_offsets.dst##_ptr_ ? : "(null)", len); \ __get_rel_str(dst)[len] = '\0'; \ } while (0) -- cgit v1.2.3 From 70a6ed553f7d3504febac467cb4a0bae621ba3c6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 22 Feb 2024 16:14:19 -0500 Subject: tracing: Use EVENT_NULL_STR macro instead of open coding "(null)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TRACE_EVENT macros has some dependency if a __string() field is NULL, where it will save "(null)" as the string. This string is also used by __assign_str(). It's better to create a single macro instead of having something that will not be caught by the compiler if there is an unfortunate typo. Link: https://lore.kernel.org/linux-trace-kernel/20240222211443.106216915@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: Chuck Lever Suggested-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/events/sunrpc.h | 12 ++++++------ include/trace/stages/stage5_get_offsets.h | 4 ++-- include/trace/stages/stage6_event_callback.h | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index cdd3a45e6003..ce6a85b82afa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1327,18 +1327,18 @@ TRACE_EVENT(xs_stream_read_data, __field(ssize_t, err) __field(size_t, total) __string(addr, xprt ? xprt->address_strings[RPC_DISPLAY_ADDR] : - "(null)") + EVENT_NULL_STR) __string(port, xprt ? xprt->address_strings[RPC_DISPLAY_PORT] : - "(null)") + EVENT_NULL_STR) ), TP_fast_assign( __entry->err = err; __entry->total = total; __assign_str(addr, xprt ? - xprt->address_strings[RPC_DISPLAY_ADDR] : "(null)"); + xprt->address_strings[RPC_DISPLAY_ADDR] : EVENT_NULL_STR); __assign_str(port, xprt ? - xprt->address_strings[RPC_DISPLAY_PORT] : "(null)"); + xprt->address_strings[RPC_DISPLAY_PORT] : EVENT_NULL_STR); ), TP_printk("peer=[%s]:%s err=%zd total=%zu", __get_str(addr), @@ -1783,7 +1783,7 @@ TRACE_EVENT(svc_process, __string(service, name) __string(procedure, svc_proc_name(rqst)) __string(addr, rqst->rq_xprt ? - rqst->rq_xprt->xpt_remotebuf : "(null)") + rqst->rq_xprt->xpt_remotebuf : EVENT_NULL_STR) ), TP_fast_assign( @@ -1793,7 +1793,7 @@ TRACE_EVENT(svc_process, __assign_str(service, name); __assign_str(procedure, svc_proc_name(rqst)); __assign_str(addr, rqst->rq_xprt ? - rqst->rq_xprt->xpt_remotebuf : "(null)"); + rqst->rq_xprt->xpt_remotebuf : EVENT_NULL_STR); ), TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%s", diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index 20b801ed3fd4..e6b96608f452 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -47,7 +47,7 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, \ - strlen((const char *)(src) ? : "(null)") + 1) \ + strlen((const char *)(src) ? : EVENT_NULL_STR) + 1) \ __data_offsets->item##_ptr_ = src; #undef __string_len @@ -70,7 +70,7 @@ #undef __rel_string #define __rel_string(item, src) __rel_dynamic_array(char, item, \ - strlen((const char *)(src) ? : "(null)") + 1) \ + strlen((const char *)(src) ? : EVENT_NULL_STR) + 1) \ __data_offsets->item##_ptr_ = src; #undef __rel_string_len diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 38732855eadb..2bfd49713b42 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -32,14 +32,14 @@ #undef __assign_str #define __assign_str(dst, src) \ - memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? : "(null)", \ + memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, \ __get_dynamic_array_len(dst)) #undef __assign_str_len #define __assign_str_len(dst, src, len) \ do { \ memcpy(__get_str(dst), \ - __data_offsets.dst##_ptr_ ? : "(null)", len); \ + __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, len); \ __get_str(dst)[len] = '\0'; \ } while(0) @@ -94,14 +94,14 @@ #undef __assign_rel_str #define __assign_rel_str(dst, src) \ - memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? : "(null)", \ + memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, \ __get_rel_dynamic_array_len(dst)) #undef __assign_rel_str_len #define __assign_rel_str_len(dst, src, len) \ do { \ memcpy(__get_rel_str(dst), \ - __data_offsets.dst##_ptr_ ? : "(null)", len); \ + __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, len); \ __get_rel_str(dst)[len] = '\0'; \ } while (0) -- cgit v1.2.3 From c759e609030ca37e59866cbc849fdc611cc56292 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 23 Feb 2024 15:22:06 -0500 Subject: tracing: Remove __assign_str_len() Now that __assign_str() gets the length from the __string() (and __string_len()) macros, there's no reason to have a separate __assign_str_len() macro as __assign_str() can get the length of the string needed. Also remove __assign_rel_str() although it had no users anyway. Link: https://lore.kernel.org/linux-trace-kernel/20240223152206.0b650659@gandalf.local.home Cc: Jeff Layton Acked-by: Chuck Lever Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage6_event_callback.h | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 2bfd49713b42..0c0f50bcdc56 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -32,16 +32,13 @@ #undef __assign_str #define __assign_str(dst, src) \ - memcpy(__get_str(dst), __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, \ - __get_dynamic_array_len(dst)) - -#undef __assign_str_len -#define __assign_str_len(dst, src, len) \ do { \ - memcpy(__get_str(dst), \ - __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, len); \ - __get_str(dst)[len] = '\0'; \ - } while(0) + char *__str__ = __get_str(dst); \ + int __len__ = __get_dynamic_array_len(dst) - 1; \ + memcpy(__str__, __data_offsets.dst##_ptr_ ? : \ + EVENT_NULL_STR, __len__); \ + __str__[__len__] = '\0'; \ + } while (0) #undef __assign_vstr #define __assign_vstr(dst, fmt, va) \ @@ -94,15 +91,12 @@ #undef __assign_rel_str #define __assign_rel_str(dst, src) \ - memcpy(__get_rel_str(dst), __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, \ - __get_rel_dynamic_array_len(dst)) - -#undef __assign_rel_str_len -#define __assign_rel_str_len(dst, src, len) \ do { \ - memcpy(__get_rel_str(dst), \ - __data_offsets.dst##_ptr_ ? : EVENT_NULL_STR, len); \ - __get_rel_str(dst)[len] = '\0'; \ + char *__str__ = __get_rel_str(dst); \ + int __len__ = __get_rel_dynamic_array_len(dst) - 1; \ + memcpy(__str__, __data_offsets.dst##_ptr_ ? : \ + EVENT_NULL_STR, __len__); \ + __str__[__len__] = '\0'; \ } while (0) #undef __rel_bitmask -- cgit v1.2.3 From cf986e57d606849288eecf572800f2bd476fb1ab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 23 Feb 2024 16:13:56 -0500 Subject: tracing: Add warning if string in __assign_str() does not match __string() In preparation to remove the second parameter of __assign_str(), make sure it is really a duplicate of __string() by adding a WARN_ON_ONCE(). Link: https://lore.kernel.org/linux-trace-kernel/20240223161356.63b72403@gandalf.local.home Cc: Mathieu Desnoyers Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage6_event_callback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 0c0f50bcdc56..935608971899 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -35,6 +35,7 @@ do { \ char *__str__ = __get_str(dst); \ int __len__ = __get_dynamic_array_len(dst) - 1; \ + WARN_ON_ONCE((src) != __data_offsets.dst##_ptr_); \ memcpy(__str__, __data_offsets.dst##_ptr_ ? : \ EVENT_NULL_STR, __len__); \ __str__[__len__] = '\0'; \ -- cgit v1.2.3 From 0bdfb68c845edd7e2dbe815266bb09641576e22f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 23 Feb 2024 16:25:19 -0500 Subject: tracing: Remove second parameter to __assign_rel_str() The second parameter of __assign_rel_str() is no longer used. It can be removed. Note, the only real users of rel_string is user events. This code is just in the sample code for testing purposes. This makes __assign_rel_str() different than __assign_str() but that's fine. __assign_str() is used over 700 places and has a larger impact. That change will come later. Link: https://lore.kernel.org/linux-trace-kernel/20240223162519.2beb8112@gandalf.local.home Cc: Mathieu Desnoyers Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage6_event_callback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 935608971899..a0c15f67eabe 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -91,7 +91,7 @@ #define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) #undef __assign_rel_str -#define __assign_rel_str(dst, src) \ +#define __assign_rel_str(dst) \ do { \ char *__str__ = __get_rel_str(dst); \ int __len__ = __get_rel_dynamic_array_len(dst) - 1; \ -- cgit v1.2.3 From b1afefa62ca9d77c8b1d386c3512fb4f21cb7db1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Mar 2024 11:30:02 -0400 Subject: tracing: Use strcmp() in __assign_str() WARN_ON() check The WARN_ON() check in __assign_str() to catch where the source variable to the macro doesn't match the source variable to __string() gives an error in clang: >> include/trace/events/sunrpc.h:703:4: warning: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Wstring-compare] 670 | __assign_str(progname, "unknown"); That's because the __assign_str() macro has: WARN_ON_ONCE((src) != __data_offsets.dst##_ptr_); Where "src" is a string literal. Clang warns when comparing a string literal directly as it is undefined to what the value of the literal is. Since this is still to make sure the same string that goes to __string() is the same as __assign_str(), for string literals do a test for that and then use strcmp() in those cases Note that this depends on commit 51270d573a8d ("tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string") being applied, as this was what found that bug. Link: https://lore.kernel.org/linux-trace-kernel/20240312113002.00031668@gandalf.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Nathan Chancellor Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402292111.KIdExylU-lkp@intel.com/ Fixes: 433e1d88a3be ("tracing: Add warning if string in __assign_str() does not match __string()") Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage6_event_callback.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index a0c15f67eabe..83da83a0c14f 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -35,7 +35,9 @@ do { \ char *__str__ = __get_str(dst); \ int __len__ = __get_dynamic_array_len(dst) - 1; \ - WARN_ON_ONCE((src) != __data_offsets.dst##_ptr_); \ + WARN_ON_ONCE(__builtin_constant_p(src) ? \ + strcmp((src), __data_offsets.dst##_ptr_) : \ + (src) != __data_offsets.dst##_ptr_); \ memcpy(__str__, __data_offsets.dst##_ptr_ ? : \ EVENT_NULL_STR, __len__); \ __str__[__len__] = '\0'; \ -- cgit v1.2.3 From 7604256cecef34a82333d9f78262d3180f4eb525 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 14 Mar 2024 23:27:54 -0400 Subject: tracing: Add __string_src() helper to help compilers not to get confused The __string() helper macro of the TRACE_EVENT() macro is used to determine how much of the ring buffer needs to be allocated to fit the given source string. Some trace events have a string that is dependent on another variable that could be NULL, and in those cases the string is passed in to be NULL. The __string() macro can handle being passed in a NULL pointer for which it will turn it into "(null)". It does that with: strlen((src) ? (const char *)(src) : "(null)") + 1 But if src itself has the same conditional type it can confuse the compiler. That is: __string(r ? dev(r)->name : NULL) Would turn into: strlen((r ? dev(r)->name : NULL) ? (r ? dev(r)->name : NULL) : "(null)" + 1 For which the compiler thinks that NULL is being passed to strlen() and gives this kind of warning: ./include/trace/stages/stage5_get_offsets.h:50:21: warning: argument 1 null where non-null expected [-Wnonnull] 50 | strlen((src) ? (const char *)(src) : "(null)") + 1) Instead, create a static inline function that takes the src string and will return the string if it is not NULL and will return "(null)" if it is. This will then make the strlen() line: strlen(__string_src(src)) + 1 Where the compiler can see that strlen() will not end up with NULL and does not warn about it. Note that this depends on commit 51270d573a8d ("tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string") being applied, as passing the qdisc_dev() into __string_src() will give an error. Link: https://lore.kernel.org/all/ZfNmfCmgCs4Nc+EH@aschofie-mobl2/ Link: https://lore.kernel.org/linux-trace-kernel/20240314232754.345cea82@rorschach.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Reported-by: Alison Schofield Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage5_get_offsets.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index e6b96608f452..c6a62dfb18ef 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -9,6 +9,16 @@ #undef __entry #define __entry entry +#ifndef __STAGE5_STRING_SRC_H +#define __STAGE5_STRING_SRC_H +static inline const char *__string_src(const char *str) +{ + if (!str) + return EVENT_NULL_STR; + return str; +} +#endif /* __STAGE5_STRING_SRC_H */ + /* * Fields should never declare an array: i.e. __field(int, arr[5]) * If they do, it will cause issues in parsing and possibly corrupt the @@ -47,7 +57,7 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, \ - strlen((const char *)(src) ? : EVENT_NULL_STR) + 1) \ + strlen(__string_src(src)) + 1) \ __data_offsets->item##_ptr_ = src; #undef __string_len @@ -70,7 +80,7 @@ #undef __rel_string #define __rel_string(item, src) __rel_dynamic_array(char, item, \ - strlen((const char *)(src) ? : EVENT_NULL_STR) + 1) \ + strlen(__string_src(src)) + 1) \ __data_offsets->item##_ptr_ = src; #undef __rel_string_len -- cgit v1.2.3 From 24f5bb9f24ad80da6c6f83ba6124b5188c5384b2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 19 Mar 2024 13:39:59 -0400 Subject: tracing: Just use strcmp() for testing __string() and __assign_str() match As __assign_str() no longer uses its "src" parameter, there's a check to make sure nothing depends on it being different than what was passed to __string(). It originally just compared the pointer passed to __string() with the pointer passed into __assign_str() via the "src" parameter. But there's a couple of outliers that just pass in a quoted string constant, where comparing the pointers is UB to the compiler, as the compiler is free to create multiple copies of the same string constant. Instead, just use strcmp(). It may slow down the trace event, but this will eventually be removed. Also, fix the issue of passing NULL to strcmp() by adding a WARN_ON() to make sure that both "src" and the pointer saved in __string() are either both NULL or have content, and then checking if "src" is not NULL before performing the strcmp(). Link: https://lore.kernel.org/all/CAHk-=wjxX16kWd=uxG5wzqt=aXoYDf1BgWOKk+qVmAO0zh7sjA@mail.gmail.com/ Fixes: b1afefa62ca9 ("tracing: Use strcmp() in __assign_str() WARN_ON() check") Reported-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) Signed-off-by: Linus Torvalds --- include/trace/stages/stage6_event_callback.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 83da83a0c14f..3690e677263f 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -35,9 +35,8 @@ do { \ char *__str__ = __get_str(dst); \ int __len__ = __get_dynamic_array_len(dst) - 1; \ - WARN_ON_ONCE(__builtin_constant_p(src) ? \ - strcmp((src), __data_offsets.dst##_ptr_) : \ - (src) != __data_offsets.dst##_ptr_); \ + WARN_ON_ONCE(!(void *)(src) != !(void *)__data_offsets.dst##_ptr_); \ + WARN_ON_ONCE((src) && strcmp((src), __data_offsets.dst##_ptr_)); \ memcpy(__str__, __data_offsets.dst##_ptr_ ? : \ EVENT_NULL_STR, __len__); \ __str__[__len__] = '\0'; \ -- cgit v1.2.3