From 07814a9439a3b03d79a1001614b5bc1cab69bcec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Jun 2024 10:09:18 -1000 Subject: sched_ext: Print debug dump after an error exit If a BPF scheduler triggers an error, the scheduler is aborted and the system is reverted to the built-in scheduler. In the process, a lot of information which may be useful for figuring out what happened can be lost. This patch adds debug dump which captures information which may be useful for debugging including runqueue and runnable thread states at the time of failure. The following shows a debug dump after triggering the watchdog: root@test ~# os/work/tools/sched_ext/build/bin/scx_qmap -t 100 stats : enq=1 dsp=0 delta=1 deq=0 stats : enq=90 dsp=90 delta=0 deq=0 stats : enq=156 dsp=156 delta=0 deq=0 stats : enq=218 dsp=218 delta=0 deq=0 stats : enq=255 dsp=255 delta=0 deq=0 stats : enq=271 dsp=271 delta=0 deq=0 stats : enq=284 dsp=284 delta=0 deq=0 stats : enq=293 dsp=293 delta=0 deq=0 DEBUG DUMP ================================================================================ kworker/u32:12[320] triggered exit kind 1026: runnable task stall (stress[1530] failed to run for 6.841s) Backtrace: scx_watchdog_workfn+0x136/0x1c0 process_scheduled_works+0x2b5/0x600 worker_thread+0x269/0x360 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 QMAP FIFO[0]: QMAP FIFO[1]: QMAP FIFO[2]: 1436 QMAP FIFO[3]: QMAP FIFO[4]: CPU states ---------- CPU 0 : nr_run=1 ops_qseq=244 curr=swapper/0[0] class=idle_sched_class QMAP: dsp_idx=1 dsp_cnt=0 R stress[1530] -6841ms scx_state/flags=3/0x1 ops_state/qseq=2/20 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 asm_sysvec_apic_timer_interrupt+0x16/0x20 CPU 2 : nr_run=2 ops_qseq=142 curr=swapper/2[0] class=idle_sched_class QMAP: dsp_idx=1 dsp_cnt=0 R sshd[1703] -5905ms scx_state/flags=3/0x9 ops_state/qseq=2/88 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=1 __x64_sys_ppoll+0xf6/0x120 
do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e R fish[1539] -4141ms scx_state/flags=3/0x9 ops_state/qseq=2/124 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=1 futex_wait+0x60/0xe0 do_futex+0x109/0x180 __x64_sys_futex+0x117/0x190 do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e CPU 3 : nr_run=2 ops_qseq=162 curr=kworker/u32:12[320] class=ext_sched_class QMAP: dsp_idx=1 dsp_cnt=0 *R kworker/u32:12[320] +0ms scx_state/flags=3/0xd ops_state/qseq=0/0 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 scx_dump_state+0x613/0x6f0 scx_ops_error_irq_workfn+0x1f/0x40 irq_work_run_list+0x82/0xd0 irq_work_run+0x14/0x30 __sysvec_irq_work+0x40/0x140 sysvec_irq_work+0x60/0x70 asm_sysvec_irq_work+0x16/0x20 scx_watchdog_workfn+0x15f/0x1c0 process_scheduled_works+0x2b5/0x600 worker_thread+0x269/0x360 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 R kworker/3:2[1436] +0ms scx_state/flags=3/0x9 ops_state/qseq=2/160 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=08 QMAP: force_local=0 kthread+0xeb/0x110 ret_from_fork+0x36/0x40 ret_from_fork_asm+0x1a/0x30 CPU 7 : nr_run=0 ops_qseq=76 curr=swapper/7[0] class=idle_sched_class ================================================================================ EXIT: runnable task stall (stress[1530] failed to run for 6.841s) It shows that CPU 3 was running the watchdog when it triggered the error condition and the scx_qmap thread has been queued on CPU 0 for over 5 seconds but failed to run. It also prints out scx_qmap specific information - e.g. which tasks are queued on each FIFO and so on using the dump_*() ops. This dump has proved pretty useful for developing and debugging BPF schedulers. Debug dump is generated automatically when the BPF scheduler exits due to an error. The debug buffer used in such cases is determined by sched_ext_ops.exit_dump_len and defaults to 32k. 
If the debug dump overruns the available buffer, the output is truncated and marked accordingly. Debug dump output can also be read through the sched_ext_dump tracepoint. When read through the tracepoint, there is no length limit. SysRq-D can be used to trigger debug dump at any time while a BPF scheduler is loaded. This is non-destructive - the scheduler keeps running afterwards. The output can be read through the sched_ext_dump tracepoint. v2: - The size of exit debug dump buffer can now be customized using sched_ext_ops.exit_dump_len. - sched_ext_ops.dump*() added to enable dumping of BPF scheduler specific information. - Tracepoint output and SysRq-D triggering added. Signed-off-by: Tejun Heo Reviewed-by: David Vernet --- include/trace/events/sched_ext.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/trace/events/sched_ext.h (limited to 'include/trace') diff --git a/include/trace/events/sched_ext.h b/include/trace/events/sched_ext.h new file mode 100644 index 000000000000..fe19da7315a9 --- /dev/null +++ b/include/trace/events/sched_ext.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched_ext + +#if !defined(_TRACE_SCHED_EXT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_EXT_H + +#include + +TRACE_EVENT(sched_ext_dump, + + TP_PROTO(const char *line), + + TP_ARGS(line), + + TP_STRUCT__entry( + __string(line, line) + ), + + TP_fast_assign( + __assign_str(line); + ), + + TP_printk("%s", + __get_str(line) + ) +); + +#endif /* _TRACE_SCHED_EXT_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 469b77e421b92ce4662f6f1a47f1e7af9dbd14bd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 30 Jul 2024 02:05:12 +0000 Subject: ALSA: trace: use snd_pcm_direction_name() We already have snd_pcm_direction_name(). Let's use it. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87sevrk52f.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Takashi Iwai --- include/trace/events/asoc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index 202fc3680c36..6696dbcc2b96 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -8,6 +8,7 @@ #include #include #include +#include #define DAPM_DIRECT "(direct)" #define DAPM_ARROW(dir) (((dir) == SND_SOC_DAPM_DIR_OUT) ? "->" : "<-") @@ -212,7 +213,7 @@ TRACE_EVENT(snd_soc_dapm_connected, ), TP_printk("%s: found %d paths", - __entry->stream ? "capture" : "playback", __entry->paths) + snd_pcm_direction_name(__entry->stream), __entry->paths) ); TRACE_EVENT(snd_soc_jack_irq, -- cgit v1.2.3 From bf66471987b4bd537bc800353ca7d39bfd1d1022 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2024 10:51:10 +0200 Subject: context_tracking, rcu: Rename struct context_tracking .dynticks_nesting into .nesting The context_tracking.state RCU_DYNTICKS subvariable has been renamed to RCU_WATCHING, reflect that change in the related helpers. [ neeraj.upadhyay: Fix htmldocs build error reported by Stephen Rothwell ] Suggested-by: Frederic Weisbecker Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/trace/events/rcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 31b3e0d3e65f..4066b6d51e46 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -469,7 +469,7 @@ TRACE_EVENT(rcu_stall_warning, * polarity: "Start", "End", "StillNonIdle" for entering, exiting or still not * being in dyntick-idle mode. * context: "USER" or "IDLE" or "IRQ". - * NMIs nested in IRQs are inferred with dynticks_nesting > 1 in IRQ context. 
+ * NMIs nested in IRQs are inferred with nesting > 1 in IRQ context. * * These events also take a pair of numbers, which indicate the nesting * depth before and after the event of interest, and a third number that is -- cgit v1.2.3 From 61b74964536e86445d43acff5cff6ad907ba9321 Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Thu, 1 Aug 2024 05:18:14 +0000 Subject: trace: platform/x86/intel/ifs: Add SBAF trace support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracing support for the SBAF IFS tests, which may be useful for debugging systems that fail these tests. Log details like test content batch number, SBAF bundle ID, program index and the exact errors or warnings encountered by each HT thread during the test. Reviewed-by: Ashok Raj Reviewed-by: Tony Luck Reviewed-by: Ilpo Järvinen Reviewed-by: Steven Rostedt (Google) Signed-off-by: Jithu Joseph Signed-off-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20240801051814.1935149-5-sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Hans de Goede --- include/trace/events/intel_ifs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/intel_ifs.h b/include/trace/events/intel_ifs.h index 0d88ebf2c980..70323acde1de 100644 --- a/include/trace/events/intel_ifs.h +++ b/include/trace/events/intel_ifs.h @@ -35,6 +35,33 @@ TRACE_EVENT(ifs_status, __entry->status) ); +TRACE_EVENT(ifs_sbaf, + + TP_PROTO(int batch, union ifs_sbaf activate, union ifs_sbaf_status status), + + TP_ARGS(batch, activate, status), + + TP_STRUCT__entry( + __field( u64, status ) + __field( int, batch ) + __field( u16, bundle ) + __field( u16, pgm ) + ), + + TP_fast_assign( + __entry->status = status.data; + __entry->batch = batch; + __entry->bundle = activate.bundle_idx; + __entry->pgm = activate.pgm_idx; + ), + + TP_printk("batch: 0x%.2x, bundle_idx: 0x%.4x, pgm_idx: 0x%.4x, status: 0x%.16llx", + 
__entry->batch, + __entry->bundle, + __entry->pgm, + __entry->status) +); + #endif /* _TRACE_IFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From aaf8c0b9ae042494cb4585883b15c1332de77840 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Jun 2024 09:47:27 +0800 Subject: f2fs: reduce expensive checkpoint trigger frequency We may trigger checkpoints at high frequency in the case below: 1. mkdir /mnt/dir1; set dir1 encrypted 2. touch /mnt/file1; fsync /mnt/file1 3. mkdir /mnt/dir2; set dir2 encrypted 4. touch /mnt/file2; fsync /mnt/file2 ... Although the newly created dir and file are not related, due to commit bbf156f7afa7 ("f2fs: fix lost xattrs of directories"), we will trigger a checkpoint whenever fsync() comes after a new encrypted dir is created. In order to avoid such a performance regression, let's record an entry including the directory's ino in a global cache whenever we update a directory's xattr data, and then trigger checkpoint() only if the xattr metadata of the target file's parent was updated. 
This patch also covers the case below, where no encryption is involved: 1) parent is checkpointed 2) set_xattr(dir) w/ new xnid 3) create(file) 4) fsync(file) Fixes: bbf156f7afa7 ("f2fs: fix lost xattrs of directories") Reported-by: wangzijie Reported-by: Zhiguo Niu Tested-by: Zhiguo Niu Reported-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ed794b5fefbe..2851c823095b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -139,7 +139,8 @@ TRACE_DEFINE_ENUM(EX_BLOCK_AGE); { CP_NODE_NEED_CP, "node needs cp" }, \ { CP_FASTBOOT_MODE, "fastboot mode" }, \ { CP_SPEC_LOG_NUM, "log type is 2" }, \ - { CP_RECOVER_DIR, "dir needs recovery" }) + { CP_RECOVER_DIR, "dir needs recovery" }, \ + { CP_XATTR_DIR, "dir's xattr updated" }) #define show_shutdown_mode(type) \ __print_symbolic(type, \ -- cgit v1.2.3 From 4f336dc07eceb77d2164bc1121a5ae6003b19f55 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 22 Apr 2024 13:57:29 +0200 Subject: context_tracking, rcu: Rename rcu_dyntick trace event into rcu_watching The "rcu_dyntick" naming convention has been turned into "rcu_watching" for all helpers now, align the trace event to that. To add to the confusion, the strings passed to the trace event are now reversed: when RCU "starts" the dyntick / EQS state, it "stops" watching. 
Signed-off-by: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- include/trace/events/rcu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 4066b6d51e46..e81431deaa50 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -466,40 +466,40 @@ TRACE_EVENT(rcu_stall_warning, /* * Tracepoint for dyntick-idle entry/exit events. These take 2 strings * as argument: - * polarity: "Start", "End", "StillNonIdle" for entering, exiting or still not - * being in dyntick-idle mode. + * polarity: "Start", "End", "StillWatching" for entering, exiting or still not + * being in EQS mode. * context: "USER" or "IDLE" or "IRQ". * NMIs nested in IRQs are inferred with nesting > 1 in IRQ context. * * These events also take a pair of numbers, which indicate the nesting * depth before and after the event of interest, and a third number that is - * the ->dynticks counter. Note that task-related and interrupt-related + * the RCU_WATCHING counter. Note that task-related and interrupt-related * events use two separate counters, and that the "++=" and "--=" events * for irq/NMI will change the counter by two, otherwise by one. 
*/ -TRACE_EVENT_RCU(rcu_dyntick, +TRACE_EVENT_RCU(rcu_watching, - TP_PROTO(const char *polarity, long oldnesting, long newnesting, int dynticks), + TP_PROTO(const char *polarity, long oldnesting, long newnesting, int counter), - TP_ARGS(polarity, oldnesting, newnesting, dynticks), + TP_ARGS(polarity, oldnesting, newnesting, counter), TP_STRUCT__entry( __field(const char *, polarity) __field(long, oldnesting) __field(long, newnesting) - __field(int, dynticks) + __field(int, counter) ), TP_fast_assign( __entry->polarity = polarity; __entry->oldnesting = oldnesting; __entry->newnesting = newnesting; - __entry->dynticks = dynticks; + __entry->counter = counter; ), TP_printk("%s %lx %lx %#3x", __entry->polarity, __entry->oldnesting, __entry->newnesting, - __entry->dynticks & 0xfff) + __entry->counter & 0xfff) ); /* -- cgit v1.2.3 From 89835a58f5f54d52537709f2513fb91024e2d069 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 21 Aug 2024 08:54:11 +0300 Subject: scsi: ufs: Move UFS trace events to private header UFS trace events are called exclusively from the UFS core drivers. Make those events private to the core driver. The MAINTAINERS file does not need updating as the maintainership remains the same and the relevant directory is already covered. Reviewed-by: Bart Van Assche Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240821055411.3128159-1-avri.altman@wdc.com Acked-by: Bean Huo Signed-off-by: Martin K. Petersen --- include/trace/events/ufs.h | 399 --------------------------------------------- 1 file changed, 399 deletions(-) delete mode 100644 include/trace/events/ufs.h (limited to 'include/trace') diff --git a/include/trace/events/ufs.h b/include/trace/events/ufs.h deleted file mode 100644 index c4e209fbdfbb..000000000000 --- a/include/trace/events/ufs.h +++ /dev/null @@ -1,399 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. 
- */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ufs - -#if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_UFS_H - -#include - -#define str_opcode(opcode) \ - __print_symbolic(opcode, \ - { WRITE_16, "WRITE_16" }, \ - { WRITE_10, "WRITE_10" }, \ - { READ_16, "READ_16" }, \ - { READ_10, "READ_10" }, \ - { SYNCHRONIZE_CACHE, "SYNC" }, \ - { UNMAP, "UNMAP" }) - -#define UFS_LINK_STATES \ - EM(UIC_LINK_OFF_STATE, "UIC_LINK_OFF_STATE") \ - EM(UIC_LINK_ACTIVE_STATE, "UIC_LINK_ACTIVE_STATE") \ - EMe(UIC_LINK_HIBERN8_STATE, "UIC_LINK_HIBERN8_STATE") - -#define UFS_PWR_MODES \ - EM(UFS_ACTIVE_PWR_MODE, "UFS_ACTIVE_PWR_MODE") \ - EM(UFS_SLEEP_PWR_MODE, "UFS_SLEEP_PWR_MODE") \ - EM(UFS_POWERDOWN_PWR_MODE, "UFS_POWERDOWN_PWR_MODE") \ - EMe(UFS_DEEPSLEEP_PWR_MODE, "UFS_DEEPSLEEP_PWR_MODE") - -#define UFSCHD_CLK_GATING_STATES \ - EM(CLKS_OFF, "CLKS_OFF") \ - EM(CLKS_ON, "CLKS_ON") \ - EM(REQ_CLKS_OFF, "REQ_CLKS_OFF") \ - EMe(REQ_CLKS_ON, "REQ_CLKS_ON") - -#define UFS_CMD_TRACE_STRINGS \ - EM(UFS_CMD_SEND, "send_req") \ - EM(UFS_CMD_COMP, "complete_rsp") \ - EM(UFS_DEV_COMP, "dev_complete") \ - EM(UFS_QUERY_SEND, "query_send") \ - EM(UFS_QUERY_COMP, "query_complete") \ - EM(UFS_QUERY_ERR, "query_complete_err") \ - EM(UFS_TM_SEND, "tm_send") \ - EM(UFS_TM_COMP, "tm_complete") \ - EMe(UFS_TM_ERR, "tm_complete_err") - -#define UFS_CMD_TRACE_TSF_TYPES \ - EM(UFS_TSF_CDB, "CDB") \ - EM(UFS_TSF_OSF, "OSF") \ - EM(UFS_TSF_TM_INPUT, "TM_INPUT") \ - EMe(UFS_TSF_TM_OUTPUT, "TM_OUTPUT") - -/* Enums require being exported to userspace, for user tool parsing */ -#undef EM -#undef EMe -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -UFS_LINK_STATES; -UFS_PWR_MODES; -UFSCHD_CLK_GATING_STATES; -UFS_CMD_TRACE_STRINGS -UFS_CMD_TRACE_TSF_TYPES - -/* - * Now redefine the EM() and EMe() macros to map the enums to the strings - * that will be printed in the output. 
- */ -#undef EM -#undef EMe -#define EM(a, b) {a, b}, -#define EMe(a, b) {a, b} - -#define show_ufs_cmd_trace_str(str_t) \ - __print_symbolic(str_t, UFS_CMD_TRACE_STRINGS) -#define show_ufs_cmd_trace_tsf(tsf) \ - __print_symbolic(tsf, UFS_CMD_TRACE_TSF_TYPES) - -TRACE_EVENT(ufshcd_clk_gating, - - TP_PROTO(const char *dev_name, int state), - - TP_ARGS(dev_name, state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(int, state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->state = state; - ), - - TP_printk("%s: gating state changed to %s", - __get_str(dev_name), - __print_symbolic(__entry->state, UFSCHD_CLK_GATING_STATES)) -); - -TRACE_EVENT(ufshcd_clk_scaling, - - TP_PROTO(const char *dev_name, const char *state, const char *clk, - u32 prev_state, u32 curr_state), - - TP_ARGS(dev_name, state, clk, prev_state, curr_state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(state, state) - __string(clk, clk) - __field(u32, prev_state) - __field(u32, curr_state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(state); - __assign_str(clk); - __entry->prev_state = prev_state; - __entry->curr_state = curr_state; - ), - - TP_printk("%s: %s %s from %u to %u Hz", - __get_str(dev_name), __get_str(state), __get_str(clk), - __entry->prev_state, __entry->curr_state) -); - -TRACE_EVENT(ufshcd_auto_bkops_state, - - TP_PROTO(const char *dev_name, const char *state), - - TP_ARGS(dev_name, state), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(state, state) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(state); - ), - - TP_printk("%s: auto bkops - %s", - __get_str(dev_name), __get_str(state)) -); - -DECLARE_EVENT_CLASS(ufshcd_profiling_template, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - - TP_ARGS(dev_name, profile_info, time_us, err), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __string(profile_info, profile_info) - __field(s64, 
time_us) - __field(int, err) - ), - - TP_fast_assign( - __assign_str(dev_name); - __assign_str(profile_info); - __entry->time_us = time_us; - __entry->err = err; - ), - - TP_printk("%s: %s: took %lld usecs, err %d", - __get_str(dev_name), __get_str(profile_info), - __entry->time_us, __entry->err) -); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_hibern8, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_gating, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_scaling, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, - int err), - TP_ARGS(dev_name, profile_info, time_us, err)); - -DECLARE_EVENT_CLASS(ufshcd_template, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - - TP_ARGS(dev_name, err, usecs, dev_state, link_state), - - TP_STRUCT__entry( - __field(s64, usecs) - __field(int, err) - __string(dev_name, dev_name) - __field(int, dev_state) - __field(int, link_state) - ), - - TP_fast_assign( - __entry->usecs = usecs; - __entry->err = err; - __assign_str(dev_name); - __entry->dev_state = dev_state; - __entry->link_state = link_state; - ), - - TP_printk( - "%s: took %lld usecs, dev_state: %s, link_state: %s, err %d", - __get_str(dev_name), - __entry->usecs, - __print_symbolic(__entry->dev_state, UFS_PWR_MODES), - __print_symbolic(__entry->link_state, UFS_LINK_STATES), - __entry->err - ) -); - -DEFINE_EVENT(ufshcd_template, ufshcd_system_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_system_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, 
int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_init, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, - int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); - -TRACE_EVENT(ufshcd_command, - TP_PROTO(struct scsi_device *sdev, enum ufs_trace_str_t str_t, - unsigned int tag, u32 doorbell, u32 hwq_id, int transfer_len, - u32 intr, u64 lba, u8 opcode, u8 group_id), - - TP_ARGS(sdev, str_t, tag, doorbell, hwq_id, transfer_len, intr, lba, - opcode, group_id), - - TP_STRUCT__entry( - __field(struct scsi_device *, sdev) - __field(enum ufs_trace_str_t, str_t) - __field(unsigned int, tag) - __field(u32, doorbell) - __field(u32, hwq_id) - __field(u32, intr) - __field(u64, lba) - __field(int, 
transfer_len) - __field(u8, opcode) - __field(u8, group_id) - ), - - TP_fast_assign( - __entry->sdev = sdev; - __entry->str_t = str_t; - __entry->tag = tag; - __entry->doorbell = doorbell; - __entry->hwq_id = hwq_id; - __entry->intr = intr; - __entry->lba = lba; - __entry->transfer_len = transfer_len; - __entry->opcode = opcode; - __entry->group_id = group_id; - ), - - TP_printk( - "%s: %s: tag: %u, DB: 0x%x, size: %d, IS: %u, LBA: %llu, opcode: 0x%x (%s), group_id: 0x%x, hwq_id: %d", - show_ufs_cmd_trace_str(__entry->str_t), - dev_name(&__entry->sdev->sdev_dev), __entry->tag, - __entry->doorbell, __entry->transfer_len, __entry->intr, - __entry->lba, (u32)__entry->opcode, str_opcode(__entry->opcode), - (u32)__entry->group_id, __entry->hwq_id - ) -); - -TRACE_EVENT(ufshcd_uic_command, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, u32 cmd, - u32 arg1, u32 arg2, u32 arg3), - - TP_ARGS(dev_name, str_t, cmd, arg1, arg2, arg3), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(enum ufs_trace_str_t, str_t) - __field(u32, cmd) - __field(u32, arg1) - __field(u32, arg2) - __field(u32, arg3) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->str_t = str_t; - __entry->cmd = cmd; - __entry->arg1 = arg1; - __entry->arg2 = arg2; - __entry->arg3 = arg3; - ), - - TP_printk( - "%s: %s: cmd: 0x%x, arg1: 0x%x, arg2: 0x%x, arg3: 0x%x", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), - __entry->cmd, __entry->arg1, __entry->arg2, __entry->arg3 - ) -); - -TRACE_EVENT(ufshcd_upiu, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, void *hdr, - void *tsf, enum ufs_trace_tsf_t tsf_t), - - TP_ARGS(dev_name, str_t, hdr, tsf, tsf_t), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(enum ufs_trace_str_t, str_t) - __array(unsigned char, hdr, 12) - __array(unsigned char, tsf, 16) - __field(enum ufs_trace_tsf_t, tsf_t) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->str_t = str_t; - memcpy(__entry->hdr, hdr, 
sizeof(__entry->hdr)); - memcpy(__entry->tsf, tsf, sizeof(__entry->tsf)); - __entry->tsf_t = tsf_t; - ), - - TP_printk( - "%s: %s: HDR:%s, %s:%s", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), - __print_hex(__entry->hdr, sizeof(__entry->hdr)), - show_ufs_cmd_trace_tsf(__entry->tsf_t), - __print_hex(__entry->tsf, sizeof(__entry->tsf)) - ) -); - -TRACE_EVENT(ufshcd_exception_event, - - TP_PROTO(const char *dev_name, u16 status), - - TP_ARGS(dev_name, status), - - TP_STRUCT__entry( - __string(dev_name, dev_name) - __field(u16, status) - ), - - TP_fast_assign( - __assign_str(dev_name); - __entry->status = status; - ), - - TP_printk("%s: status 0x%x", - __get_str(dev_name), __entry->status - ) -); - -#endif /* if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3 From f951171044762a0d179caf9f1addcdbc7cb533bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Aug 2024 08:50:46 +0200 Subject: ext4: remove tracing for FALLOC_FL_NO_HIDE_STALE FALLOC_FL_NO_HIDE_STALE can't make it past vfs_fallocate (and if the flag does what the name implies that's a good thing as it would be highly dangerous). Remove the dead tracing code for it. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240827065123.1762168-3-hch@lst.de Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/trace/events/ext4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index cc5e9b7b2b44..156908641e68 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -91,7 +91,6 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); #define show_falloc_mode(mode) __print_flags(mode, "|", \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ - { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) -- cgit v1.2.3 From 459ca85ae1feff78d1518344df88bb79a092780c Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Wed, 28 Aug 2024 16:13:59 +0800 Subject: writeback: Refine the show_inode_state() macro definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the show_inode_state() macro only prints part of the state of inode->i_state. Let’s improve it to display more of its state. 
Signed-off-by: Julian Sun Link: https://lore.kernel.org/r/20240828081359.62429-1-sunjunchao2870@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/trace/events/writeback.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 54e353c9f919..a261e86e61fa 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -20,7 +20,15 @@ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ - {I_REFERENCED, "I_REFERENCED"} \ + {I_REFERENCED, "I_REFERENCED"}, \ + {I_LINKABLE, "I_LINKABLE"}, \ + {I_WB_SWITCH, "I_WB_SWITCH"}, \ + {I_OVL_INUSE, "I_OVL_INUSE"}, \ + {I_CREATING, "I_CREATING"}, \ + {I_DONTCACHE, "I_DONTCACHE"}, \ + {I_SYNC_QUEUED, "I_SYNC_QUEUED"}, \ + {I_PINNING_NETFS_WB, "I_PINNING_NETFS_WB"}, \ + {I_LRU_ISOLATING, "I_LRU_ISOLATING"} \ ) /* enums need to be exported to user space */ -- cgit v1.2.3 From cef48236dfe55fa266d505e8a497963a7bc5ef2a Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Thu, 18 Jul 2024 15:06:16 +0800 Subject: NFS: trace: show TIMEDOUT instead of 0x6e __nfs_revalidate_inode may return ETIMEDOUT. 
Print the symbolic name of ETIMEDOUT in the nfs trace: before: cat-5191 [005] 119.331127: nfs_revalidate_inode_exit: error=-110 (0x6e) after: cat-1738 [004] 44.365509: nfs_revalidate_inode_exit: error=-110 (TIMEDOUT) Signed-off-by: Chen Hanxiao Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/trace/misc/nfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index 7b221d51133a..c82233e950ac 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -51,6 +51,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_IO, "IO" }, \ { NFSERR_NXIO, "NXIO" }, \ { ECHILD, "CHILD" }, \ + { ETIMEDOUT, "TIMEDOUT" }, \ { NFSERR_EAGAIN, "AGAIN" }, \ { NFSERR_ACCES, "ACCES" }, \ { NFSERR_EXIST, "EXIST" }, \ -- cgit v1.2.3 From 29943248af0a8ac3edb808564baa417b40e35521 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 29 Jul 2024 14:47:17 +0530 Subject: mm: improve code consistency with zonelist_* helper functions Replace direct access to zoneref->zone, zoneref->zone_idx, or zone_to_nid(zoneref->zone) with the corresponding zonelist_* helper functions for consistency. No functional change. 
Link: https://lkml.kernel.org/r/20240729091717.464-1-shivankg@amd.com Co-developed-by: Shivank Garg Signed-off-by: Shivank Garg Signed-off-by: Wei Yang Acked-by: David Hildenbrand Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/trace/events/oom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index a42be4c8563b..9f0a5d1482c4 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -55,8 +55,8 @@ TRACE_EVENT(reclaim_retry_zone, ), TP_fast_assign( - __entry->node = zone_to_nid(zoneref->zone); - __entry->zone_idx = zoneref->zone_idx; + __entry->node = zonelist_node_idx(zoneref); + __entry->zone_idx = zonelist_zone_idx(zoneref); __entry->order = order; __entry->reclaimable = reclaimable; __entry->available = available; -- cgit v1.2.3 From 09022bc196d23484a7a5d48cf373f8583e3fcf23 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 7 Aug 2024 20:35:26 +0100 Subject: mm: remove PG_error The PG_error bit is now unused; delete it and free up a bit in page->flags. 
Link: https://lkml.kernel.org/r/20240807193528.1865100-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/trace/events/mmflags.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..b5c4da370a50 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -100,7 +100,6 @@ #define __def_pageflag_names \ DEF_PAGEFLAG_NAME(locked), \ DEF_PAGEFLAG_NAME(waiters), \ - DEF_PAGEFLAG_NAME(error), \ DEF_PAGEFLAG_NAME(referenced), \ DEF_PAGEFLAG_NAME(uptodate), \ DEF_PAGEFLAG_NAME(dirty), \ -- cgit v1.2.3 From b6273b55d88539c6a7127a697c61d3f89c5831fe Mon Sep 17 00:00:00 2001 From: Takaya Saeki Date: Tue, 13 Aug 2024 10:03:12 +0000 Subject: filemap: add trace events for get_pages, map_pages, and fault To allow precise tracking of page caches accessed, add new tracepoints that trigger when a process actually accesses them. The ureadahead program used by ChromeOS traces the disk access of programs as they start up at boot up. It uses mincore(2) or the 'mm_filemap_add_to_page_cache' trace event to accomplish this. It stores this information in a "pack" file and on subsequent boots, it will read the pack file and call readahead(2) on the information so that disk storage can be loaded into RAM before the applications actually need it. A problem we see is that the kernel's readahead algorithm can aggressively pull in more data than needed (to try and accomplish the same goal), and this data is also recorded. The end result is that the pack file contains a lot of pages on disk that are never actually used. Calling readahead(2) on these unused pages can slow down the system boot up times. To solve this, add 3 new trace events, get_pages, map_pages, and fault. These will be used to trace the pages that are not only pulled in from disk, but are actually used by the application. 
Only those pages will be stored in the pack file, and this helps out the performance of boot up. With the combination of these 3 new trace events and mm_filemap_add_to_page_cache, we observed a reduction in the pack file by 7.3% - 20% on ChromeOS varying by device. Link: https://lkml.kernel.org/r/20240813100312.3930505-1-takayas@chromium.org Signed-off-by: Takaya Saeki Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Cc: Junichi Uekawa Cc: Mathieu Desnoyers Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/trace/events/filemap.h | 84 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index 46c89c1e460c..f48fe637bfd2 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -56,6 +56,90 @@ DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, TP_ARGS(folio) ); +DECLARE_EVENT_CLASS(mm_filemap_op_page_cache_range, + + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + + TP_ARGS(mapping, index, last_index), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned long, index) + __field(unsigned long, last_index) + ), + + TP_fast_assign( + __entry->i_ino = mapping->host->i_ino; + if (mapping->host->i_sb) + __entry->s_dev = + mapping->host->i_sb->s_dev; + else + __entry->s_dev = mapping->host->i_rdev; + __entry->index = index; + __entry->last_index = last_index; + ), + + TP_printk( + "dev=%d:%d ino=%lx ofs=%lld-%lld", + MAJOR(__entry->s_dev), + MINOR(__entry->s_dev), __entry->i_ino, + ((loff_t)__entry->index) << PAGE_SHIFT, + ((((loff_t)__entry->last_index + 1) << PAGE_SHIFT) - 1) + ) +); + +DEFINE_EVENT(mm_filemap_op_page_cache_range, mm_filemap_get_pages, + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + TP_ARGS(mapping, index, last_index) +); + 
+DEFINE_EVENT(mm_filemap_op_page_cache_range, mm_filemap_map_pages, + TP_PROTO( + struct address_space *mapping, + pgoff_t index, + pgoff_t last_index + ), + TP_ARGS(mapping, index, last_index) +); + +TRACE_EVENT(mm_filemap_fault, + TP_PROTO(struct address_space *mapping, pgoff_t index), + + TP_ARGS(mapping, index), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned long, index) + ), + + TP_fast_assign( + __entry->i_ino = mapping->host->i_ino; + if (mapping->host->i_sb) + __entry->s_dev = + mapping->host->i_sb->s_dev; + else + __entry->s_dev = mapping->host->i_rdev; + __entry->index = index; + ), + + TP_printk( + "dev=%d:%d ino=%lx ofs=%lld", + MAJOR(__entry->s_dev), + MINOR(__entry->s_dev), __entry->i_ino, + ((loff_t)__entry->index) << PAGE_SHIFT + ) +); + TRACE_EVENT(filemap_set_wb_err, TP_PROTO(struct address_space *mapping, errseq_t eseq), -- cgit v1.2.3 From e27ad6560e4b5993315b56d6884ca5a4652468f4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 18:39:09 +0100 Subject: printf: remove %pGt support Patch series "Increase the number of bits available in page_type". Kent wants more than 16 bits in page_type, so I resurrected this old patch and expanded it a bit. It's a bit more efficient than our current scheme (1 4-byte insn vs 3 insns of 13 bytes total) to test a single page type. This patch (of 4): An upcoming patch will convert page type from being a bitfield to a single byte, so we will not be able to use %pG to print the page type any more. The printing of the symbolic name will be restored in that patch. 
Link: https://lkml.kernel.org/r/20240821173914.2270383-1-willy@infradead.org Link: https://lkml.kernel.org/r/20240821173914.2270383-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/trace/events/mmflags.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b5c4da370a50..c151cc21d367 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -130,16 +130,6 @@ IF_HAVE_PG_ARCH_X(arch_3) __def_pageflag_names \ ) : "none" -#define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } - -#define __def_pagetype_names \ - DEF_PAGETYPE_NAME(slab), \ - DEF_PAGETYPE_NAME(hugetlb), \ - DEF_PAGETYPE_NAME(offline), \ - DEF_PAGETYPE_NAME(guard), \ - DEF_PAGETYPE_NAME(table), \ - DEF_PAGETYPE_NAME(buddy) - #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) -- cgit v1.2.3 From 02e1960aafac33721401dcd92e915325fdb524b2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:42 +0100 Subject: mm: rename PG_mappedtodisk to PG_owner_2 This flag has similar constraints to PG_owner_priv_1 -- it is ignored by core code, and is entirely for the use of the code which allocated the folio. Since the pagecache does not use it, individual filesystems can use it. The bufferhead code does use it, so filesystems which use the buffer cache must not use it for another purpose. 
Link: https://lkml.kernel.org/r/20240821193445.2294269-10-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/trace/events/mmflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index c151cc21d367..3b51558cdc9b 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -107,13 +107,13 @@ DEF_PAGEFLAG_NAME(active), \ DEF_PAGEFLAG_NAME(workingset), \ DEF_PAGEFLAG_NAME(owner_priv_1), \ + DEF_PAGEFLAG_NAME(owner_2), \ DEF_PAGEFLAG_NAME(arch_1), \ DEF_PAGEFLAG_NAME(reserved), \ DEF_PAGEFLAG_NAME(private), \ DEF_PAGEFLAG_NAME(private_2), \ DEF_PAGEFLAG_NAME(writeback), \ DEF_PAGEFLAG_NAME(head), \ - DEF_PAGEFLAG_NAME(mappedtodisk), \ DEF_PAGEFLAG_NAME(reclaim), \ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ -- cgit v1.2.3 From 7a87225ae2c6c317c7b80cf599e5cf0eee699196 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Aug 2024 20:34:43 +0100 Subject: x86: remove PG_uncached Convert x86 to use PG_arch_2 instead of PG_uncached and remove PG_uncached. 
Link: https://lkml.kernel.org/r/20240821193445.2294269-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/trace/events/mmflags.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 3b51558cdc9b..58f2699331b6 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -71,12 +71,6 @@ #define IF_HAVE_PG_MLOCK(_name) #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED -#define IF_HAVE_PG_UNCACHED(_name) ,{1UL << PG_##_name, __stringify(_name)} -#else -#define IF_HAVE_PG_UNCACHED(_name) -#endif - #ifdef CONFIG_MEMORY_FAILURE #define IF_HAVE_PG_HWPOISON(_name) ,{1UL << PG_##_name, __stringify(_name)} #else @@ -89,10 +83,16 @@ #define IF_HAVE_PG_IDLE(_name) #endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_X -#define IF_HAVE_PG_ARCH_X(_name) ,{1UL << PG_##_name, __stringify(_name)} +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 +#define IF_HAVE_PG_ARCH_2(_name) ,{1UL << PG_##_name, __stringify(_name)} +#else +#define IF_HAVE_PG_ARCH_2(_name) +#endif + +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 +#define IF_HAVE_PG_ARCH_3(_name) ,{1UL << PG_##_name, __stringify(_name)} #else -#define IF_HAVE_PG_ARCH_X(_name) +#define IF_HAVE_PG_ARCH_3(_name) #endif #define DEF_PAGEFLAG_NAME(_name) { 1UL << PG_##_name, __stringify(_name) } @@ -118,12 +118,11 @@ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ IF_HAVE_PG_MLOCK(mlocked) \ -IF_HAVE_PG_UNCACHED(uncached) \ IF_HAVE_PG_HWPOISON(hwpoison) \ IF_HAVE_PG_IDLE(idle) \ IF_HAVE_PG_IDLE(young) \ -IF_HAVE_PG_ARCH_X(arch_2) \ -IF_HAVE_PG_ARCH_X(arch_3) +IF_HAVE_PG_ARCH_2(arch_2) \ +IF_HAVE_PG_ARCH_3(arch_3) #define show_page_flags(flags) \ (flags) ? 
__print_flags(flags, "|", \ -- cgit v1.2.3 From 8f52de0077ba3bf41e5d53d67a185700f41efce7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jun 2024 11:33:01 +0100 Subject: netfs: Reduce number of conditional branches in netfs_perform_write() Reduce the number of conditional branches in netfs_perform_write() by merging in netfs_how_to_modify() and then creating a separate if-statement for each way we might modify a folio. Note that this means replicating the data copy in each path. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-6-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 606b4a0f92da..a4fd5dea52f4 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -129,7 +129,6 @@ E_(netfs_sreq_trace_put_terminated, "PUT TERM ") #define netfs_folio_traces \ - /* The first few correspond to enum netfs_how_to_modify */ \ EM(netfs_folio_is_uptodate, "mod-uptodate") \ EM(netfs_just_prefetch, "mod-prefetch") \ EM(netfs_whole_folio_modify, "mod-whole-f") \ @@ -139,7 +138,6 @@ EM(netfs_flush_content, "flush") \ EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ - /* The rest are for writeback */ \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ -- cgit v1.2.3 From 51e9a86a4f75c6dc8531407b2ab2ccc4ad137e5a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jul 2024 09:45:46 +0100 Subject: netfs: Reserve netfs_sreq_source 0 as unset/unknown Reserve the 0-valued netfs_sreq_source to mean unset or unknown so that it can be seen in the trace as such rather than appearing as download-from-server when it's 
going to get switched to something else. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-9-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index a4fd5dea52f4..f4105b8e5894 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -60,6 +60,7 @@ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ + EM(NETFS_SOURCE_UNKNOWN, "----") \ EM(NETFS_FILL_WITH_ZEROES, "ZERO") \ EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \ EM(NETFS_READ_FROM_CACHE, "READ") \ -- cgit v1.2.3 From c57de2a9259d7dc18a7a425fca91c77502263d8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jul 2024 09:43:38 +0100 Subject: netfs: Remove NETFS_COPY_TO_CACHE Remove NETFS_COPY_TO_CACHE as it isn't used anymore. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-10-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f4105b8e5894..47cd11aaccac 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -34,11 +34,10 @@ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ EM(NETFS_READ_FOR_WRITE, "RW") \ - EM(NETFS_COPY_TO_CACHE, "CC") \ + EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \ - EM(NETFS_DIO_READ, "DR") \ E_(NETFS_DIO_WRITE, "DW") #define netfs_rreq_traces \ -- cgit v1.2.3 From 1c3e34bf8802b8b17d0c2067c43bb49b7e83885c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 5 Jul 2024 23:14:51 +0200 Subject: pwm: Make info in traces about affected pwm more useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hashed pointer isn't useful to identify the pwm device. Instead store and emit chipid and hwpwm. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240705211452.1157967-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/trace/events/pwm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/pwm.h b/include/trace/events/pwm.h index 12b35e4ff917..8022701c446d 100644 --- a/include/trace/events/pwm.h +++ b/include/trace/events/pwm.h @@ -15,7 +15,8 @@ DECLARE_EVENT_CLASS(pwm, TP_ARGS(pwm, state, err), TP_STRUCT__entry( - __field(struct pwm_device *, pwm) + __field(unsigned int, chipid) + __field(unsigned int, hwpwm) __field(u64, period) __field(u64, duty_cycle) __field(enum pwm_polarity, polarity) @@ -24,7 +25,8 @@ DECLARE_EVENT_CLASS(pwm, ), TP_fast_assign( - __entry->pwm = pwm; + __entry->chipid = pwm->chip->id; + __entry->hwpwm = pwm->hwpwm; __entry->period = state->period; __entry->duty_cycle = state->duty_cycle; __entry->polarity = state->polarity; @@ -32,8 +34,8 @@ DECLARE_EVENT_CLASS(pwm, __entry->err = err; ), - TP_printk("%p: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", - __entry->pwm, __entry->period, __entry->duty_cycle, + TP_printk("pwmchip%u.%u: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", + __entry->chipid, __entry->hwpwm, __entry->period, __entry->duty_cycle, __entry->polarity, __entry->enabled, __entry->err) ); -- cgit v1.2.3 From 038eb433dc1474c4bc7d33188294e3d4778efdfd Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 6 Sep 2024 17:54:34 -0400 Subject: dma-mapping: add tracing for dma-mapping API calls When debugging drivers, it can often be useful to trace when memory gets (un)mapped for DMA (and can be accessed by the device). Add some tracepoints for this purpose. Use u64 instead of phys_addr_t and dma_addr_t (and similarly %llx instead of %pa) because libtraceevent can't handle typedefs in all cases. 
Signed-off-by: Sean Anderson Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 341 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 include/trace/events/dma.h (limited to 'include/trace') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h new file mode 100644 index 000000000000..f57f05331d73 --- /dev/null +++ b/include/trace/events/dma.h @@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dma + +#if !defined(_TRACE_DMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DMA_H + +#include +#include +#include +#include + +TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL); +TRACE_DEFINE_ENUM(DMA_TO_DEVICE); +TRACE_DEFINE_ENUM(DMA_FROM_DEVICE); +TRACE_DEFINE_ENUM(DMA_NONE); + +#define decode_dma_data_direction(dir) \ + __print_symbolic(dir, \ + { DMA_BIDIRECTIONAL, "BIDIRECTIONAL" }, \ + { DMA_TO_DEVICE, "TO_DEVICE" }, \ + { DMA_FROM_DEVICE, "FROM_DEVICE" }, \ + { DMA_NONE, "NONE" }) + +#define decode_dma_attrs(attrs) \ + __print_flags(attrs, "|", \ + { DMA_ATTR_WEAK_ORDERING, "WEAK_ORDERING" }, \ + { DMA_ATTR_WRITE_COMBINE, "WRITE_COMBINE" }, \ + { DMA_ATTR_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING" }, \ + { DMA_ATTR_SKIP_CPU_SYNC, "SKIP_CPU_SYNC" }, \ + { DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \ + { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ + { DMA_ATTR_NO_WARN, "NO_WARN" }, \ + { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }) + +DECLARE_EVENT_CLASS(dma_map, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = phys_addr; + 
__entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_map, dma_map_page, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DEFINE_EVENT(dma_map, dma_map_resource, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DECLARE_EVENT_CLASS(dma_unmap, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->addr = addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->addr, + __entry->size, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_unmap, dma_unmap_page, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +DEFINE_EVENT(dma_unmap, dma_unmap_resource, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +TRACE_EVENT(dma_alloc, + TP_PROTO(struct device *dev, void 
*virt_addr, dma_addr_t dma_addr, + size_t size, gfp_t flags, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(gfp_t, flags) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->flags = flags; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx flags=%s attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + show_gfp_flags(__entry->flags), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_free, + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_map_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + int ents, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, ents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, nents) + __dynamic_array(u64, dma_addrs, ents) + __dynamic_array(unsigned int, lengths, ents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + 
__assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = + sg_phys(sg + i); + for (i = 0; i < ents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int)), + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_unmap_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, addrs, nents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(addrs))[i] = + sg_phys(sg + i); + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(addrs), + __get_dynamic_array_len(addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +DECLARE_EVENT_CLASS(dma_sync_single, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, dma_addr) + __field(size_t, size) + 
__field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + __assign_str(device); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size) +); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DECLARE_EVENT_CLASS(dma_sync_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, dma_addrs, nents) + __dynamic_array(unsigned int, lengths, nents) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int))) +); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device, + TP_PROTO(struct device *dev, struct scatterlist *sg, int 
nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +#endif /* _TRACE_DMA_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 9e97e8b277a2235bbb562a4feb6f1216fb52d1b1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Jul 2024 17:12:15 -0400 Subject: btrfs: update the writepage tracepoint to take a folio Willy is wanting to get rid of page->index, convert the writepage tracepoint to take a folio so we can do folio->index instead of page->index. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0a523023bdcc..0eddbb8b6728 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -674,10 +674,10 @@ TRACE_EVENT(btrfs_finish_ordered_extent, DECLARE_EVENT_CLASS(btrfs__writepage, - TP_PROTO(const struct page *page, const struct inode *inode, + TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), - TP_ARGS(page, inode, wbc), + TP_ARGS(folio, inode, wbc), TP_STRUCT__entry_btrfs( __field( u64, ino ) @@ -695,7 +695,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), __entry->ino = btrfs_ino(BTRFS_I(inode)); - __entry->index = page->index; + __entry->index = folio->index; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->range_start = wbc->range_start; @@ -723,10 +723,10 @@ DECLARE_EVENT_CLASS(btrfs__writepage, DEFINE_EVENT(btrfs__writepage, __extent_writepage, - TP_PROTO(const struct page *page, const struct inode *inode, + TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), - TP_ARGS(page, inode, wbc) + TP_ARGS(folio, inode, wbc) ); TRACE_EVENT(btrfs_writepage_end_io_hook, -- cgit v1.2.3 From 
06de42c5a98a28060b314589241cabcacc3c4ff8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 27 Aug 2024 03:30:16 +0200 Subject: btrfs: rename __extent_writepage() and drop double underscores The function does not follow the pattern where the underscores would be justified, so rename it. Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0eddbb8b6728..e4add61e00f1 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -721,7 +721,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __entry->writeback_index) ); -DEFINE_EVENT(btrfs__writepage, __extent_writepage, +DEFINE_EVENT(btrfs__writepage, extent_writepage, TP_PROTO(const struct folio *folio, const struct inode *inode, const struct writeback_control *wbc), -- cgit v1.2.3 From ca283ea9920ac20ae23ed398b693db3121045019 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 26 Jun 2024 23:39:11 +0200 Subject: btrfs: constify more pointer parameters Continue adding const to parameters. This is for clarity and minor addition to safety. There are some minor effects, in the assembly code and .ko measured on release config. 
Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e4add61e00f1..bf60ad50011e 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1825,7 +1825,7 @@ TRACE_EVENT(qgroup_update_counters, TRACE_EVENT(qgroup_update_reserve, - TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup, + TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup, s64 diff, int type), TP_ARGS(fs_info, qgroup, diff, type), @@ -1851,7 +1851,7 @@ TRACE_EVENT(qgroup_update_reserve, TRACE_EVENT(qgroup_meta_reserve, - TP_PROTO(struct btrfs_root *root, s64 diff, int type), + TP_PROTO(const struct btrfs_root *root, s64 diff, int type), TP_ARGS(root, diff, type), @@ -1874,7 +1874,7 @@ TRACE_EVENT(qgroup_meta_reserve, TRACE_EVENT(qgroup_meta_convert, - TP_PROTO(struct btrfs_root *root, s64 diff), + TP_PROTO(const struct btrfs_root *root, s64 diff), TP_ARGS(root, diff), -- cgit v1.2.3 From edd3f6f7588c713477e1299c38c84dcd91a7f148 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Wed, 11 Sep 2024 11:37:17 +0800 Subject: tcp: Use skb__nullable in trace_tcp_send_reset Replace skb with skb__nullable as the argument name. The suffix tells bpf verifier through btf that the arg could be NULL and should be checked in tp_btf prog. For now, this is the only nullable argument in tcp tracepoints. 
Signed-off-by: Philo Lu Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240911033719.91468-4-lulie@linux.alibaba.com Signed-off-by: Martin KaFai Lau --- include/trace/events/tcp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1c8bd8e186b8..a27c4b619dff 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -91,10 +91,10 @@ DEFINE_RST_REASON(FN, FN) TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, - const struct sk_buff *skb, + const struct sk_buff *skb__nullable, const enum sk_rst_reason reason), - TP_ARGS(sk, skb, reason), + TP_ARGS(sk, skb__nullable, reason), TP_STRUCT__entry( __field(const void *, skbaddr) @@ -106,7 +106,7 @@ TRACE_EVENT(tcp_send_reset, ), TP_fast_assign( - __entry->skbaddr = skb; + __entry->skbaddr = skb__nullable; __entry->skaddr = sk; /* Zero means unknown state. */ __entry->state = sk ? sk->sk_state : 0; @@ -118,13 +118,13 @@ TRACE_EVENT(tcp_send_reset, const struct inet_sock *inet = inet_sk(sk); TP_STORE_ADDR_PORTS(__entry, inet, sk); - } else if (skb) { - const struct tcphdr *th = (const struct tcphdr *)skb->data; + } else if (skb__nullable) { + const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data; /* * We should reverse the 4-tuple of skb, so later * it can print the right flow direction of rst. */ - TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr); + TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr); } __entry->reason = reason; ), -- cgit v1.2.3 From 8ab79ed50cf10f338465c296012500de1081646f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:49 +0000 Subject: page_pool: devmem support Convert netmem to be a union of struct page and struct netmem. Overload the LSB of struct netmem* to indicate that it's a net_iov, otherwise it's a page. 
Currently these entries in struct page are rented by the page_pool and used exclusively by the net stack: struct { unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; atomic_long_t pp_ref_count; }; Mirror these (and only these) entries into struct net_iov and implement netmem helpers that can access these common fields regardless of whether the underlying type is page or net_iov. Implement checks for net_iov in netmem helpers which delegate to mm APIs, to ensure net_iov are never passed to the mm stack. Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Acked-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-6-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/trace/events/page_pool.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index 543e54e432a1..31825ed30032 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -57,12 +57,12 @@ TRACE_EVENT(page_pool_state_release, __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->release = release; - __entry->pfn = netmem_to_pfn(netmem); + __entry->pfn = netmem_pfn_trace(netmem); ), - TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u", + TP_printk("page_pool=%p netmem=%p is_net_iov=%lu pfn=0x%lx release=%u", __entry->pool, (void *)__entry->netmem, - __entry->pfn, __entry->release) + __entry->netmem & NET_IOV, __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, @@ -83,12 +83,12 @@ TRACE_EVENT(page_pool_state_hold, __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; - __entry->pfn = netmem_to_pfn(netmem); + __entry->pfn = netmem_pfn_trace(netmem); ), - TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u", + TP_printk("page_pool=%p netmem=%p is_net_iov=%lu, pfn=0x%lx hold=%u", 
__entry->pool, (void *)__entry->netmem, - __entry->pfn, __entry->hold) + __entry->netmem & NET_IOV, __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, -- cgit v1.2.3 From cd0277ed0c188dd40e7744e89299af7b78831ca4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 May 2024 21:47:07 +0100 Subject: netfs: Use new folio_queue data type and iterator instead of xarray iter Make the netfs write-side routines use the new folio_queue struct to hold a rolling buffer of folios, with the issuer adding folios at the tail and the collector removing them from the head as they're processed instead of using an xarray. This will allow a subsequent patch to simplify the write collector. The primary mark (as tested by folioq_is_marked()) is used to note if the corresponding folio needs putting. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-16-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 47cd11aaccac..4e13774a06e6 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -153,6 +153,7 @@ EM(netfs_folio_trace_mkwrite, "mkwrite") \ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_not_under_wback, "!wback") \ + EM(netfs_folio_trace_put, "put") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \ EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_store, "store") \ -- cgit v1.2.3 From 983cdcf8fe141b0ce16bc71959a5dc55bcb0764d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Jun 2024 07:48:55 +0100 Subject: netfs: Simplify the writeback code Use the new folio_queue structures to simplify the writeback code. 
The problem with referring to the i_pages xarray directly is that we may have gaps in the sequence of folios we're writing from that we need to skip when we're removing the writeback mark from the folios we're writing back from. At the moment the code tries to deal with this by carefully tracking the gaps in each writeback stream (e.g. write to server and write to cache) and divining when there's a gap that spans folios (something that's not helped by folios not being a consistent size). Instead, the folio_queue buffer contains pointers only to the folios we're dealing with, has them in ascending order and indicates a gap by placing non-consecutive folios next to each other. This makes it possible to track where we need to clean up to by just keeping track of where we've processed to on each stream and taking the minimum. Note that the I/O iterator is always rounded up to the end of the folio, even if that is beyond the EOF position, so that the cache can do DIO from the page. The excess space is cleared, though mmapped writes clobber it. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-18-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 4e13774a06e6..58bf23002fc1 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -512,33 +512,6 @@ TRACE_EVENT(netfs_collect, __entry->start + __entry->len) ); -TRACE_EVENT(netfs_collect_contig, - TP_PROTO(const struct netfs_io_request *wreq, unsigned long long to, - enum netfs_collect_contig_trace type), - - TP_ARGS(wreq, to, type), - - TP_STRUCT__entry( - __field(unsigned int, wreq) - __field(enum netfs_collect_contig_trace, type) - __field(unsigned long long, contiguity) - __field(unsigned long long, to) - ), - - TP_fast_assign( - __entry->wreq = wreq->debug_id; - __entry->type = type; - __entry->contiguity = wreq->contiguity; - __entry->to = to; - ), - - TP_printk("R=%08x %llx -> %llx %s", - __entry->wreq, - __entry->contiguity, - __entry->to, - __print_symbolic(__entry->type, netfs_collect_contig_traces)) - ); - TRACE_EVENT(netfs_collect_sreq, TP_PROTO(const struct netfs_io_request *wreq, const struct netfs_io_subrequest *subreq), @@ -610,7 +583,6 @@ TRACE_EVENT(netfs_collect_state, __field(unsigned int, notes ) __field(unsigned long long, collected_to ) __field(unsigned long long, cleaned_to ) - __field(unsigned long long, contiguity ) ), TP_fast_assign( @@ -618,12 +590,11 @@ TRACE_EVENT(netfs_collect_state, __entry->notes = notes; __entry->collected_to = collected_to; __entry->cleaned_to = wreq->cleaned_to; - __entry->contiguity = wreq->contiguity; ), - TP_printk("R=%08x cto=%llx fto=%llx ctg=%llx n=%x", + TP_printk("R=%08x col=%llx cln=%llx n=%x", __entry->wreq, __entry->collected_to, - 
__entry->cleaned_to, __entry->contiguity, + __entry->cleaned_to, __entry->notes) ); -- cgit v1.2.3 From ee4cdf7ba857a894ad1650d6ab77669cbbfa329e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2024 00:40:22 +0100 Subject: netfs: Speed up buffered reading Improve the efficiency of buffered reads in a number of ways: (1) Overhaul the algorithm in general so that it's a lot more compact and split the read submission code between buffered and unbuffered versions. The unbuffered version can be vastly simplified. (2) Read-result collection is handed off to a work queue rather than being done in the I/O thread. Multiple subrequests can be processed simultaneously. (3) When a subrequest is collected, any folios it fully spans are collected and "spare" data on either side is donated to either the previous or the next subrequest in the sequence. Notes: (*) Readahead expansion massively slows down fio, presumably because it causes a load of extra allocations, both folio and xarray, up front before RPC requests can be transmitted. (*) RDMA with cifs does appear to work, both with SIW and RXE. (*) PG_private_2-based reading and copy-to-cache is split out into its own file and altered to use folio_queue. Note that the copy to the cache now creates a new write transaction against the cache and adds the folios to be copied into it. This allows it to use part of the writeback I/O code. 
Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 103 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 58bf23002fc1..7b26463cb98f 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -20,6 +20,7 @@ EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_readahead, "READAHEAD") \ EM(netfs_read_trace_readpage, "READPAGE ") \ + EM(netfs_read_trace_read_gaps, "READ-GAPS") \ EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \ E_(netfs_read_trace_write_begin, "WRITEBEGN") @@ -33,12 +34,14 @@ #define netfs_rreq_origins \ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ + EM(NETFS_READ_GAPS, "RG") \ EM(NETFS_READ_FOR_WRITE, "RW") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \ - E_(NETFS_DIO_WRITE, "DW") + EM(NETFS_DIO_WRITE, "DW") \ + E_(NETFS_PGPRIV2_COPY_TO_CACHE, "2C") #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ @@ -69,15 +72,25 @@ E_(NETFS_INVALID_WRITE, "INVL") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_add_donations, "+DON ") \ + EM(netfs_sreq_trace_added, "ADD ") \ + EM(netfs_sreq_trace_clear, "CLEAR") \ EM(netfs_sreq_trace_discard, "DSCRD") \ + EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ + EM(netfs_sreq_trace_donate_to_next, "DON-N") \ EM(netfs_sreq_trace_download_instead, "RDOWN") \ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ + EM(netfs_sreq_trace_hit_eof, "EOF ") \ + EM(netfs_sreq_trace_io_progress, "IO ") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_prepare, "PREP ") \ EM(netfs_sreq_trace_prep_failed, "PRPFL") 
\ - EM(netfs_sreq_trace_resubmit_short, "SHORT") \ + EM(netfs_sreq_trace_progress, "PRGRS") \ + EM(netfs_sreq_trace_reprep_failed, "REPFL") \ EM(netfs_sreq_trace_retry, "RETRY") \ + EM(netfs_sreq_trace_short, "SHORT") \ + EM(netfs_sreq_trace_split, "SPLIT") \ EM(netfs_sreq_trace_submit, "SUBMT") \ EM(netfs_sreq_trace_terminated, "TERM ") \ EM(netfs_sreq_trace_write, "WRITE") \ @@ -118,7 +131,7 @@ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ - EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ + EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \ EM(netfs_sreq_trace_put_done, "PUT DONE ") \ EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ @@ -138,6 +151,7 @@ EM(netfs_flush_content, "flush") \ EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ + EM(netfs_folio_trace_abandon, "abandon") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ @@ -154,7 +168,11 @@ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_not_under_wback, "!wback") \ EM(netfs_folio_trace_put, "put") \ + EM(netfs_folio_trace_read, "read") \ + EM(netfs_folio_trace_read_done, "read-done") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \ + EM(netfs_folio_trace_read_put, "read-put") \ + EM(netfs_folio_trace_read_unlock, "read-unlock") \ EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_store, "store") \ EM(netfs_folio_trace_store_copy, "store-copy") \ @@ -167,6 +185,12 @@ EM(netfs_contig_trace_jump, "-->JUMP-->") \ E_(netfs_contig_trace_unlock, "Unlock") +#define netfs_donate_traces \ + EM(netfs_trace_donate_tail_to_prev, "tail-to-prev") \ + EM(netfs_trace_donate_to_prev, "to-prev") \ + EM(netfs_trace_donate_to_next, "to-next") \ + E_(netfs_trace_donate_to_deferred_next, "defer-next") + #ifndef 
__NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY @@ -184,6 +208,7 @@ enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); enum netfs_folio_trace { netfs_folio_traces } __mode(byte); enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); +enum netfs_donate_trace { netfs_donate_traces } __mode(byte); #endif @@ -206,6 +231,7 @@ netfs_rreq_ref_traces; netfs_sreq_ref_traces; netfs_folio_traces; netfs_collect_contig_traces; +netfs_donate_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -226,6 +252,7 @@ TRACE_EVENT(netfs_read, TP_STRUCT__entry( __field(unsigned int, rreq ) __field(unsigned int, cookie ) + __field(loff_t, i_size ) __field(loff_t, start ) __field(size_t, len ) __field(enum netfs_read_trace, what ) @@ -235,18 +262,19 @@ TRACE_EVENT(netfs_read, TP_fast_assign( __entry->rreq = rreq->debug_id; __entry->cookie = rreq->cache_resources.debug_id; + __entry->i_size = rreq->i_size; __entry->start = start; __entry->len = len; __entry->what = what; __entry->netfs_inode = rreq->inode->i_ino; ), - TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx", + TP_printk("R=%08x %s c=%08x ni=%x s=%llx l=%zx sz=%llx", __entry->rreq, __print_symbolic(__entry->what, netfs_read_traces), __entry->cookie, __entry->netfs_inode, - __entry->start, __entry->len) + __entry->start, __entry->len, __entry->i_size) ); TRACE_EVENT(netfs_rreq, @@ -651,6 +679,71 @@ TRACE_EVENT(netfs_collect_stream, __entry->collected_to, __entry->front) ); +TRACE_EVENT(netfs_progress, + TP_PROTO(const struct netfs_io_subrequest *subreq, + unsigned long long start, size_t avail, size_t part), + + TP_ARGS(subreq, start, avail, part), + + TP_STRUCT__entry( + __field(unsigned int, rreq) + __field(unsigned int, subreq) + __field(unsigned int, consumed) + __field(unsigned int, transferred) + __field(unsigned long long, f_start) + __field(unsigned int, 
f_avail) + __field(unsigned int, f_part) + __field(unsigned char, slot) + ), + + TP_fast_assign( + __entry->rreq = subreq->rreq->debug_id; + __entry->subreq = subreq->debug_index; + __entry->consumed = subreq->consumed; + __entry->transferred = subreq->transferred; + __entry->f_start = start; + __entry->f_avail = avail; + __entry->f_part = part; + __entry->slot = subreq->curr_folioq_slot; + ), + + TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x", + __entry->rreq, __entry->subreq, __entry->f_start, + __entry->consumed, __entry->transferred, + __entry->f_part, __entry->f_avail, __entry->slot) + ); + +TRACE_EVENT(netfs_donate, + TP_PROTO(const struct netfs_io_request *rreq, + const struct netfs_io_subrequest *from, + const struct netfs_io_subrequest *to, + size_t amount, + enum netfs_donate_trace trace), + + TP_ARGS(rreq, from, to, amount, trace), + + TP_STRUCT__entry( + __field(unsigned int, rreq) + __field(unsigned int, from) + __field(unsigned int, to) + __field(unsigned int, amount) + __field(enum netfs_donate_trace, trace) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->from = from->debug_index; + __entry->to = to ? to->debug_index : -1; + __entry->amount = amount; + __entry->trace = trace; + ), + + TP_printk("R=%08x[%02x] -> [%02x] %s am=%x", + __entry->rreq, __entry->from, __entry->to, + __print_symbolic(__entry->trace, netfs_donate_traces), + __entry->amount) + ); + #undef EM #undef E_ #endif /* _TRACE_NETFS_H */ -- cgit v1.2.3 From 8f246b7c0a1be0882374f2ff831a61f0dbe77678 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jul 2024 12:23:11 +0100 Subject: netfs: Cancel dirty folios that have no storage destination Kafs wants to be able to cache the contents of directories (and symlinks), but whilst these are downloaded from the server with the FS.FetchData RPC op and similar, the same as for regular files, they can't be updated by FS.StoreData, but rather have special operations (FS.MakeDir, etc.). 
Now, rather than redownloading a directory's content after each change made to that directory, kafs modifies the local blob. This blob can be saved out to the cache, and since it's using netfslib, kafs just marks the folios dirty and lets ->writepages() on the directory take care of it, as for a regular file. This is fine as long as there's a cache as although the upload stream is disabled, there's a cache stream to drive the procedure. But if the cache goes away in the meantime, suddenly there's no way to do any writes and the code gets confused, complains "R=%x: No submit" to dmesg and leaves the dirty folio hanging. Fix this by just cancelling the store of the folio if neither stream is active. (If there's no cache at the time of dirtying, we should just not mark the folio dirty). Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-23-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 7b26463cb98f..76bd42a96815 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -153,6 +153,7 @@ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ EM(netfs_folio_trace_abandon, "abandon") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ + EM(netfs_folio_trace_cancel_store, "cancel-store") \ EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \ EM(netfs_folio_trace_clear_g, "clear-g") \ -- cgit v1.2.3 From f1cba5212e252243a539e079813bc96fbf53e241 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Sep 2024 22:30:38 +0900 Subject: firewire: core: rename cause flag of tracepoints event The flag of FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ directly causes hardIRQ request by 1394 OHCI hardware when the corresponding isochronous 
packet is transferred, however it is not so directly associated to hardIRQ processing itself. This commit renames the flag so that it relates to interrupt parameter of internal packet data. Link: https://lore.kernel.org/r/20240912133038.238786-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index b108176deb22..ad0e0cf82b9c 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -830,13 +830,13 @@ TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue, #ifndef show_cause enum fw_iso_context_completions_cause { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH = 0, - FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, + FW_ISO_CONTEXT_COMPLETIONS_CAUSE_INTERRUPT, FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, }; #define show_cause(cause) \ __print_symbolic(cause, \ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH, "FLUSH" }, \ - { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, "IRQ" }, \ + { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_INTERRUPT, "INTERRUPT" }, \ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, "HEADER_OVERFLOW" } \ ) #endif -- cgit v1.2.3 From d175ee98fe545d2c56df22751314584cce228307 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 2 Sep 2024 21:17:18 +0200 Subject: mm: Define VM_DROPPABLE for powerpc/32 Commit 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings") only adds VM_DROPPABLE for 64 bits architectures. In order to also use the getrandom vDSO implementation on powerpc/32, use VM_ARCH_1 for VM_DROPPABLE on powerpc/32. This is possible because VM_ARCH_1 is used for VM_SAO on powerpc and VM_SAO is only for powerpc/64. It is used in combination with PROT_SAO in some parts of code that are restricted to CONFIG_PPC64 through #ifdefs, it is therefore possible to define VM_SAO for CONFIG_PPC64 only. 
Signed-off-by: Christophe Leroy Acked-by: Michael Ellerman Signed-off-by: Jason A. Donenfeld --- include/trace/events/mmflags.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..37265977d524 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -143,7 +143,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } @@ -165,7 +165,7 @@ IF_HAVE_PG_ARCH_X(arch_3) # define IF_HAVE_UFFD_MINOR(flag, name) #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, #else # define IF_HAVE_VM_DROPPABLE(flag, name) -- cgit v1.2.3 From c4de97f7c45434985e5dbf2d6ccc9eca676e37fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 29 Jul 2024 16:52:32 -0400 Subject: svcrdma: Handle device removal outside of the CM event handler Synchronously wait for all disconnects to complete to ensure the transports have divested all hardware resources before the underlying RDMA device can safely be removed. 
Reviewed-by: Sagi Grimberg Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a96a985c49b3..e6a72646c507 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2172,6 +2172,29 @@ TRACE_EVENT(svcrdma_qp_error, ) ); +TRACE_EVENT(svcrdma_device_removal, + TP_PROTO( + const struct rdma_cm_id *id + ), + + TP_ARGS(id), + + TP_STRUCT__entry( + __string(name, id->device->name) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __assign_str(name); + memcpy(__entry->addr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("device %s to be removed, disconnecting %pISpc\n", + __get_str(name), __entry->addr + ) +); + DECLARE_EVENT_CLASS(svcrdma_sendqueue_class, TP_PROTO( const struct svcxprt_rdma *rdma, -- cgit v1.2.3 From bfc4a245a794841cba5cf287034a0f60d3087402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Sep 2024 08:35:24 +0200 Subject: dma-mapping: fix DMA API tracing for chained scatterlists scatterlist allocations can be chained, and thus all iterations need to use the chain-aware iterators. Switch the newly added tracing to use the proper iterators so that they work with chained scatterlists. 
Fixes: 038eb433dc14 ("dma-mapping: add tracing for dma-mapping API calls") Reported-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Reviewed-by: Sean Anderson Tested-by: syzbot+95e4ef83a3024384ec7a@syzkaller.appspotmail.com --- include/trace/events/dma.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index f57f05331d73..569f86a44aaa 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -176,9 +176,9 @@ TRACE_EVENT(dma_free, ); TRACE_EVENT(dma_map_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int ents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, ents, dir, attrs), + TP_ARGS(dev, sgl, nents, ents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -190,17 +190,17 @@ TRACE_EVENT(dma_map_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 *)__get_dynamic_array(phys_addrs))[i] = - sg_phys(sg + i); - for (i = 0; i < ents; i++) { + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + for_each_sg(sgl, sg, ents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; __entry->attrs = attrs; @@ -222,9 +222,9 @@ TRACE_EVENT(dma_map_sg, ); TRACE_EVENT(dma_unmap_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, sg, nents, dir, attrs), + TP_ARGS(dev, sgl, nents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ 
-234,12 +234,12 @@ TRACE_EVENT(dma_unmap_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) - ((u64 *)__get_dynamic_array(addrs))[i] = - sg_phys(sg + i); + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(addrs))[i] = sg_phys(sg); __entry->dir = dir; __entry->attrs = attrs; ), @@ -290,9 +290,9 @@ DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, TP_ARGS(dev, dma_addr, size, dir)); DECLARE_EVENT_CLASS(dma_sync_sg, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir), + TP_ARGS(dev, sgl, nents, dir), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -302,14 +302,15 @@ DECLARE_EVENT_CLASS(dma_sync_sg, ), TP_fast_assign( + struct scatterlist *sg; int i; __assign_str(device); - for (i = 0; i < nents; i++) { + for_each_sg(sgl, sg, nents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = - sg_dma_address(sg + i); + sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = - sg_dma_len(sg + i); + sg_dma_len(sg); } __entry->dir = dir; ), -- cgit v1.2.3