From 2cf0dee989a8b2501929eaab29473b6b1fa11057 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Fri, 25 Aug 2023 13:34:30 +0300 Subject: tracing: Remove extra space at the end of hwlat_detector/mode Space is printed after each mode value including the last one: $ echo \"$(sudo cat /sys/kernel/tracing/hwlat_detector/mode)\" "none [round-robin] per-cpu " Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://lore.kernel.org/linux-trace-kernel/20230825103432.7750-1-m.kobuk@ispras.ru Cc: Masami Hiramatsu Fixes: 8fa826b7344d ("trace/hwlat: Implement the mode config option") Signed-off-by: Mikhail Kobuk Reviewed-by: Alexey Khoroshilov Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 2f37a6e68aa9..b791524a6536 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -635,7 +635,7 @@ static int s_mode_show(struct seq_file *s, void *v) else seq_printf(s, "%s", thread_mode_str[mode]); - if (mode != MODE_MAX) + if (mode < MODE_MAX - 1) /* if mode is any but last */ seq_puts(s, " "); return 0; -- cgit v1.2.3 From 3163f635b20e9e1fb4659e74f47918c9dddfe64e Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 31 Aug 2023 21:27:39 +0800 Subject: tracing: Fix race issue between cpu buffer write and swap Warning happened in rb_end_commit() at code: if (RB_WARN_ON(cpu_buffer, !local_read(&cpu_buffer->committing))) WARNING: CPU: 0 PID: 139 at kernel/trace/ring_buffer.c:3142 rb_commit+0x402/0x4a0 Call Trace: ring_buffer_unlock_commit+0x42/0x250 trace_buffer_unlock_commit_regs+0x3b/0x250 trace_event_buffer_commit+0xe5/0x440 trace_event_buffer_reserve+0x11c/0x150 trace_event_raw_event_sched_switch+0x23c/0x2c0 __traceiter_sched_switch+0x59/0x80 __schedule+0x72b/0x1580 schedule+0x92/0x120 worker_thread+0xa0/0x6f0 It is because the race between writing event into cpu buffer and swapping cpu buffer through file per_cpu/cpu0/snapshot: Write on CPU 0 Swap buffer by per_cpu/cpu0/snapshot on CPU 1 -------- -------- tracing_snapshot_write() [...] ring_buffer_lock_reserve() cpu_buffer = buffer->buffers[cpu]; // 1. Suppose find 'cpu_buffer_a'; [...] rb_reserve_next_event() [...] ring_buffer_swap_cpu() if (local_read(&cpu_buffer_a->committing)) goto out_dec; if (local_read(&cpu_buffer_b->committing)) goto out_dec; buffer_a->buffers[cpu] = cpu_buffer_b; buffer_b->buffers[cpu] = cpu_buffer_a; // 2. cpu_buffer has swapped here. rb_start_commit(cpu_buffer); if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) { // 3. This check passed due to 'cpu_buffer->buffer' [...] // has not changed here. return NULL; } cpu_buffer_b->buffer = buffer_a; cpu_buffer_a->buffer = buffer_b; [...] // 4. Reserve event from 'cpu_buffer_a'. ring_buffer_unlock_commit() [...] cpu_buffer = buffer->buffers[cpu]; // 5. Now find 'cpu_buffer_b' !!! rb_commit(cpu_buffer) rb_end_commit() // 6. WARN for the wrong 'committing' state !!! Based on above analysis, we can easily reproduce by following testcase: ``` bash #!/bin/bash dmesg -n 7 sysctl -w kernel.panic_on_warn=1 TR=/sys/kernel/tracing echo 7 > ${TR}/buffer_size_kb echo "sched:sched_switch" > ${TR}/set_event while [ true ]; do echo 1 > ${TR}/per_cpu/cpu0/snapshot done & while [ true ]; do echo 1 > ${TR}/per_cpu/cpu0/snapshot done & while [ true ]; do echo 1 > ${TR}/per_cpu/cpu0/snapshot done & ``` To fix it, IIUC, we can use smp_call_function_single() to do the swap on the target cpu where the buffer is located, so that above race would be avoided. Link: https://lore.kernel.org/linux-trace-kernel/20230831132739.4070878-1-zhengyejian1@huawei.com Cc: Fixes: f1affcaaa861 ("tracing: Add snapshot in the per_cpu trace directories") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3e55375f47e0..23579fba1a57 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7599,6 +7599,11 @@ out: return ret; } +static void tracing_swap_cpu_buffer(void *tr) +{ + update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); +} + static ssize_t tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -7657,13 +7662,15 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) break; - local_irq_disable(); /* Now, we're going to swap */ - if (iter->cpu_file == RING_BUFFER_ALL_CPUS) + if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { + local_irq_disable(); update_max_tr(tr, current, smp_processor_id(), NULL); - else - update_max_tr_single(tr, current, iter->cpu_file); - local_irq_enable(); + local_irq_enable(); + } else { + smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, + (void *)tr, 1); + } break; default: if (tr->allocated_snapshot) { -- cgit v1.2.3 From 13511489046a60f76a9f9fef1335837b28d325e4 Mon Sep 17 00:00:00 2001 From: Levi Yun Date: Thu, 3 Aug 2023 21:52:36 +0100 Subject: ftrace: Use within_module to check rec->ip within specified module. within_module_core && within_module_init condition is same to within module but it's more readable. Use within_module instead of former condition to check rec->ip within specified module area or not. Link: https://lore.kernel.org/linux-trace-kernel/20230803205236.32201-1-ppbuk5246@gmail.com Signed-off-by: Levi Yun Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 05c0024815bf..c46dd6d97afe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6779,8 +6779,7 @@ void ftrace_release_mod(struct module *mod) last_pg = &ftrace_pages_start; for (pg = ftrace_pages_start; pg; pg = *last_pg) { rec = &pg->records[0]; - if (within_module_core(rec->ip, mod) || - within_module_init(rec->ip, mod)) { + if (within_module(rec->ip, mod)) { /* * As core pages are first, the first * page should never be a module page. @@ -6852,8 +6851,7 @@ void ftrace_module_enable(struct module *mod) * not part of this module, then skip this pg, * which the "break" will do. */ - if (!within_module_core(rec->ip, mod) && - !within_module_init(rec->ip, mod)) + if (!within_module(rec->ip, mod)) break; /* Weak functions should still be ignored */ -- cgit v1.2.3 From 2a30dbcbef96f4073d3a5f8708865b1aab0bfc6d Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Wed, 9 Aug 2023 15:15:51 +0800 Subject: ftrace: Use LIST_HEAD to initialize clear_hash Use LIST_HEAD() to initialize clear_hash instead of open-coding it. Link: https://lore.kernel.org/linux-trace-kernel/20230809071551.913041-1-ruanjinjie@huawei.com Cc: Masami Hiramatsu Cc: Mark Rutland Signed-off-by: Ruan Jinjie Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c46dd6d97afe..8de8bec5f366 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7140,9 +7140,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) struct dyn_ftrace key; struct ftrace_mod_map *mod_map = NULL; struct ftrace_init_func *func, *func_next; - struct list_head clear_hash; - - INIT_LIST_HEAD(&clear_hash); + LIST_HEAD(clear_hash); key.ip = start; key.flags = end; /* overload flags, as it is unsigned long */ -- cgit v1.2.3 From 3d07fa1dd19035eb0b13ae6697efd5caa9033e74 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 31 Aug 2023 08:55:00 -0400 Subject: tracing: Zero the pipe cpumask on alloc to avoid spurious -EBUSY The pipe cpumask used to serialize opens between the main and percpu trace pipes is not zeroed or initialized. This can result in spurious -EBUSY returns if underlying memory is not fully zeroed. This has been observed by immediate failure to read the main trace_pipe file on an otherwise newly booted and idle system: # cat /sys/kernel/debug/tracing/trace_pipe cat: /sys/kernel/debug/tracing/trace_pipe: Device or resource busy Zero the allocation of pipe_cpumask to avoid the problem. Link: https://lore.kernel.org/linux-trace-kernel/20230831125500.986862-1-bfoster@redhat.com Cc: stable@vger.kernel.org Fixes: c2489bb7e6be ("tracing: Introduce pipe_cpumask to avoid race on trace_pipes") Reviewed-by: Zheng Yejian Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Brian Foster Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 23579fba1a57..35783a7baf15 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9474,7 +9474,7 @@ static struct trace_array *trace_array_create(const char *name) if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; - if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) goto out_free_tr; tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; @@ -10419,7 +10419,7 @@ __init static int tracer_alloc_buffers(void) if (trace_create_savedcmd() < 0) goto out_free_temp_buffer; - if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) goto out_free_savedcmd; /* TODO: make the number of buffers hot pluggable with CPUS */ -- cgit v1.2.3 From 9af4058493c59721eccd90b7c40cad793e4e3c3b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:36 +0200 Subject: tracing/filters: Fix error-handling of cpulist parsing buffer parse_pred() allocates a string buffer to parse the user-provided cpulist, but doesn't check the allocation result nor does it free the buffer once it is no longer needed. Add an allocation check, and free the buffer as soon as it is no longer needed. Link: https://lkml.kernel.org/r/20230901151039.125186-2-vschneid@redhat.com Cc: Masami Hiramatsu Reported-by: Steven Rostedt Reported-by: Josh Poimboeuf Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 3a529214a21b..c06e1d596f4b 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1744,17 +1744,23 @@ static int parse_pred(const char *str, void *data, /* Copy the cpulist between { and } */ tmp = kmalloc((i - maskstart) + 1, GFP_KERNEL); - strscpy(tmp, str + maskstart, (i - maskstart) + 1); + if (!tmp) + goto err_mem; + strscpy(tmp, str + maskstart, (i - maskstart) + 1); pred->mask = kzalloc(cpumask_size(), GFP_KERNEL); - if (!pred->mask) + if (!pred->mask) { + kfree(tmp); goto err_mem; + } /* Now parse it */ if (cpulist_parse(tmp, pred->mask)) { + kfree(tmp); parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i); goto err_free; } + kfree(tmp); /* Move along */ i++; -- cgit v1.2.3 From 1caf7adb9e000479d2bd29c86b6d7eaeb24b1b05 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:37 +0200 Subject: tracing/filters: Fix double-free of struct filter_pred.mask When a cpulist filter is found to contain a single CPU, that CPU is saved as a scalar and the backing cpumask storage is freed. Also NULL the mask to avoid a double-free once we get down to free_predicate(). Link: https://lkml.kernel.org/r/20230901151039.125186-3-vschneid@redhat.com Cc: Masami Hiramatsu Reported-by: Steven Rostedt Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c06e1d596f4b..eb331e8b00b6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1773,6 +1773,7 @@ static int parse_pred(const char *str, void *data, if (single) { pred->val = cpumask_first(pred->mask); kfree(pred->mask); + pred->mask = NULL; } if (field->filter_type == FILTER_CPUMASK) { -- cgit v1.2.3 From 2900bcbee3899e900853be555665f126673141b7 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:38 +0200 Subject: tracing/filters: Change parse_pred() cpulist ternary into an if block Review comments noted that an if block would be clearer than a ternary, so swap it out. No change in behaviour intended Link: https://lkml.kernel.org/r/20230901151039.125186-4-vschneid@redhat.com Cc: Masami Hiramatsu Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index eb331e8b00b6..09b4733a2933 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1782,13 +1782,17 @@ static int parse_pred(const char *str, void *data, FILTER_PRED_FN_CPUMASK; } else if (field->filter_type == FILTER_CPU) { if (single) { - pred->op = pred->op == OP_BAND ? OP_EQ : pred->op; + if (pred->op == OP_BAND) + pred->op = OP_EQ; + pred->fn_num = FILTER_PRED_FN_CPU; } else { pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK; } } else if (single) { - pred->op = pred->op == OP_BAND ? OP_EQ : pred->op; + if (pred->op == OP_BAND) + pred->op = OP_EQ; + pred->fn_num = select_comparison_fn(pred->op, field->size, false); if (pred->op == OP_NE) pred->not = 1; -- cgit v1.2.3 From cbb557ba92f08b945e2cb20b7ab37ef49ab53cdd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:39 +0200 Subject: tracing/filters: Fix coding style issues Recent commits have introduced some coding style issues, fix those up. Link: https://lkml.kernel.org/r/20230901151039.125186-5-vschneid@redhat.com Cc: Masami Hiramatsu Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 09b4733a2933..33264e510d16 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1360,7 +1360,7 @@ int filter_assign_type(const char *type) return FILTER_DYN_STRING; if (strstr(type, "cpumask_t")) return FILTER_CPUMASK; - } + } if (strstr(type, "__rel_loc") && strstr(type, "char")) return FILTER_RDYN_STRING; @@ -1731,7 +1731,9 @@ static int parse_pred(const char *str, void *data, maskstart = i; /* Walk the cpulist until closing } */ - for (; str[i] && str[i] != '}'; i++); + for (; str[i] && str[i] != '}'; i++) + ; + if (str[i] != '}') { parse_error(pe, FILT_ERR_MISSING_BRACE_CLOSE, pos + i); goto err_free; -- cgit v1.2.3