From 4bb0f0e73c8c30917d169c4a0f1ac083690c545b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 1 Aug 2017 12:01:52 -0400 Subject: tracing: Call clear_boot_tracer() at lateinit_sync The clear_boot_tracer function is used to reset the default_bootup_tracer string to prevent it from being accessed after boot, as it originally points to init data. But since clear_boot_tracer() is called via the init_lateinit() call, it races with the initcall for registering the hwlat tracer. If someone adds "ftrace=hwlat" to the kernel command line, depending on how the linker sets up the text, the saved command line may be cleared, and the hwlat tracer never is initialized. Simply have the clear_boot_tracer() be called by initcall_lateinit_sync() as that's for tasks to be called after lateinit. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196551 Cc: stable@vger.kernel.org Fixes: e7c15cd8a ("tracing: Added hardware latency tracer") Reported-by: Zamir SUN Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 42b9355033d4..784fb43b2abe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8407,4 +8407,4 @@ __init static int clear_boot_tracer(void) } fs_initcall(tracer_init_tracefs); -late_initcall(clear_boot_tracer); +late_initcall_sync(clear_boot_tracer); -- cgit v1.2.3 From 147d88e0b5eb90191bc5c12ca0a3c410b75a13d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Aug 2017 14:02:01 +0300 Subject: tracing: Missing error code in tracer_alloc_buffers() If ring_buffer_alloc() or one of the next couple function calls fail then we should return -ENOMEM but the current code returns success. Link: http://lkml.kernel.org/r/20170801110201.ajdkct7vwzixahvx@mwanda Cc: Sebastian Andrzej Siewior Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: b32614c03413 ('tracing/rb: Convert to hotplug state machine') Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 784fb43b2abe..d815fc317e9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8293,6 +8293,7 @@ __init static int tracer_alloc_buffers(void) if (ret < 0) goto out_free_cpumask; /* Used for event triggers */ + ret = -ENOMEM; temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); if (!temp_buffer) goto out_rm_hp_state; -- cgit v1.2.3 From a7e52ad7ed82e21273eccff93d1477a7b313aabb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 2 Aug 2017 14:20:54 -0400 Subject: ring-buffer: Have ring_buffer_alloc_read_page() return error on offline CPU Chunyu Hu reported: "per_cpu trace directories and files are created for all possible cpus, but only the cpus which have ever been on-lined have their own per cpu ring buffer (allocated by cpuhp threads). While trace_buffers_open, the open handler for trace file 'trace_pipe_raw' is always trying to access field of ring_buffer_per_cpu, and would panic with the NULL pointer. Align the behavior of trace_pipe_raw with trace_pipe, that returns -NODEV when openning it if that cpu does not have trace ring buffer. Reproduce: cat /sys/kernel/debug/tracing/per_cpu/cpu31/trace_pipe_raw (cpu31 is never on-lined, this is a 16 cores x86_64 box) Tested with: 1) boot with maxcpus=14, read trace_pipe_raw of cpu15. Got -NODEV. 2) oneline cpu15, read trace_pipe_raw of cpu15. Get the raw trace data. Call trace: [ 5760.950995] RIP: 0010:ring_buffer_alloc_read_page+0x32/0xe0 [ 5760.961678] tracing_buffers_read+0x1f6/0x230 [ 5760.962695] __vfs_read+0x37/0x160 [ 5760.963498] ? __vfs_read+0x5/0x160 [ 5760.964339] ? security_file_permission+0x9d/0xc0 [ 5760.965451] ? __vfs_read+0x5/0x160 [ 5760.966280] vfs_read+0x8c/0x130 [ 5760.967070] SyS_read+0x55/0xc0 [ 5760.967779] do_syscall_64+0x67/0x150 [ 5760.968687] entry_SYSCALL64_slow_path+0x25/0x25" This was introduced by the addition of the feature to reuse reader pages instead of re-allocating them. The problem is that the allocation of a reader page (which is per cpu) does not check if the cpu is online and set up for the ring buffer. Link: http://lkml.kernel.org/r/1500880866-1177-1-git-send-email-chuhu@redhat.com Cc: stable@vger.kernel.org Fixes: 73a757e63114 ("ring-buffer: Return reader page back into existing ring buffer") Reported-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d815fc317e9d..44004d8aa3b3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6598,7 +6598,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, { struct ftrace_buffer_info *info = filp->private_data; struct trace_iterator *iter = &info->iter; - ssize_t ret; + ssize_t ret = 0; ssize_t size; if (!count) @@ -6612,10 +6612,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (!info->spare) { info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, iter->cpu_file); - info->spare_cpu = iter->cpu_file; + if (IS_ERR(info->spare)) { + ret = PTR_ERR(info->spare); + info->spare = NULL; + } else { + info->spare_cpu = iter->cpu_file; + } } if (!info->spare) - return -ENOMEM; + return ret; /* Do we have previous read data to read? */ if (info->read < PAGE_SIZE) @@ -6790,8 +6795,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, ref->ref = 1; ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); - if (!ref->page) { - ret = -ENOMEM; + if (IS_ERR(ref->page)) { + ret = PTR_ERR(ref->page); + ref->page = NULL; kfree(ref); break; } -- cgit v1.2.3 From 065e63f951432068ba89a844fcbff68ea16ee186 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 31 Aug 2017 17:03:47 -0400 Subject: tracing: Only have rmmod clear buffers that its events were active in Currently, when a module event is enabled, when that module is removed, it clears all ring buffers. This is to prevent another module from being loaded and having one of its trace event IDs from reusing a trace event ID of the removed module. This could cause undesirable effects as the trace event of the new module would be using its own processing algorithms to process raw data of another event. To prevent this, when a module is loaded, if any of its events have been used (signified by the WAS_ENABLED event call flag, which is never cleared), all ring buffers are cleared, just in case any one of them contains event data of the removed event. The problem is, there's no reason to clear all ring buffers if only one (or less than all of them) uses one of the events. Instead, only clear the ring buffers that recorded the events of a module that is being removed. To do this, instead of keeping the WAS_ENABLED flag with the trace event call, move it to the per instance (per ring buffer) event file descriptor. The event file descriptor maps each event to a separate ring buffer instance. Then when the module is removed, only the ring buffers that activated one of the module's events get cleared. The rest are not touched. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 44004d8aa3b3..30338a835a51 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1702,6 +1702,9 @@ void tracing_reset_all_online_cpus(void) struct trace_array *tr; list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!tr->clear_trace) + continue; + tr->clear_trace = false; tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); -- cgit v1.2.3 From 3d9622c12c8873911f4cc0ccdabd0362c2fca06b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 5 Sep 2017 11:32:01 -0400 Subject: tracing: Add barrier to trace_printk() buffer nesting modification trace_printk() uses 4 buffers, one for each context (normal, softirq, irq and NMI), such that it does not need to worry about one context preempting the other. There's a nesting counter that gets incremented to figure out which buffer to use. If the context gets preempted by another context which calls trace_printk() it will increment the counter and use the next buffer, and restore the counter when it is finished. The problem is that gcc may optimize the modification of the buffer nesting counter and it may not be incremented in memory before the buffer is used. If this happens, and the context gets interrupted by another context, it could pick the same buffer and corrupt the one that is being used. Compiler barriers need to be added after the nesting variable is incremented and before it is decremented to prevent usage of the context buffers by more than one context at the same time. Cc: Andy Lutomirski Cc: stable@vger.kernel.org Fixes: e2ace00117 ("tracing: Choose static tp_printk buffer by explicit nesting count") Hat-tip-to: Peter Zijlstra Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 30338a835a51..78842557eea0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2802,11 +2802,17 @@ static char *get_trace_buf(void) if (!buffer || buffer->nesting >= 4) return NULL; - return &buffer->buffer[buffer->nesting++][0]; + buffer->nesting++; + + /* Interrupts must see nesting incremented before we use the buffer */ + barrier(); + return &buffer->buffer[buffer->nesting][0]; } static void put_trace_buf(void) { + /* Don't let the decrement of nesting leak before this */ + barrier(); this_cpu_dec(trace_percpu_buffer->nesting); } -- cgit v1.2.3 From 170b3b1050e28d1ba0700e262f0899ffa4fccc52 Mon Sep 17 00:00:00 2001 From: Baohong Liu Date: Tue, 5 Sep 2017 16:57:19 -0500 Subject: tracing: Apply trace_clock changes to instance max buffer Currently trace_clock timestamps are applied to both regular and max buffers only for global trace. For instance trace, trace_clock timestamps are applied only to regular buffer. But, regular and max buffers can be swapped, for example, following a snapshot. So, for instance trace, bad timestamps can be seen following a snapshot. Let's apply trace_clock timestamps to instance max buffer as well. Link: http://lkml.kernel.org/r/ebdb168d0be042dcdf51f81e696b17fabe3609c1.1504642143.git.tom.zanussi@linux.intel.com Cc: stable@vger.kernel.org Fixes: 277ba0446 ("tracing: Add interface to allow multiple trace buffers") Signed-off-by: Baohong Liu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 78842557eea0..5360b7aec57a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6229,7 +6229,7 @@ static int tracing_set_clock(struct trace_array *tr, const char *clockstr) tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) + if (tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); tracing_reset_online_cpus(&tr->max_buffer); #endif -- cgit v1.2.3 From 8dd33bcb7050dd6f8c1432732f930932c9d3a33e Mon Sep 17 00:00:00 2001 From: Bo Yan Date: Mon, 18 Sep 2017 10:03:35 -0700 Subject: tracing: Erase irqsoff trace with empty write One convenient way to erase trace is "echo > trace". However, this is currently broken if the current tracer is irqsoff tracer. This is because irqsoff tracer use max_buffer as the default trace buffer. Set the max_buffer as the one to be cleared when it's the trace buffer currently in use. Link: http://lkml.kernel.org/r/1505754215-29411-1-git-send-email-byan@nvidia.com Cc: Cc: stable@vger.kernel.org Fixes: 4acd4d00f ("tracing: give easy way to clear trace buffer") Signed-off-by: Bo Yan Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5360b7aec57a..a7fb136da891 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4020,11 +4020,17 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); + struct trace_buffer *trace_buf = &tr->trace_buffer; + +#ifdef CONFIG_TRACER_MAX_TRACE + if (tr->current_trace->print_max) + trace_buf = &tr->max_buffer; +#endif if (cpu == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(trace_buf); else - tracing_reset(&tr->trace_buffer, cpu); + tracing_reset(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { -- cgit v1.2.3 From c7b3ae0bd2ca658c7a71c49901d08c590294fac9 Mon Sep 17 00:00:00 2001 From: "Ziqian SUN (Zamir)" Date: Mon, 11 Sep 2017 14:26:35 +0800 Subject: tracing: Ignore mmiotrace from kernel commandline The mmiotrace tracer cannot be enabled with ftrace=mmiotrace in kernel commandline. With this patch, noboot is added to the tracer struct, and when system boot with a tracer that has noboot=true, it will print out a warning message and continue booting. Link: http://lkml.kernel.org/r/1505111195-31942-1-git-send-email-zsun@redhat.com Signed-off-by: Ziqian SUN (Zamir) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fb136da891..d3ca35f38803 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5364,6 +5364,13 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (t == tr->current_trace) goto out; + /* Some tracers won't work on kernel command line */ + if (system_state < SYSTEM_RUNNING && t->noboot) { + pr_warn("Tracer '%s' is not allowed on command line, ignored\n", + t->name); + goto out; + } + /* Some tracers are only allowed for the top level buffer */ if (!trace_ok_for_array(t, tr)) { ret = -EINVAL; -- cgit v1.2.3 From 75df6e688ccd517e339a7c422ef7ad73045b18a2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sun, 17 Sep 2017 03:23:48 -0700 Subject: tracing: Fix trace_pipe behavior for instance traces When reading data from trace_pipe, tracing_wait_pipe() performs a check to see if tracing has been turned off after some data was read. Currently, this check always looks at global trace state, but it should be checking the trace instance where trace_pipe is located at. Because of this bug, cat instances/i1/trace_pipe in the following script will immediately exit instead of waiting for data: cd /sys/kernel/debug/tracing echo 0 > tracing_on mkdir -p instances/i1 echo 1 > instances/i1/tracing_on echo 1 > instances/i1/events/sched/sched_process_exec/enable cat instances/i1/trace_pipe Link: http://lkml.kernel.org/r/20170917102348.1615-1-tahsin@google.com Cc: stable@vger.kernel.org Fixes: 10246fa35d4f ("tracing: give easy way to clear trace buffer") Signed-off-by: Tahsin Erdogan Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d3ca35f38803..752e5daf0896 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5680,7 +5680,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_on() && iter->pos) + if (!tracer_tracing_is_on(iter->tr) && iter->pos) break; mutex_unlock(&iter->mutex); -- cgit v1.2.3 From 12ecef0cb12102d8c034770173d2d1363cb97d52 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Sep 2017 16:22:49 -0400 Subject: tracing: Reverse the order of trace_types_lock and event_mutex In order to make future changes where we need to call tracing_set_clock() from within an event command, the order of trace_types_lock and event_mutex must be reversed, as the event command will hold event_mutex and the trace_types_lock is taken from within tracing_set_clock(). Link: http://lkml.kernel.org/r/20170921162249.0dde3dca@gandalf.local.home Requested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 752e5daf0896..5f1ac7d3402c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7687,6 +7687,7 @@ static int instance_mkdir(const char *name) struct trace_array *tr; int ret; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -EEXIST; @@ -7742,6 +7743,7 @@ static int instance_mkdir(const char *name) list_add(&tr->list, &ftrace_trace_arrays); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; @@ -7753,6 +7755,7 @@ static int instance_mkdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; @@ -7765,6 +7768,7 @@ static int instance_rmdir(const char *name) int ret; int i; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -ENODEV; @@ -7810,6 +7814,7 @@ static int instance_rmdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } -- cgit v1.2.3 From 7e465baa80293ed5f87fdf6405391d6f02110d4e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:20 -0500 Subject: tracing: Make traceprobe parsing code reusable traceprobe_probes_write() and traceprobe_command() actually contain nothing that ties them to kprobes - the code is generically useful for similar types of parsing elsewhere, so separate it out and move it to trace.c/trace.h. Other than moving it, the only change is in naming: traceprobe_probes_write() becomes trace_parse_run_command() and traceprobe_command() becomes trace_run_command(). Link: http://lkml.kernel.org/r/ae5c26ea40c196a8986854d921eb6e713ede7e3f.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5f1ac7d3402c..73e67b68c53b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8281,6 +8281,92 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } EXPORT_SYMBOL_GPL(ftrace_dump); +int trace_run_command(const char *buf, int (*createfn)(int, char **)) +{ + char **argv; + int argc, ret; + + argc = 0; + ret = 0; + argv = argv_split(GFP_KERNEL, buf, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, argv); + + argv_free(argv); + + return ret; +} + +#define WRITE_BUFSIZE 4096 + +ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos, + int (*createfn)(int, char **)) +{ + char *kbuf, *buf, *tmp; + int ret = 0; + size_t done = 0; + size_t size; + + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + while (done < count) { + size = count - done; + + if (size >= WRITE_BUFSIZE) + size = WRITE_BUFSIZE - 1; + + if (copy_from_user(kbuf, buffer + done, size)) { + ret = -EFAULT; + goto out; + } + kbuf[size] = '\0'; + buf = kbuf; + do { + tmp = strchr(buf, '\n'); + if (tmp) { + *tmp = '\0'; + size = tmp - buf + 1; + } else { + size = strlen(buf); + if (done + size < count) { + if (buf != kbuf) + break; + /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ + pr_warn("Line length is too long: Should be less than %d\n", + WRITE_BUFSIZE - 2); + ret = -EINVAL; + goto out; + } + } + done += size; + + /* Remove comments */ + tmp = strchr(buf, '#'); + + if (tmp) + *tmp = '\0'; + + ret = trace_run_command(buf, createfn); + if (ret) + goto out; + buf += size; + + } while (done < count); + } + ret = done; + +out: + kfree(kbuf); + + return ret; +} + __init static int tracer_alloc_buffers(void) { int ring_buf_size; -- cgit v1.2.3