From dc300d77b86a122d3fd099206e1adf699ed80bd7 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sun, 12 Jul 2020 09:10:35 +0800 Subject: tracing: toplevel d_entry already initialized Currently we have following call flow: tracer_init_tracefs() tracing_init_dentry() event_trace_init() tracing_init_dentry() This shows tracing_init_dentry() is called twice in this flow and this is not necessary. Let's remove the second one when it is for sure be properly initialized. Link: https://lkml.kernel.org/r/20200712011036.70948-4-richard.weiyang@linux.alibaba.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a85effb2373b..ee25d849ebba 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3434,7 +3434,6 @@ early_initcall(event_trace_enable_again); __init int event_trace_init(void) { struct trace_array *tr; - struct dentry *d_tracer; struct dentry *entry; int ret; @@ -3442,11 +3441,7 @@ __init int event_trace_init(void) if (!tr) return -ENODEV; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) - return 0; - - entry = tracefs_create_file("available_events", 0444, d_tracer, + entry = tracefs_create_file("available_events", 0444, NULL, tr, &ftrace_avail_fops); if (!entry) pr_warn("Could not create tracefs 'available_events' entry\n"); @@ -3457,7 +3452,7 @@ __init int event_trace_init(void) if (trace_define_common_fields()) pr_warn("tracing: Failed to allocate common fields"); - ret = early_event_add_tracer(d_tracer, tr); + ret = early_event_add_tracer(NULL, tr); if (ret) return ret; -- cgit v1.2.3 From 22c36b18263426bdd97ef5e04c0e92224c612ee1 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sun, 12 Jul 2020 09:10:36 +0800 Subject: tracing: make tracing_init_dentry() returns an integer instead of a d_entry pointer Current tracing_init_dentry() return a d_entry pointer, while is not necessary. This function returns NULL on success or error on failure, which means there is no valid d_entry pointer return. Let's return 0 on success and negative value for error. Link: https://lkml.kernel.org/r/20200712011036.70948-5-richard.weiyang@linux.alibaba.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 36 ++++++++++++++++++------------------ kernel/trace/trace.h | 2 +- kernel/trace/trace_dynevent.c | 8 ++++---- kernel/trace/trace_events_synth.c | 9 +++------ kernel/trace/trace_functions_graph.c | 8 ++++---- kernel/trace/trace_hwlat.c | 8 ++++---- kernel/trace/trace_kprobe.c | 10 +++++----- kernel/trace/trace_printk.c | 8 ++++---- kernel/trace/trace_stack.c | 12 ++++++------ kernel/trace/trace_stat.c | 8 ++++---- kernel/trace/trace_uprobe.c | 9 ++++----- 11 files changed, 57 insertions(+), 61 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a7c26345e83..29a9034b38d0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8971,21 +8971,21 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) * directory. It is called via fs_initcall() by any of the boot up code * and expects to return the dentry of the top level tracing directory. */ -struct dentry *tracing_init_dentry(void) +int tracing_init_dentry(void) { struct trace_array *tr = &global_trace; if (security_locked_down(LOCKDOWN_TRACEFS)) { pr_warn("Tracing disabled due to lockdown\n"); - return ERR_PTR(-EPERM); + return -EPERM; } /* The top level trace array uses NULL as parent */ if (tr->dir) - return NULL; + return 0; if (WARN_ON(!tracefs_initialized())) - return ERR_PTR(-ENODEV); + return -ENODEV; /* * As there may still be users that expect the tracing @@ -8996,7 +8996,7 @@ struct dentry *tracing_init_dentry(void) tr->dir = debugfs_create_automount("tracing", NULL, trace_automount, NULL); - return NULL; + return 0; } extern struct trace_eval_map *__start_ftrace_eval_maps[]; @@ -9083,48 +9083,48 @@ static struct notifier_block trace_module_nb = { static __init int tracer_init_tracefs(void) { - struct dentry *d_tracer; + int ret; trace_access_lock_init(); - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return 0; event_trace_init(); - init_tracer_tracefs(&global_trace, d_tracer); - ftrace_init_tracefs_toplevel(&global_trace, d_tracer); + init_tracer_tracefs(&global_trace, NULL); + ftrace_init_tracefs_toplevel(&global_trace, NULL); - trace_create_file("tracing_thresh", 0644, d_tracer, + trace_create_file("tracing_thresh", 0644, NULL, &global_trace, &tracing_thresh_fops); - trace_create_file("README", 0444, d_tracer, + trace_create_file("README", 0444, NULL, NULL, &tracing_readme_fops); - trace_create_file("saved_cmdlines", 0444, d_tracer, + trace_create_file("saved_cmdlines", 0444, NULL, NULL, &tracing_saved_cmdlines_fops); - trace_create_file("saved_cmdlines_size", 0644, d_tracer, + trace_create_file("saved_cmdlines_size", 0644, NULL, NULL, &tracing_saved_cmdlines_size_fops); - trace_create_file("saved_tgids", 0444, d_tracer, + trace_create_file("saved_tgids", 0444, NULL, NULL, &tracing_saved_tgids_fops); trace_eval_init(); - trace_create_eval_file(d_tracer); + trace_create_eval_file(NULL); #ifdef CONFIG_MODULES register_module_notifier(&trace_module_nb); #endif #ifdef CONFIG_DYNAMIC_FTRACE - trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, + trace_create_file("dyn_ftrace_total_info", 0444, NULL, NULL, &tracing_dyn_info_fops); #endif - create_trace_instances(d_tracer); + create_trace_instances(NULL); update_tracer_options(&global_trace); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 610d21355526..0d3a405fe446 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -737,7 +737,7 @@ struct dentry *trace_create_file(const char *name, void *data, const struct file_operations *fops); -struct dentry *tracing_init_dentry(void); +int tracing_init_dentry(void); struct ring_buffer_event; diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 9f2e8520b748..9442a9bb080e 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -206,14 +206,14 @@ static const struct file_operations dynamic_events_ops = { /* Make a tracefs interface for controlling dynamic events */ static __init int init_dynamic_event(void) { - struct dentry *d_tracer; struct dentry *entry; + int ret; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return 0; - entry = tracefs_create_file("dynamic_events", 0644, d_tracer, + entry = tracefs_create_file("dynamic_events", 0644, NULL, NULL, &dynamic_events_ops); /* Event list interface */ diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index c6cca0d1d584..f86a2aa0bccd 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1757,7 +1757,6 @@ static const struct file_operations synth_events_fops = { static __init int trace_events_synth_init(void) { struct dentry *entry = NULL; - struct dentry *d_tracer; int err = 0; err = dyn_event_register(&synth_event_ops); @@ -1766,13 +1765,11 @@ static __init int trace_events_synth_init(void) return err; } - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) { - err = PTR_ERR(d_tracer); + err = tracing_init_dentry(); + if (err) goto err; - } - entry = tracefs_create_file("synthetic_events", 0644, d_tracer, + entry = tracefs_create_file("synthetic_events", 0644, NULL, NULL, &synth_events_fops); if (!entry) { err = -ENODEV; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4a9c49c08ec9..60d66278aa0d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1336,13 +1336,13 @@ static const struct file_operations graph_depth_fops = { static __init int init_graph_tracefs(void) { - struct dentry *d_tracer; + int ret; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return 0; - trace_create_file("max_graph_depth", 0644, d_tracer, + trace_create_file("max_graph_depth", 0644, NULL, NULL, &graph_depth_fops); return 0; diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 17873e5d0353..c9ad5c6fbaad 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -538,14 +538,14 @@ static const struct file_operations window_fops = { */ static int init_tracefs(void) { - struct dentry *d_tracer; + int ret; struct dentry *top_dir; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return -ENOMEM; - top_dir = tracefs_create_dir("hwlat_detector", d_tracer); + top_dir = tracefs_create_dir("hwlat_detector", NULL); if (!top_dir) return -ENOMEM; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index aefb6065b508..feca9b19cd74 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1901,14 +1901,14 @@ subsys_initcall(init_kprobe_trace_early); /* Make a tracefs interface for controlling probe points */ static __init int init_kprobe_trace(void) { - struct dentry *d_tracer; + int ret; struct dentry *entry; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return 0; - entry = tracefs_create_file("kprobe_events", 0644, d_tracer, + entry = tracefs_create_file("kprobe_events", 0644, NULL, NULL, &kprobe_events_ops); /* Event list interface */ @@ -1916,7 +1916,7 @@ static __init int init_kprobe_trace(void) pr_warn("Could not create tracefs 'kprobe_events' entry\n"); /* Profile interface */ - entry = tracefs_create_file("kprobe_profile", 0444, d_tracer, + entry = tracefs_create_file("kprobe_profile", 0444, NULL, NULL, &kprobe_profile_ops); if (!entry) diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index d4e31e969206..71b2e0fdc3e0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -367,13 +367,13 @@ static const struct file_operations ftrace_formats_fops = { static __init int init_trace_printk_function_export(void) { - struct dentry *d_tracer; + int ret; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return 0; - trace_create_file("printk_formats", 0444, d_tracer, + trace_create_file("printk_formats", 0444, NULL, NULL, &ftrace_formats_fops); return 0; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 98bba4764c52..c408423e5d65 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -554,20 +554,20 @@ __setup("stacktrace", enable_stacktrace); static __init int stack_trace_init(void) { - struct dentry *d_tracer; + int ret; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return 0; - trace_create_file("stack_max_size", 0644, d_tracer, + trace_create_file("stack_max_size", 0644, NULL, &stack_trace_max_size, &stack_max_size_fops); - trace_create_file("stack_trace", 0444, d_tracer, + trace_create_file("stack_trace", 0444, NULL, NULL, &stack_trace_fops); #ifdef CONFIG_DYNAMIC_FTRACE - trace_create_file("stack_trace_filter", 0644, d_tracer, + trace_create_file("stack_trace_filter", 0644, NULL, &trace_ops, &stack_trace_filter_fops); #endif diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index d1fa19773cc8..8d141c3825a9 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -276,13 +276,13 @@ static const struct file_operations tracing_stat_fops = { static int tracing_stat_init(void) { - struct dentry *d_tracing; + int ret; - d_tracing = tracing_init_dentry(); - if (IS_ERR(d_tracing)) + ret = tracing_init_dentry(); + if (ret) return -ENODEV; - stat_dir = tracefs_create_dir("trace_stat", d_tracing); + stat_dir = tracefs_create_dir("trace_stat", NULL); if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); return -ENOMEM; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f4286c9bdeb4..56729c6b6614 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1625,21 +1625,20 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) /* Make a trace interface for controling probe points */ static __init int init_uprobe_trace(void) { - struct dentry *d_tracer; int ret; ret = dyn_event_register(&trace_uprobe_ops); if (ret) return ret; - d_tracer = tracing_init_dentry(); - if (IS_ERR(d_tracer)) + ret = tracing_init_dentry(); + if (ret) return 0; - trace_create_file("uprobe_events", 0644, d_tracer, + trace_create_file("uprobe_events", 0644, NULL, NULL, &uprobe_events_ops); /* Profile interface */ - trace_create_file("uprobe_profile", 0444, d_tracer, + trace_create_file("uprobe_profile", 0444, NULL, NULL, &uprobe_profile_ops); return 0; } -- cgit v1.2.3 From 5c8c206e4308ee33dea7c60b0cfcbed48a6438b4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Aug 2020 20:32:59 -0700 Subject: tracing: Delete repeated words in comments Drop repeated words in kernel/trace/. {and, the, not} Link: https://lkml.kernel.org/r/20200807033259.13778-1-rdunlap@infradead.org Cc: Ingo Molnar Signed-off-by: Randy Dunlap Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.c | 2 +- kernel/trace/trace_dynevent.c | 2 +- kernel/trace/trace_events_synth.c | 2 +- kernel/trace/tracing_map.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 603255f5f085..84f32dbc7be8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2402,7 +2402,7 @@ struct ftrace_ops direct_ops = { * * If the record has the FTRACE_FL_REGS set, that means that it * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS - * is not not set, then it wants to convert to the normal callback. + * is not set, then it wants to convert to the normal callback. * * Returns the address of the trampoline to set to */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 29a9034b38d0..8fac7d6db222 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9287,7 +9287,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } /* - * We need to stop all tracing on all CPUS to read the + * We need to stop all tracing on all CPUS to read * the next buffer. This is a bit expensive, but is * not done often. We fill all what we can read, * and then release the locks again. diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 9442a9bb080e..5fa49cfd2bb6 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -402,7 +402,7 @@ void dynevent_arg_init(struct dynevent_arg *arg, * whitespace, all followed by a separator, if applicable. After the * first arg string is successfully appended to the command string, * the optional @operator is appended, followed by the second arg and - * and optional @separator. If no separator was specified when + * optional @separator. If no separator was specified when * initializing the arg, a space will be appended. */ void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair, diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index f86a2aa0bccd..7c765e80e974 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1211,7 +1211,7 @@ __synth_event_trace_start(struct trace_event_file *file, * ENABLED bit is set (which attaches the probe thus allowing * this code to be called, etc). Because this is called * directly by the user, we don't have that but we still need - * to honor not logging when disabled. For the the iterated + * to honor not logging when disabled. For the iterated * trace case, we save the enabed state upon start and just * ignore the following data calls. */ diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 74738c9856f1..4b50fc0cb12c 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -260,7 +260,7 @@ int tracing_map_add_var(struct tracing_map *map) * to use cmp_fn. * * A key can be a subset of a compound key; for that purpose, the - * offset param is used to describe where within the the compound key + * offset param is used to describe where within the compound key * the key referenced by this key field resides. * * Return: The index identifying the field in the map and associated -- cgit v1.2.3 From b427e765bdffcc18911ace199a17b09332a47d55 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Thu, 13 Aug 2020 19:28:03 +0800 Subject: tracing: Use __this_cpu_read() in trace_buffered_event_enable() The code is executed with preemption disabled, so it's safe to use __this_cpu_read(). Link: https://lkml.kernel.org/r/20200813112803.12256-1-tian.xianting@h3c.com Signed-off-by: Xianting Tian Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8fac7d6db222..1c4ca25944ba 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2511,7 +2511,7 @@ void trace_buffered_event_enable(void) preempt_disable(); if (cpu == smp_processor_id() && - this_cpu_read(trace_buffered_event) != + __this_cpu_read(trace_buffered_event) != per_cpu(trace_buffered_event, cpu)) WARN_ON_ONCE(1); preempt_enable(); -- cgit v1.2.3 From f3d36426618ee2b2d1fa99aefb5fe4d2dc33807e Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 18 Aug 2020 08:08:57 +0300 Subject: kprobes: Use module_name() macro It is advised to use module_name() macro instead of dereferencing mod->name directly. This makes sense for consistencys sake and also it prevents a hard dependency to CONFIG_MODULES. Link: https://lkml.kernel.org/r/20200818050857.117998-1-jarkko.sakkinen@linux.intel.com Cc: Mark Rutland Cc: Ingo Molnar Cc: linux-mm@kvack.org Cc: Andi Kleen Cc: Ard Biesheuvel Cc: Jessica Yu Cc: Mark Rutland , Cc: Masami Hiramatsu Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Acked-by: Masami Hiramatsu Signed-off-by: Jarkko Sakkinen Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index feca9b19cd74..f8e46929ceba 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -106,9 +106,10 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk) static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, struct module *mod) { - int len = strlen(mod->name); + int len = strlen(module_name(mod)); const char *name = trace_kprobe_symbol(tk); - return strncmp(mod->name, name, len) == 0 && name[len] == ':'; + + return strncmp(module_name(mod), name, len) == 0 && name[len] == ':'; } static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) @@ -688,7 +689,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb, if (ret) pr_warn("Failed to re-register probe %s on %s: %d\n", trace_probe_name(&tk->tp), - mod->name, ret); + module_name(mod), ret); } } mutex_unlock(&event_mutex); -- cgit v1.2.3 From eb8d8b4c9848b200586aa98e105b39f159656ba6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Sep 2020 15:50:20 +0300 Subject: tracing: remove a pointless assignment The "tr" is a stack variable so setting it to NULL before a return is a no-op. Delete the assignment. Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1c4ca25944ba..55b829863127 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8799,7 +8799,6 @@ static int __remove_instance(struct trace_array *tr) free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); - tr = NULL; return 0; } -- cgit v1.2.3 From 40d14da383670db21a09e63d52db8dee9b77741e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 6 Sep 2020 18:33:26 -0700 Subject: fgraph: Convert ret_stack tasklist scanning to rcu It seems that alloc_retstack_tasklist() can also take a lockless approach for scanning the tasklist, instead of using the big global tasklist_lock. For this we also kill another deprecated and rcu-unsafe tsk->thread_group user replacing it with for_each_process_thread(), maintaining semantics. Here tasklist_lock does not protect anything other than the list against concurrent fork/exit. And considering that the whole thing is capped by FTRACE_RETSTACK_ALLOC_SIZE (32), it should not be a problem to have a pontentially stale, yet stable, list. The task cannot go away either, so we don't risk racing with ftrace_graph_exit_task() which clears the retstack. The tsk->ret_stack management is not protected by tasklist_lock, being serialized with the corresponding publish/subscribe barriers against concurrent ftrace_push_return_trace(). In addition this plays nicer with cachelines by avoiding two atomic ops in the uncontended case. Link: https://lkml.kernel.org/r/20200907013326.9870-1-dave@stgolabs.net Acked-by: Oleg Nesterov Signed-off-by: Davidlohr Bueso Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/fgraph.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 1af321dec0f1..5658f13037b3 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -387,8 +387,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } } - read_lock(&tasklist_lock); - do_each_thread(g, t) { + rcu_read_lock(); + for_each_process_thread(g, t) { if (start == end) { ret = -EAGAIN; goto unlock; @@ -403,10 +403,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) smp_wmb(); t->ret_stack = ret_stack_list[start++]; } - } while_each_thread(g, t); + } unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); free: for (i = start; i < end; i++) kfree(ret_stack_list[i]); -- cgit v1.2.3 From 8490db06f914100fc8a5110481cbd37d8968be90 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 17:55:15 +0900 Subject: tracing/boot: Add per-instance tracing_on option support Add per-instance tracing_on option, which will be useful with traceon/traceoff event trigger actions. For example, if we disable tracing_on by default and set traceon and traceoff on a pair of events, we can trace functions between the pair of events. Link: https://lkml.kernel.org/r/159972811538.428528.2561315102284268611.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index fa0fc08c6ef8..d52d441a17e8 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -40,6 +40,16 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) pr_err("Failed to set option: %s\n", buf); } + p = xbc_node_find_value(node, "tracing_on", NULL); + if (p && *p != '\0') { + if (kstrtoul(p, 10, &v)) + pr_err("Failed to set tracing on: %s\n", p); + if (v) + tracer_tracing_on(tr); + else + tracer_tracing_off(tr); + } + p = xbc_node_find_value(node, "trace_clock", NULL); if (p && *p != '\0') { if (tracing_set_clock(tr, p) < 0) -- cgit v1.2.3 From 4725cd89978c26405a20414f3a0fa6cbd2bf9aad Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 17:55:35 +0900 Subject: tracing/kprobes: Support perf-style return probe Support perf-style return probe ("SYMBOL%return") for kprobe events. This will allow boot-time tracing user to define a return probe event. Link: https://lkml.kernel.org/r/159972813535.428528.4437029657208468954.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- kernel/trace/trace_kprobe.c | 18 +++++++++++++++++- kernel/trace/trace_probe.h | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 55b829863127..ca6da462326d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5122,7 +5122,7 @@ static const char readme_msg[] = "\t -:[/]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" - "place (kretprobe): [:][+]|\n" + "place (kretprobe): [:][+]%return|\n" #endif #ifdef CONFIG_UPROBE_EVENTS " place (uprobe): :[(ref_ctr_offset)]\n" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f8e46929ceba..9d46415296eb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -718,6 +718,9 @@ static int trace_kprobe_create(int argc, const char *argv[]) * p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: * r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] + * Or + * p:[GRP/]EVENT] [MOD:]KSYM[+0]%return [FETCHARGS] + * * Fetch args: * $retval : fetch return value * $stack : fetch stack address @@ -747,7 +750,6 @@ static int trace_kprobe_create(int argc, const char *argv[]) switch (argv[0][0]) { case 'r': is_return = true; - flags |= TPARG_FL_RETURN; break; case 'p': break; @@ -805,12 +807,26 @@ static int trace_kprobe_create(int argc, const char *argv[]) symbol = kstrdup(argv[1], GFP_KERNEL); if (!symbol) return -ENOMEM; + + tmp = strchr(symbol, '%'); + if (tmp) { + if (!strcmp(tmp, "%return")) { + *tmp = '\0'; + is_return = true; + } else { + trace_probe_log_err(tmp - symbol, BAD_ADDR_SUFFIX); + goto parse_error; + } + } + /* TODO: support .init module functions */ ret = traceprobe_split_symbol_offset(symbol, &offset); if (ret || offset < 0 || offset > UINT_MAX) { trace_probe_log_err(0, BAD_PROBE_ADDR); goto parse_error; } + if (is_return) + flags |= TPARG_FL_RETURN; if (kprobe_on_func_entry(NULL, symbol, offset)) flags |= TPARG_FL_FENTRY; if (offset && is_return && !(flags & TPARG_FL_FENTRY)) { diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index a22b62813f8c..04d00987da69 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -404,6 +404,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(MAXACT_TOO_BIG, "Maxactive is too big"), \ C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \ C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ + C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \ C(NO_GROUP_NAME, "Group name is not specified"), \ C(GROUP_TOO_LONG, "Group name is too long"), \ C(BAD_GROUP_NAME, "Group name must follow the same rules as C identifiers"), \ -- cgit v1.2.3 From 3dd3aae32dc91efab916b28cf95986186c6e8d6b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 17:55:46 +0900 Subject: tracing/uprobes: Support perf-style return probe Support perf-style return probe ("SYMBOL%return") for uprobe events as same as kprobe events does. Link: https://lkml.kernel.org/r/159972814601.428528.7641183316212425445.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- kernel/trace/trace_uprobe.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ca6da462326d..c35fcd2f2529 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5125,7 +5125,7 @@ static const char readme_msg[] = "place (kretprobe): [:][+]%return|\n" #endif #ifdef CONFIG_UPROBE_EVENTS - " place (uprobe): :[(ref_ctr_offset)]\n" + " place (uprobe): :[%return][(ref_ctr_offset)]\n" #endif "\t args: =fetcharg[:type]\n" "\t fetcharg: %, @
, @[+|-],\n" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 56729c6b6614..3cf7128e1ad3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -528,7 +528,7 @@ end: /* * Argument syntax: - * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] + * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ static int trace_uprobe_create(int argc, const char **argv) { @@ -617,6 +617,19 @@ static int trace_uprobe_create(int argc, const char **argv) } } + /* Check if there is %return suffix */ + tmp = strchr(arg, '%'); + if (tmp) { + if (!strcmp(tmp, "%return")) { + *tmp = '\0'; + is_return = true; + } else { + trace_probe_log_err(tmp - filename, BAD_ADDR_SUFFIX); + ret = -EINVAL; + goto fail_address_parse; + } + } + /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); if (ret) { -- cgit v1.2.3 From ac343da7bc9048629f9d12d98e8f0573df88836b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 21:38:48 +0900 Subject: tracing: Define event fields early stage Define event fields at early stage so that boot-time tracing can access the event fields (like per-event filter setting). Link: https://lkml.kernel.org/r/159974152862.478751.2023768466808361350.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 92 +++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 37 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ee25d849ebba..8e87fbab6930 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2123,12 +2123,48 @@ event_subsystem_dir(struct trace_array *tr, const char *name, return NULL; } +static int +event_define_fields(struct trace_event_call *call) +{ + struct list_head *head; + int ret = 0; + + /* + * Other events may have the same class. Only update + * the fields if they are not already defined. + */ + head = trace_get_fields(call); + if (list_empty(head)) { + struct trace_event_fields *field = call->class->fields_array; + unsigned int offset = sizeof(struct trace_entry); + + for (; field->type; field++) { + if (field->type == TRACE_FUNCTION_TYPE) { + field->define_fields(call); + break; + } + + offset = ALIGN(offset, field->align); + ret = trace_define_field(call, field->type, field->name, + offset, field->size, + field->is_signed, field->filter_type); + if (WARN_ON_ONCE(ret)) { + pr_err("error code is %d\n", ret); + break; + } + + offset += field->size; + } + } + + return ret; +} + static int event_create_dir(struct dentry *parent, struct trace_event_file *file) { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; - struct list_head *head; struct dentry *d_events; const char *name; int ret; @@ -2162,35 +2198,10 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) &ftrace_event_id_fops); #endif - /* - * Other events may have the same class. Only update - * the fields if they are not already defined. - */ - head = trace_get_fields(call); - if (list_empty(head)) { - struct trace_event_fields *field = call->class->fields_array; - unsigned int offset = sizeof(struct trace_entry); - - for (; field->type; field++) { - if (field->type == TRACE_FUNCTION_TYPE) { - ret = field->define_fields(call); - break; - } - - offset = ALIGN(offset, field->align); - ret = trace_define_field(call, field->type, field->name, - offset, field->size, - field->is_signed, field->filter_type); - if (ret) - break; - - offset += field->size; - } - if (ret < 0) { - pr_warn("Could not initialize trace point events/%s\n", - name); - return -1; - } + ret = event_define_fields(call); + if (ret < 0) { + pr_warn("Could not initialize trace point events/%s\n", name); + return ret; } /* @@ -2493,7 +2504,7 @@ __trace_early_add_new_event(struct trace_event_call *call, if (!file) return -ENOMEM; - return 0; + return event_define_fields(call); } struct ftrace_module_file_ops; @@ -3431,6 +3442,18 @@ static __init int event_trace_enable_again(void) early_initcall(event_trace_enable_again); +/* Init fields which doesn't related to the tracefs */ +static __init int event_trace_init_fields(void) +{ + if (trace_define_generic_fields()) + pr_warn("tracing: Failed to allocated generic fields"); + + if (trace_define_common_fields()) + pr_warn("tracing: Failed to allocate common fields"); + + return 0; +} + __init int event_trace_init(void) { struct trace_array *tr; @@ -3446,12 +3469,6 @@ __init int event_trace_init(void) if (!entry) pr_warn("Could not create tracefs 'available_events' entry\n"); - if (trace_define_generic_fields()) - pr_warn("tracing: Failed to allocated generic fields"); - - if (trace_define_common_fields()) - pr_warn("tracing: Failed to allocate common fields"); - ret = early_event_add_tracer(NULL, tr); if (ret) return ret; @@ -3469,6 +3486,7 @@ void __init trace_event_init(void) event_trace_memsetup(); init_ftrace_syscalls(); event_trace_enable(); + event_trace_init_fields(); } #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST -- cgit v1.2.3 From a838deab4e635994476bfc5b254bdf461e168752 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 21:38:58 +0900 Subject: tracing: Enable adding dynamic events early stage Split the event fields initialization from creating new event directory. This allows the boot-time tracing to define dynamic events before initializing events directory on tracefs. Link: https://lkml.kernel.org/r/159974153790.478751.3475515065034825374.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8e87fbab6930..42c0e7df6e70 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -38,6 +38,7 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); +static bool eventdir_initialized; #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) @@ -2486,7 +2487,10 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) if (!file) return -ENOMEM; - return event_create_dir(tr->event_dir, file); + if (eventdir_initialized) + return event_create_dir(tr->event_dir, file); + else + return event_define_fields(call); } /* @@ -3478,6 +3482,9 @@ __init int event_trace_init(void) if (ret) pr_warn("Failed to register trace events module notifier\n"); #endif + + eventdir_initialized = true; + return 0; } -- cgit v1.2.3 From 4114fbfd02f12d7a58cc4bd6fc36e0925266f9f7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 21:39:07 +0900 Subject: tracing: Enable creating new instance early boot Enable creating new trace_array instance in early boot stage. If the instances directory is not created, postpone it until the tracefs is initialized. Link: https://lkml.kernel.org/r/159974154763.478751.6289753509587233103.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 53 +++++++++++++++++++++++++++++++++--------- kernel/trace/trace.h | 7 ++++++ kernel/trace/trace_functions.c | 22 +++++++++++------- 3 files changed, 63 insertions(+), 19 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c35fcd2f2529..6211a13b3327 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8636,6 +8636,24 @@ struct trace_array *trace_array_find_get(const char *instance) return tr; } +static int trace_array_create_dir(struct trace_array *tr) +{ + int ret; + + tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); + if (!tr->dir) + return -EINVAL; + + ret = event_trace_add_tracer(tr->dir, tr); + if (ret) + tracefs_remove(tr->dir); + + init_tracer_tracefs(tr, tr->dir); + __update_tracer_options(tr); + + return ret; +} + static struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; @@ -8671,30 +8689,27 @@ static struct trace_array *trace_array_create(const char *name) if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; - tr->dir = tracefs_create_dir(name, trace_instance_dir); - if (!tr->dir) - goto out_free_tr; - - ret = event_trace_add_tracer(tr->dir, tr); - if (ret) { - tracefs_remove(tr->dir); + if (ftrace_allocate_ftrace_ops(tr) < 0) goto out_free_tr; - } ftrace_init_trace_array(tr); - init_tracer_tracefs(tr, tr->dir); init_trace_flags_index(tr); - __update_tracer_options(tr); + + if (trace_instance_dir) { + ret = trace_array_create_dir(tr); + if (ret) + goto out_free_tr; + } list_add(&tr->list, &ftrace_trace_arrays); tr->ref++; - return tr; out_free_tr: + ftrace_free_ftrace_ops(tr); free_trace_buffers(tr); free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); @@ -8852,11 +8867,27 @@ static int instance_rmdir(const char *name) static __init void create_trace_instances(struct dentry *d_tracer) { + struct trace_array *tr; + trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, instance_mkdir, instance_rmdir); if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) return; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!tr->name) + continue; + if (MEM_FAIL(trace_array_create_dir(tr) < 0, + "Failed to create instance directory\n")) + break; + } + + mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); } static void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0d3a405fe446..525434145eea 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1125,6 +1125,8 @@ extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent); void ftrace_destroy_function_files(struct trace_array *tr); +int ftrace_allocate_ftrace_ops(struct trace_array *tr); +void ftrace_free_ftrace_ops(struct trace_array *tr); void ftrace_init_global_array_ops(struct trace_array *tr); void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); void ftrace_reset_array_ops(struct trace_array *tr); @@ -1146,6 +1148,11 @@ ftrace_create_function_files(struct trace_array *tr, { return 0; } +static inline int ftrace_allocate_ftrace_ops(struct trace_array *tr) +{ + return 0; +} +static inline void ftrace_free_ftrace_ops(struct trace_array *tr) { } static inline void ftrace_destroy_function_files(struct trace_array *tr) { } static inline __init void ftrace_init_global_array_ops(struct trace_array *tr) { } diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index dd4dff71d89a..2c2126e1871d 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -34,10 +34,14 @@ enum { TRACE_FUNC_OPT_STACK = 0x1, }; -static int allocate_ftrace_ops(struct trace_array *tr) +int ftrace_allocate_ftrace_ops(struct trace_array *tr) { struct ftrace_ops *ops; + /* The top level array uses the "global_ops" */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return 0; + ops = kzalloc(sizeof(*ops), GFP_KERNEL); if (!ops) return -ENOMEM; @@ -48,15 +52,19 @@ static int allocate_ftrace_ops(struct trace_array *tr) tr->ops = ops; ops->private = tr; + return 0; } +void ftrace_free_ftrace_ops(struct trace_array *tr) +{ + kfree(tr->ops); + tr->ops = NULL; +} int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent) { - int ret; - /* * The top level array uses the "global_ops", and the files are * created on boot up. @@ -64,9 +72,8 @@ int ftrace_create_function_files(struct trace_array *tr, if (tr->flags & TRACE_ARRAY_FL_GLOBAL) return 0; - ret = allocate_ftrace_ops(tr); - if (ret) - return ret; + if (!tr->ops) + return -EINVAL; ftrace_create_filter_files(tr->ops, parent); @@ -76,8 +83,7 @@ int ftrace_create_function_files(struct trace_array *tr, void ftrace_destroy_function_files(struct trace_array *tr) { ftrace_destroy_filter_files(tr->ops); - kfree(tr->ops); - tr->ops = NULL; + ftrace_free_ftrace_ops(tr); } static int function_trace_init(struct trace_array *tr) -- cgit v1.2.3 From ba0fbfbb21cd90d51e4f6668ee8397e810818028 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 21:39:17 +0900 Subject: tracing/boot, kprobe, synth: Initialize boot-time tracing earlier Initialize boot-time tracing in core_initcall_sync instead of fs_initcall, and initialize required tracers (kprobes and synth) in core_initcall. This will allow the boot-time tracing to trace __init code from the beginning of postcore_initcall stage. Link: https://lkml.kernel.org/r/159974155727.478751.7486926132902849578.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 7 +++++-- kernel/trace/trace_events_synth.c | 19 ++++++++++++++----- kernel/trace/trace_kprobe.c | 6 +++--- 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index d52d441a17e8..754e3cf2df3a 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -340,5 +340,8 @@ static int __init trace_boot_init(void) return 0; } - -fs_initcall(trace_boot_init); +/* + * Start tracing at the end of core-initcall, so that it starts tracing + * from the beginning of postcore_initcall. + */ +core_initcall_sync(trace_boot_init); diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 7c765e80e974..a9cd7793f7ea 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1754,17 +1754,26 @@ static const struct file_operations synth_events_fops = { .release = seq_release, }; -static __init int trace_events_synth_init(void) +/* + * Register dynevent at core_initcall. This allows kernel to setup kprobe + * events in postcore_initcall without tracefs. + */ +static __init int trace_events_synth_init_early(void) { - struct dentry *entry = NULL; int err = 0; err = dyn_event_register(&synth_event_ops); - if (err) { + if (err) pr_warn("Could not register synth_event_ops\n"); - return err; - } + return err; +} +core_initcall(trace_events_synth_init_early); + +static __init int trace_events_synth_init(void) +{ + struct dentry *entry = NULL; + int err = 0; err = tracing_init_dentry(); if (err) goto err; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9d46415296eb..b911e9f6d9f5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1897,8 +1897,8 @@ static __init void setup_boot_kprobe_events(void) } /* - * Register dynevent at subsys_initcall. This allows kernel to setup kprobe - * events in fs_initcall without tracefs. + * Register dynevent at core_initcall. This allows kernel to setup kprobe + * events in postcore_initcall without tracefs. */ static __init int init_kprobe_trace_early(void) { @@ -1913,7 +1913,7 @@ static __init int init_kprobe_trace_early(void) return 0; } -subsys_initcall(init_kprobe_trace_early); +core_initcall(init_kprobe_trace_early); /* Make a tracefs interface for controlling probe points */ static __init int init_kprobe_trace(void) -- cgit v1.2.3 From 720dee53ad8dfd528941fbbc264574601b04488a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Sep 2020 01:40:08 +0900 Subject: tracing/boot: Initialize per-instance event list in early boot Initialize per-instance event list in early boot time (before initializing instance directory on tracefs). This fixes boot-time tracing to correctly handle the boot-time per-instance settings. Link: https://lkml.kernel.org/r/160096560826.182763.17110991546046128881.stgit@devnote2 Fixes: 4114fbfd02f1 ("tracing: Enable creating new instance early boot") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 ++- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 30 ++++++++++++++++-------------- 3 files changed, 19 insertions(+), 15 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6211a13b3327..3f2533adae72 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8700,7 +8700,8 @@ static struct trace_array *trace_array_create(const char *name) ret = trace_array_create_dir(tr); if (ret) goto out_free_tr; - } + } else + __trace_early_add_events(tr); list_add(&tr->list, &ftrace_trace_arrays); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 525434145eea..5b0e797cacdd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1658,6 +1658,7 @@ extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); +extern void __trace_early_add_events(struct trace_array *tr); extern struct trace_event_file *__find_event_file(struct trace_array *tr, const char *system, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 42c0e7df6e70..851ab37058dd 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3131,14 +3131,13 @@ static inline int register_event_cmds(void) { return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ /* - * The top level array has already had its trace_event_file - * descriptors created in order to allow for early events to - * be recorded. This function is called after the tracefs has been - * initialized, and we now have to create the files associated - * to the events. + * The top level array and trace arrays created by boot-time tracing + * have already had its trace_event_file descriptors created in order + * to allow for early events to be recorded. + * This function is called after the tracefs has been initialized, + * and we now have to create the files associated to the events. */ -static __init void -__trace_early_add_event_dirs(struct trace_array *tr) +static void __trace_early_add_event_dirs(struct trace_array *tr) { struct trace_event_file *file; int ret; @@ -3153,13 +3152,12 @@ __trace_early_add_event_dirs(struct trace_array *tr) } /* - * For early boot up, the top trace array requires to have - * a list of events that can be enabled. This must be done before - * the filesystem is set up in order to allow events to be traced - * early. + * For early boot up, the top trace array and the trace arrays created + * by boot-time tracing require to have a list of events that can be + * enabled. This must be done before the filesystem is set up in order + * to allow events to be traced early. */ -static __init void -__trace_early_add_events(struct trace_array *tr) +void __trace_early_add_events(struct trace_array *tr) { struct trace_event_call *call; int ret; @@ -3290,7 +3288,11 @@ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) goto out; down_write(&trace_event_sem); - __trace_add_event_dirs(tr); + /* If tr already has the event list, it is initialized in early boot. */ + if (unlikely(!list_empty(&tr->events))) + __trace_early_add_event_dirs(tr); + else + __trace_add_event_dirs(tr); up_write(&trace_event_sem); out: -- cgit v1.2.3 From fdda88d31addb19a35ac7962e752e55aaeb5c20a Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 2 Oct 2020 22:31:26 +0800 Subject: ftrace: Fix some typos in comment s/coorditate/coordinate/ s/emty/empty/ s/preeptive/preemptive/ s/succes/success/ s/carefule/careful/ Link: https://lkml.kernel.org/r/20201002143126.2890-1-hqjagain@gmail.com Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 84f32dbc7be8..123d520b9261 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -230,7 +230,7 @@ static void update_ftrace_function(void) /* * For static tracing, we need to be a bit more careful. * The function change takes affect immediately. Thus, - * we need to coorditate the setting of the function_trace_ops + * we need to coordinate the setting of the function_trace_ops * with the setting of the ftrace_trace_function. * * Set the function to the list ops, which will call the @@ -1451,7 +1451,7 @@ static bool hash_contains_ip(unsigned long ip, { /* * The function record is a match if it exists in the filter - * hash and not in the notrace hash. Note, an emty hash is + * hash and not in the notrace hash. Note, an empty hash is * considered a match for the filter hash, but an empty * notrace hash is considered not in the notrace hash. */ @@ -2976,7 +2976,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) synchronize_rcu_tasks_rude(); /* - * When the kernel is preeptive, tasks can be preempted + * When the kernel is preemptive, tasks can be preempted * while on a ftrace trampoline. Just scheduling a task on * a CPU is not good enough to flush them. Calling * synchornize_rcu_tasks() will wait for those tasks to @@ -4368,7 +4368,7 @@ void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, * @ip: The instruction pointer address to map @data to * @data: The data to map to @ip * - * Returns 0 on succes otherwise an error. + * Returns 0 on success otherwise an error. */ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, unsigned long ip, void *data) @@ -4536,7 +4536,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, /* * Note, there's a small window here that the func_hash->filter_hash - * may be NULL or empty. Need to be carefule when reading the loop. + * may be NULL or empty. Need to be careful when reading the loop. */ mutex_lock(&probe->ops.func_hash->regex_lock); -- cgit v1.2.3 From 4a4a56b4e76bbe3211c4f93d99c0c1543f5f3230 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 4 Oct 2020 17:14:03 -0500 Subject: tracing: Change STR_VAR_MAX_LEN 32 is too small for this value, and anyway it makes more sense to use MAX_FILTER_STR_VAL, as this is also the value used for variable-length __strings. Link: https://lkml.kernel.org/r/6adfd1668ac1fd8670bd58206944a762061a5559.1601848695.git.zanussi@kernel.org Tested-by: Axel Rasmussen Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 ++ kernel/trace/trace_synth.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1b2ef6490229..3b22e2122d1a 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1398,6 +1398,8 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) n_str = hist_data->n_field_var_str + hist_data->n_save_var_str; + BUILD_BUG_ON(STR_VAR_LEN_MAX & (sizeof(u64) - 1)); + size = STR_VAR_LEN_MAX; for (i = 0; i < n_str; i++) { diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h index ac35c45207c4..5166705d1556 100644 --- a/kernel/trace/trace_synth.h +++ b/kernel/trace/trace_synth.h @@ -7,7 +7,7 @@ #define SYNTH_SYSTEM "synthetic" #define SYNTH_FIELDS_MAX 32 -#define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ +#define STR_VAR_LEN_MAX MAX_FILTER_STR_VAL /* must be multiple of sizeof(u64) */ struct synth_field { char *type; -- cgit v1.2.3 From 8fbeb52a598c7ab5aa603d6bb083b8a8d16d607a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 4 Oct 2020 17:14:04 -0500 Subject: tracing: Fix parse_synth_field() error handling synth_field_size() returns either a positive size or an error (zero or a negative value). However, the existing code assumes the only error value is 0. It doesn't handle negative error codes, as it assigns directly to field->size (a size_t; unsigned), thereby interpreting the error code as a valid size instead. Do the test before assignment to field->size. [ axelrasmussen@google.com: changelog addition, first paragraph above ] Link: https://lkml.kernel.org/r/9b6946d9776b2eeb43227678158196de1c3c6e1d.1601848695.git.zanussi@kernel.org Fixes: 4b147936fa50 (tracing: Add support for 'synthetic' events) Reviewed-by: Masami Hiramatsu Tested-by: Axel Rasmussen Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index a9cd7793f7ea..fa8a99828f41 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -465,6 +465,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, struct synth_field *field; const char *prefix = NULL, *field_type = argv[0], *field_name, *array; int len, ret = 0; + ssize_t size; if (field_type[0] == ';') field_type++; @@ -520,11 +521,12 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, field->type[len - 1] = '\0'; } - field->size = synth_field_size(field->type); - if (!field->size) { + size = synth_field_size(field->type); + if (size <= 0) { ret = -EINVAL; goto free; } + field->size = size; if (synth_field_is_string(field->type)) field->is_string = true; -- cgit v1.2.3 From 63a1e5de3006f4ad713e4d72bcb404d0301e853d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 4 Oct 2020 17:14:05 -0500 Subject: tracing: Save normal string variables String variables created as field variables and save variables are already handled properly by having their values copied when set. The same isn't done for normal variables, but needs to be - simply saving a pointer to a string contained in an old event isn't sufficient, since that event's data may quickly become overwritten and therefore a string pointer to it could yield garbage. This change uses the same mechanism as field variables and simply appends the new strings to the existing per-element field_var_str[] array allocated for that purpose. Link: https://lkml.kernel.org/r/1c1a03798b02e67307412a0c719d1bfb69b13007.1601848695.git.zanussi@kernel.org Fixes: 02205a6752f2 (tracing: Add support for 'field variables') Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 3b22e2122d1a..812bc5f94b5c 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -147,6 +147,8 @@ struct hist_field { */ unsigned int var_ref_idx; bool read_once; + + unsigned int var_str_idx; }; static u64 hist_field_none(struct hist_field *field, @@ -349,6 +351,7 @@ struct hist_trigger_data { unsigned int n_keys; unsigned int n_fields; unsigned int n_vars; + unsigned int n_var_str; unsigned int key_size; struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; unsigned int n_sort_keys; @@ -1396,7 +1399,12 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) } } - n_str = hist_data->n_field_var_str + hist_data->n_save_var_str; + n_str = hist_data->n_field_var_str + hist_data->n_save_var_str + + hist_data->n_var_str; + if (n_str > SYNTH_FIELDS_MAX) { + hist_elt_data_free(elt_data); + return -EINVAL; + } BUILD_BUG_ON(STR_VAR_LEN_MAX & (sizeof(u64) - 1)); @@ -3653,6 +3661,7 @@ static int create_var_field(struct hist_trigger_data *hist_data, { struct trace_array *tr = hist_data->event_file->tr; unsigned long flags = 0; + int ret; if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) return -EINVAL; @@ -3667,7 +3676,12 @@ static int create_var_field(struct hist_trigger_data *hist_data, if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX)) return -EINVAL; - return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); + ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); + + if (hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING) + hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++; + + return ret; } static int create_val_fields(struct hist_trigger_data *hist_data, @@ -4394,6 +4408,22 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, hist_val = hist_field->fn(hist_field, elt, rbe, rec); if (hist_field->flags & HIST_FIELD_FL_VAR) { var_idx = hist_field->var.idx; + + if (hist_field->flags & HIST_FIELD_FL_STRING) { + unsigned int str_start, var_str_idx, idx; + char *str, *val_str; + + str_start = hist_data->n_field_var_str + + hist_data->n_save_var_str; + var_str_idx = hist_field->var_str_idx; + idx = str_start + var_str_idx; + + str = elt_data->field_var_str[idx]; + val_str = (char *)(uintptr_t)hist_val; + strscpy(str, val_str, STR_VAR_LEN_MAX); + + hist_val = (u64)(uintptr_t)str; + } tracing_map_set_var(elt, var_idx, hist_val); continue; } -- cgit v1.2.3 From bd82631d7ccdc894af2738e47abcba2cb6e7dea9 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 4 Oct 2020 17:14:06 -0500 Subject: tracing: Add support for dynamic strings to synthetic events Currently, sythetic events only support static string fields such as: # echo 'test_latency u64 lat; char somename[32]' > /sys/kernel/debug/tracing/synthetic_events Which is fine, but wastes a lot of space in the event. It also prevents the most commonly-defined strings in the existing trace events e.g. those defined using __string(), from being passed to synthetic events via the trace() action. With this change, synthetic events with dynamic fields can be defined: # echo 'test_latency u64 lat; char somename[]' > /sys/kernel/debug/tracing/synthetic_events And the trace() action can be used to generate events using either dynamic or static strings: # echo 'hist:keys=name:lat=common_timestamp.usecs-$ts0:onmatch(sys.event).test_latency($lat,name)' > /sys/kernel/debug/tracing/events The synthetic event dynamic strings are implemented in the same way as the existing __data_loc strings and appear as such in the format file. [ : added __set_synth_event_print_fmt() changes: I added the following to make it work with trace-cmd. Dynamic strings must have __get_str() for events in the print_fmt otherwise it can't be parsed correctly. ] Link: https://lore.kernel.org/r/cover.1601588066.git.zanussi@kernel.org Link: https://lkml.kernel.org/r/3ed35b6d0e390f5b94cb4a9ba1cc18f5982ab277.1601848695.git.zanussi@kernel.org Tested-by: Axel Rasmussen Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/synth_event_gen_test.c | 18 ++- kernel/trace/trace_events_hist.c | 9 ++ kernel/trace/trace_events_synth.c | 248 +++++++++++++++++++++++++++++++----- kernel/trace/trace_synth.h | 4 + 4 files changed, 241 insertions(+), 38 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index 7d56d621ffea..edd912cd14aa 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -242,9 +242,11 @@ static struct synth_field_desc create_synth_test_fields[] = { { .type = "pid_t", .name = "next_pid_field" }, { .type = "char[16]", .name = "next_comm_field" }, { .type = "u64", .name = "ts_ns" }, + { .type = "char[]", .name = "dynstring_field_1" }, { .type = "u64", .name = "ts_ms" }, { .type = "unsigned int", .name = "cpu" }, { .type = "char[64]", .name = "my_string_field" }, + { .type = "char[]", .name = "dynstring_field_2" }, { .type = "int", .name = "my_int_field" }, }; @@ -254,7 +256,7 @@ static struct synth_field_desc create_synth_test_fields[] = { */ static int __init test_create_synth_event(void) { - u64 vals[7]; + u64 vals[9]; int ret; /* Create the create_synth_test event with the fields above */ @@ -292,10 +294,12 @@ static int __init test_create_synth_event(void) vals[0] = 777; /* next_pid_field */ vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */ vals[2] = 1000000; /* ts_ns */ - vals[3] = 1000; /* ts_ms */ - vals[4] = raw_smp_processor_id(); /* cpu */ - vals[5] = (u64)(long)"thneed"; /* my_string_field */ - vals[6] = 398; /* my_int_field */ + vals[3] = (u64)(long)"xrayspecs"; /* dynstring_field_1 */ + vals[4] = 1000; /* ts_ms */ + vals[5] = raw_smp_processor_id(); /* cpu */ + vals[6] = (u64)(long)"thneed"; /* my_string_field */ + vals[7] = (u64)(long)"kerplunk"; /* dynstring_field_2 */ + vals[8] = 398; /* my_int_field */ /* Now generate a create_synth_test event */ ret = synth_event_trace_array(create_synth_test, vals, ARRAY_SIZE(vals)); @@ -422,13 +426,15 @@ static int __init test_trace_synth_event(void) int ret; /* Trace some bogus values just for testing */ - ret = synth_event_trace(create_synth_test, 7, /* number of values */ + ret = synth_event_trace(create_synth_test, 9, /* number of values */ (u64)444, /* next_pid_field */ (u64)(long)"clackers", /* next_comm_field */ (u64)1000000, /* ts_ns */ + (u64)(long)"viewmaster",/* dynstring_field_1 */ (u64)1000, /* ts_ms */ (u64)raw_smp_processor_id(), /* cpu */ (u64)(long)"Thneed", /* my_string_field */ + (u64)(long)"yoyos", /* dynstring_field_2 */ (u64)999); /* my_int_field */ return ret; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 812bc5f94b5c..c74a7d157306 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3289,6 +3289,15 @@ static int check_synth_field(struct synth_event *event, field = event->fields[field_pos]; + /* + * A dynamic string synth field can accept static or + * dynamic. A static string synth field can only accept a + * same-sized static string, which is checked for later. + */ + if (strstr(hist_field->type, "char[") && field->is_string + && field->is_dynamic) + return 0; + if (strcmp(field->type, hist_field->type) != 0) { if (field->size != hist_field->size || field->is_signed != hist_field->is_signed) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index fa8a99828f41..24bc6d61aa40 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -88,7 +88,7 @@ static int synth_event_define_fields(struct trace_event_call *call) event->fields[i]->offset = n_u64; - if (event->fields[i]->is_string) { + if (event->fields[i]->is_string && !event->fields[i]->is_dynamic) { offset += STR_VAR_LEN_MAX; n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { @@ -139,6 +139,9 @@ static int synth_field_string_size(char *type) if (len > 3) return -EINVAL; + if (len == 0) + return 0; /* variable-length string */ + strncpy(buf, start, len); buf[len] = '\0'; @@ -290,10 +293,25 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, /* parameter values */ if (se->fields[i]->is_string) { - trace_seq_printf(s, print_fmt, se->fields[i]->name, - (char *)&entry->fields[n_u64], - i == se->n_fields - 1 ? "" : " "); - n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + if (se->fields[i]->is_dynamic) { + u32 offset, data_offset; + char *str_field; + + offset = (u32)entry->fields[n_u64]; + data_offset = offset & 0xffff; + + str_field = (char *)entry + data_offset; + + trace_seq_printf(s, print_fmt, se->fields[i]->name, + str_field, + i == se->n_fields - 1 ? "" : " "); + n_u64++; + } else { + trace_seq_printf(s, print_fmt, se->fields[i]->name, + (char *)&entry->fields[n_u64], + i == se->n_fields - 1 ? "" : " "); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } } else { struct trace_print_flags __flags[] = { __def_gfpflag_names, {-1, NULL} }; @@ -325,16 +343,52 @@ static struct trace_event_functions synth_event_funcs = { .trace = print_synth_event }; +static unsigned int trace_string(struct synth_trace_event *entry, + struct synth_event *event, + char *str_val, + bool is_dynamic, + unsigned int data_size, + unsigned int *n_u64) +{ + unsigned int len = 0; + char *str_field; + + if (is_dynamic) { + u32 data_offset; + + data_offset = offsetof(typeof(*entry), fields); + data_offset += event->n_u64 * sizeof(u64); + data_offset += data_size; + + str_field = (char *)entry + data_offset; + + len = strlen(str_val) + 1; + strscpy(str_field, str_val, len); + + data_offset |= len << 16; + *(u32 *)&entry->fields[*n_u64] = data_offset; + + (*n_u64)++; + } else { + str_field = (char *)&entry->fields[*n_u64]; + + strscpy(str_field, str_val, STR_VAR_LEN_MAX); + (*n_u64) += STR_VAR_LEN_MAX / sizeof(u64); + } + + return len; +} + static notrace void trace_event_raw_event_synth(void *__data, u64 *var_ref_vals, unsigned int *var_ref_idx) { + unsigned int i, n_u64, val_idx, len, data_size = 0; struct trace_event_file *trace_file = __data; struct synth_trace_event *entry; struct trace_event_buffer fbuffer; struct trace_buffer *buffer; struct synth_event *event; - unsigned int i, n_u64, val_idx; int fields_size = 0; event = trace_file->event_call->data; @@ -344,6 +398,18 @@ static notrace void trace_event_raw_event_synth(void *__data, fields_size = event->n_u64 * sizeof(u64); + for (i = 0; i < event->n_dynamic_fields; i++) { + unsigned int field_pos = event->dynamic_fields[i]->field_pos; + char *str_val; + + val_idx = var_ref_idx[field_pos]; + str_val = (char *)(long)var_ref_vals[val_idx]; + + len = strlen(str_val) + 1; + + fields_size += len; + } + /* * Avoid ring buffer recursion detection, as this event * is being performed within another event. @@ -360,10 +426,11 @@ static notrace void trace_event_raw_event_synth(void *__data, val_idx = var_ref_idx[i]; if (event->fields[i]->is_string) { char *str_val = (char *)(long)var_ref_vals[val_idx]; - char *str_field = (char *)&entry->fields[n_u64]; - strscpy(str_field, str_val, STR_VAR_LEN_MAX); - n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + len = trace_string(entry, event, str_val, + event->fields[i]->is_dynamic, + data_size, &n_u64); + data_size += len; /* only dynamic string increments */ } else { struct synth_field *field = event->fields[i]; u64 val = var_ref_vals[val_idx]; @@ -422,8 +489,13 @@ static int __set_synth_event_print_fmt(struct synth_event *event, pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); for (i = 0; i < event->n_fields; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, - ", REC->%s", event->fields[i]->name); + if (event->fields[i]->is_dynamic && + event->fields[i]->is_dynamic) + pos += snprintf(buf + pos, LEN_OR_ZERO, + ", __get_str(%s)", event->fields[i]->name); + else + pos += snprintf(buf + pos, LEN_OR_ZERO, + ", REC->%s", event->fields[i]->name); } #undef LEN_OR_ZERO @@ -522,9 +594,30 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, } size = synth_field_size(field->type); - if (size <= 0) { + if (size < 0) { ret = -EINVAL; goto free; + } else if (size == 0) { + if (synth_field_is_string(field->type)) { + char *type; + + type = kzalloc(sizeof("__data_loc ") + strlen(field->type) + 1, GFP_KERNEL); + if (!type) { + ret = -ENOMEM; + goto free; + } + + strcat(type, "__data_loc "); + strcat(type, field->type); + kfree(field->type); + field->type = type; + + field->is_dynamic = true; + size = sizeof(u64); + } else { + ret = -EINVAL; + goto free; + } } field->size = size; @@ -532,7 +625,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, field->is_string = true; field->is_signed = synth_field_signed(field->type); - out: return field; free: @@ -663,6 +755,7 @@ static void free_synth_event(struct synth_event *event) free_synth_field(event->fields[i]); kfree(event->fields); + kfree(event->dynamic_fields); kfree(event->name); kfree(event->class.system); free_synth_tracepoint(event->tp); @@ -673,8 +766,8 @@ static void free_synth_event(struct synth_event *event) static struct synth_event *alloc_synth_event(const char *name, int n_fields, struct synth_field **fields) { + unsigned int i, j, n_dynamic_fields = 0; struct synth_event *event; - unsigned int i; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) { @@ -696,11 +789,33 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields, goto out; } + for (i = 0; i < n_fields; i++) + if (fields[i]->is_dynamic) + n_dynamic_fields++; + + if (n_dynamic_fields) { + event->dynamic_fields = kcalloc(n_dynamic_fields, + sizeof(*event->dynamic_fields), + GFP_KERNEL); + if (!event->dynamic_fields) { + free_synth_event(event); + event = ERR_PTR(-ENOMEM); + goto out; + } + } + dyn_event_init(&event->devent, &synth_event_ops); - for (i = 0; i < n_fields; i++) + for (i = 0, j = 0; i < n_fields; i++) { event->fields[i] = fields[i]; + if (fields[i]->is_dynamic) { + event->dynamic_fields[j] = fields[i]; + event->dynamic_fields[j]->field_pos = i; + event->dynamic_fields[j++] = fields[i]; + event->n_dynamic_fields++; + } + } event->n_fields = n_fields; out: return event; @@ -712,6 +827,10 @@ static int synth_event_check_arg_fn(void *data) int size; size = synth_field_size((char *)arg_pair->lhs); + if (size == 0) { + if (strstr((char *)arg_pair->lhs, "[")) + return 0; + } return size ? 0 : -EINVAL; } @@ -1200,10 +1319,9 @@ void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) EXPORT_SYMBOL_GPL(synth_event_cmd_init); static inline int -__synth_event_trace_start(struct trace_event_file *file, - struct synth_event_trace_state *trace_state) +__synth_event_trace_init(struct trace_event_file *file, + struct synth_event_trace_state *trace_state) { - int entry_size, fields_size = 0; int ret = 0; memset(trace_state, '\0', sizeof(*trace_state)); @@ -1225,8 +1343,20 @@ __synth_event_trace_start(struct trace_event_file *file, } trace_state->event = file->event_call->data; +out: + return ret; +} + +static inline int +__synth_event_trace_start(struct trace_event_file *file, + struct synth_event_trace_state *trace_state, + int dynamic_fields_size) +{ + int entry_size, fields_size = 0; + int ret = 0; fields_size = trace_state->event->n_u64 * sizeof(u64); + fields_size += dynamic_fields_size; /* * Avoid ring buffer recursion detection, as this event @@ -1243,7 +1373,7 @@ __synth_event_trace_start(struct trace_event_file *file, ring_buffer_nest_end(trace_state->buffer); ret = -EINVAL; } -out: + return ret; } @@ -1276,23 +1406,46 @@ __synth_event_trace_end(struct synth_event_trace_state *trace_state) */ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) { + unsigned int i, n_u64, len, data_size = 0; struct synth_event_trace_state state; - unsigned int i, n_u64; va_list args; int ret; - ret = __synth_event_trace_start(file, &state); + ret = __synth_event_trace_init(file, &state); if (ret) { if (ret == -ENOENT) ret = 0; /* just disabled, not really an error */ return ret; } + if (state.event->n_dynamic_fields) { + va_start(args, n_vals); + + for (i = 0; i < state.event->n_fields; i++) { + u64 val = va_arg(args, u64); + + if (state.event->fields[i]->is_string && + state.event->fields[i]->is_dynamic) { + char *str_val = (char *)(long)val; + + data_size += strlen(str_val) + 1; + } + } + + va_end(args); + } + + ret = __synth_event_trace_start(file, &state, data_size); + if (ret) + return ret; + if (n_vals != state.event->n_fields) { ret = -EINVAL; goto out; } + data_size = 0; + va_start(args, n_vals); for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) { u64 val; @@ -1301,10 +1454,11 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) if (state.event->fields[i]->is_string) { char *str_val = (char *)(long)val; - char *str_field = (char *)&state.entry->fields[n_u64]; - strscpy(str_field, str_val, STR_VAR_LEN_MAX); - n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + len = trace_string(state.entry, state.event, str_val, + state.event->fields[i]->is_dynamic, + data_size, &n_u64); + data_size += len; /* only dynamic string increments */ } else { struct synth_field *field = state.event->fields[i]; @@ -1357,29 +1511,46 @@ EXPORT_SYMBOL_GPL(synth_event_trace); int synth_event_trace_array(struct trace_event_file *file, u64 *vals, unsigned int n_vals) { + unsigned int i, n_u64, field_pos, len, data_size = 0; struct synth_event_trace_state state; - unsigned int i, n_u64; + char *str_val; int ret; - ret = __synth_event_trace_start(file, &state); + ret = __synth_event_trace_init(file, &state); if (ret) { if (ret == -ENOENT) ret = 0; /* just disabled, not really an error */ return ret; } + if (state.event->n_dynamic_fields) { + for (i = 0; i < state.event->n_dynamic_fields; i++) { + field_pos = state.event->dynamic_fields[i]->field_pos; + str_val = (char *)(long)vals[field_pos]; + len = strlen(str_val) + 1; + data_size += len; + } + } + + ret = __synth_event_trace_start(file, &state, data_size); + if (ret) + return ret; + if (n_vals != state.event->n_fields) { ret = -EINVAL; goto out; } + data_size = 0; + for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) { if (state.event->fields[i]->is_string) { char *str_val = (char *)(long)vals[i]; - char *str_field = (char *)&state.entry->fields[n_u64]; - strscpy(str_field, str_val, STR_VAR_LEN_MAX); - n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + len = trace_string(state.entry, state.event, str_val, + state.event->fields[i]->is_dynamic, + data_size, &n_u64); + data_size += len; /* only dynamic string increments */ } else { struct synth_field *field = state.event->fields[i]; u64 val = vals[i]; @@ -1447,9 +1618,17 @@ int synth_event_trace_start(struct trace_event_file *file, if (!trace_state) return -EINVAL; - ret = __synth_event_trace_start(file, trace_state); - if (ret == -ENOENT) - ret = 0; /* just disabled, not really an error */ + ret = __synth_event_trace_init(file, trace_state); + if (ret) { + if (ret == -ENOENT) + ret = 0; /* just disabled, not really an error */ + return ret; + } + + if (trace_state->event->n_dynamic_fields) + return -ENOTSUPP; + + ret = __synth_event_trace_start(file, trace_state, 0); return ret; } @@ -1510,6 +1689,11 @@ static int __synth_event_add_val(const char *field_name, u64 val, char *str_val = (char *)(long)val; char *str_field; + if (field->is_dynamic) { /* add_val can't do dynamic strings */ + ret = -EINVAL; + goto out; + } + if (!str_val) { ret = -EINVAL; goto out; diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h index 5166705d1556..6e146b959dcd 100644 --- a/kernel/trace/trace_synth.h +++ b/kernel/trace/trace_synth.h @@ -16,6 +16,8 @@ struct synth_field { unsigned int offset; bool is_signed; bool is_string; + bool is_dynamic; + bool field_pos; }; struct synth_event { @@ -24,6 +26,8 @@ struct synth_event { char *name; struct synth_field **fields; unsigned int n_fields; + struct synth_field **dynamic_fields; + unsigned int n_dynamic_fields; unsigned int n_u64; struct trace_event_class class; struct trace_event_call call; -- cgit v1.2.3 From 1bc36bd4a8557285870b34cfec7910871049e93e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 4 Oct 2020 17:14:07 -0500 Subject: tracing: Add README information for synthetic_events file Add an entry with a basic description of events/synthetic_events along with a simple example. Link: https://lkml.kernel.org/r/3c7f178cf95aaeebc01eda7d95600dd937233eb7.1601848695.git.zanussi@kernel.org Reviewed-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3f2533adae72..73fd0e0c0f39 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5249,7 +5249,12 @@ static const char readme_msg[] = "\t trace(,param list) - generate synthetic event\n" "\t save(field,...) - save current event fields\n" #ifdef CONFIG_TRACER_SNAPSHOT - "\t snapshot() - snapshot the trace buffer\n" + "\t snapshot() - snapshot the trace buffer\n\n" +#endif +#ifdef CONFIG_SYNTH_EVENTS + " events/synthetic_events\t- Create/append/remove/show synthetic events\n" + "\t Write into this file to define/undefine new synthetic events.\n" + "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n" #endif #endif ; -- cgit v1.2.3 From 8db4d6bfbbf9206567fd529dc73dc058b3929db0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 4 Oct 2020 17:14:09 -0500 Subject: tracing: Change synthetic event string format to limit printed length Change the format for printing synthetic field strings to limit the length of the string printed even if it's not correctly terminated. Link: https://lore.kernel.org/r/20201002210036.0200371b@oasis.local.home Link: https://lkml.kernel.org/r/b6bdb34e70d970e8026daa3503db6b8e5cdad524.1601848695.git.zanussi@kernel.org Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 24bc6d61aa40..742ce5f62d6d 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -234,7 +234,7 @@ static const char *synth_field_fmt(char *type) else if (strcmp(type, "gfp_t") == 0) fmt = "%x"; else if (synth_field_is_string(type)) - fmt = "%s"; + fmt = "%.*s"; return fmt; } @@ -303,11 +303,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, str_field = (char *)entry + data_offset; trace_seq_printf(s, print_fmt, se->fields[i]->name, + STR_VAR_LEN_MAX, str_field, i == se->n_fields - 1 ? "" : " "); n_u64++; } else { trace_seq_printf(s, print_fmt, se->fields[i]->name, + STR_VAR_LEN_MAX, (char *)&entry->fields[n_u64], i == se->n_fields - 1 ? "" : " "); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); -- cgit v1.2.3 From 59e65b3358f44d4d0134eca3b6c269f359f21cd5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 31 Aug 2020 11:11:00 +0800 Subject: ftrace: Use fls() to get the bits for dup_hash() The effect here is to get the number of bits, lets use fls() to do this job. Link: https://lkml.kernel.org/r/20200831031104.23322-3-richard.weiyang@linux.alibaba.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 123d520b9261..5633d37d8806 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1370,8 +1370,9 @@ static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size) /* * Make the hash size about 1/2 the # found */ - for (size /= 2; size; size >>= 1) - bits++; + bits = fls(size); + if (bits) + bits--; /* Don't allocate too much */ if (bits > FTRACE_HASH_MAX_BITS) -- cgit v1.2.3 From be49313273211c47d1b317d6b2dbe02637c2794c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 5 Oct 2020 20:21:14 -0400 Subject: ftrace: Simplify the hash calculation No need to add a check to subtract the number of bits if bits is zero after fls(). Just divide the size by two before calling it. This does give the same answer for size of 0 and 1, but that's fine. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5633d37d8806..c51a91aea1fd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1368,11 +1368,10 @@ static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size) int i; /* - * Make the hash size about 1/2 the # found + * Use around half the size (max bit of it), but + * a minimum of 2 is fine (as size of 0 or 1 both give 1 for bits). */ - bits = fls(size); - if (bits) - bits--; + bits = fls(size / 2); /* Don't allocate too much */ if (bits > FTRACE_HASH_MAX_BITS) -- cgit v1.2.3 From b40c6eabfcd409e022fcb377ac268e3ef9446fde Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 31 Aug 2020 11:11:02 +0800 Subject: ftrace: Simplify the calculation of page number for ftrace_page->records Based on the following two reasones, we could simplify the calculation: - If the number after roundup count is not power of 2, we would definitely have more than 1 empty page with a higher order. - get_count_order() just return current order, so one lower order could meet the requirement. The calculation could be simplified by lower one order level when pages are not power of 2. Link: https://lkml.kernel.org/r/20200831031104.23322-5-richard.weiyang@linux.alibaba.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c51a91aea1fd..c3be18b4f27b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3129,18 +3129,19 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) static int ftrace_allocate_records(struct ftrace_page *pg, int count) { int order; - int cnt; + int pages, cnt; if (WARN_ON(!count)) return -EINVAL; - order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); + pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE); + order = get_count_order(pages); /* * We want to fill as much as possible. No more than a page * may be empty. */ - while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) + if (!is_power_of_2(pages)) order--; again: -- cgit v1.2.3 From 7ba031e8b74c6aa156a0d9867dc13cf817d52047 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 5 Oct 2020 20:37:41 -0400 Subject: ftrace: Format variable declarations of ftrace_allocate_records I hate when unrelated variables are declared on the same line. Split them. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c3be18b4f27b..4833b6a82ce7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3129,7 +3129,8 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) static int ftrace_allocate_records(struct ftrace_page *pg, int count) { int order; - int pages, cnt; + int pages; + int cnt; if (WARN_ON(!count)) return -EINVAL; -- cgit v1.2.3 From 43aa422c0c07135236bd91cbb45b048fd85e73b5 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Wed, 30 Sep 2020 19:43:03 +0100 Subject: tracing: Remove a pointless assignment The variable 'len' has been assigned a value but is not used after that. So, remove the assignement. Link: https://lkml.kernel.org/r/20200930184303.22896-1-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73fd0e0c0f39..0806fa9f2815 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6667,7 +6667,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, written = -EFAULT; } else written = cnt; - len = cnt; if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { /* do not add \n before testing triggers, but add \0 */ -- cgit v1.2.3 From 848183553e431e6e9c2ea2f72421a7a1bbc6532e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 7 Oct 2020 10:34:34 -0400 Subject: tracing: Fix synthetic print fmt check for use of __get_str() A cut and paste error had the check to use __get_str() test "is_dynamic" twice, instead of checking "is_string && is_dynamic". Link: https://lore.kernel.org/r/d34dccd5-96ba-a2d9-46ea-de8807525deb@canonical.com Reported-by: Colin Ian King Acked-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 742ce5f62d6d..3b2dcc42b8ee 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -491,7 +491,7 @@ static int __set_synth_event_print_fmt(struct synth_event *event, pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); for (i = 0; i < event->n_fields; i++) { - if (event->fields[i]->is_dynamic && + if (event->fields[i]->is_string && event->fields[i]->is_dynamic) pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(%s)", event->fields[i]->name); -- cgit v1.2.3 From 6d9bd139455d9d40fec8c242985996468b34180c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 13 Oct 2020 15:48:52 -0400 Subject: tracing: Check return value of __create_val_fields() before using its result After having a typo for writing a histogram trigger. Wrote: echo 'hist:key=pid:ts=common_timestamp.usec' > events/sched/sched_waking/trigger Instead of: echo 'hist:key=pid:ts=common_timestamp.usecs' > events/sched/sched_waking/trigger and the following crash happened: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 4 PID: 1641 Comm: sh Not tainted 5.9.0-rc5-test+ #549 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:event_hist_trigger_func+0x70b/0x1ee0 Code: 24 08 89 d5 49 89 cc e9 8c 00 00 00 4c 89 f2 41 b9 00 10 00 00 4c 89 e1 44 89 ee 4c 89 ff e8 dc d3 ff ff 45 89 ea 4b 8b 14 d7 42 08 04 74 17 41 8b 8f c0 00 00 00 8d 71 01 41 89 b7 c0 00 00 RSP: 0018:ffff959213d53db0 EFLAGS: 00010202 RAX: ffffffffffffffea RBX: 0000000000000000 RCX: 0000000000084c04 RDX: 0000000000000000 RSI: df7326aefebd174c RDI: 0000000000031080 RBP: 0000000000000002 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000046 R12: ffff959211dcf690 R13: 0000000000000001 R14: ffff95925a36e370 R15: ffff959251c89800 FS: 00007fb9ea934740(0000) GS:ffff95925ab00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 00000000c976c005 CR4: 00000000001706e0 Call Trace: ? trigger_process_regex+0x78/0x110 trigger_process_regex+0xc5/0x110 event_trigger_write+0x71/0xd0 vfs_write+0xca/0x210 ksys_write+0x70/0xf0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fb9eaa29487 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 This was caused by accessing the hlist_data fields after the call to __create_val_fields() without checking if the creation succeed. Link: https://lkml.kernel.org/r/20201013154852.3abd8702@gandalf.local.home Fixes: 63a1e5de3006 ("tracing: Save normal string variables") Reviewed-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index c74a7d157306..96c3f86b81c5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3687,7 +3687,7 @@ static int create_var_field(struct hist_trigger_data *hist_data, ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); - if (hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING) + if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING) hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++; return ret; -- cgit v1.2.3 From bbeb97464eefc65f506084fd9f18f21653e01137 Mon Sep 17 00:00:00 2001 From: Gaurav Kohli Date: Tue, 6 Oct 2020 15:03:53 +0530 Subject: tracing: Fix race in trace_open and buffer resize call Below race can come, if trace_open and resize of cpu buffer is running parallely on different cpus CPUX CPUY ring_buffer_resize atomic_read(&buffer->resize_disabled) tracing_open tracing_reset_online_cpus ring_buffer_reset_cpu rb_reset_cpu rb_update_pages remove/insert pages resetting pointer This race can cause data abort or some times infinte loop in rb_remove_pages and rb_insert_pages while checking pages for sanity. Take buffer lock to fix this. Link: https://lkml.kernel.org/r/1601976833-24377-1-git-send-email-gkohli@codeaurora.org Cc: stable@vger.kernel.org Fixes: b23d7a5f4a07a ("ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU") Signed-off-by: Gaurav Kohli Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 93ef0ab6ea20..15bf28b13e50 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4866,6 +4866,9 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; + /* prevent another thread from changing buffer sizes */ + mutex_lock(&buffer->mutex); + atomic_inc(&cpu_buffer->resize_disabled); atomic_inc(&cpu_buffer->record_disabled); @@ -4876,6 +4879,8 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); + + mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); @@ -4889,6 +4894,9 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) struct ring_buffer_per_cpu *cpu_buffer; int cpu; + /* prevent another thread from changing buffer sizes */ + mutex_lock(&buffer->mutex); + for_each_online_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; @@ -4907,6 +4915,8 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); } + + mutex_unlock(&buffer->mutex); } /** -- cgit v1.2.3 From c16340971949ba6560c8e7c985bad8a180c57aa3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 11 Oct 2020 00:28:09 +0900 Subject: tracing/boot: Add ftrace.instance.*.alloc_snapshot option Add ftrace.instance.*.alloc_snapshot option. This option has been described in Documentation/trace/boottime-trace.rst but not implemented yet. ftrace.[instance.INSTANCE.]alloc_snapshot Allocate snapshot buffer. The difference from kernel.alloc_snapshot is that the kernel.alloc_snapshot will allocate the buffer only for the main instance, but this can allocate buffer for any new instances. Link: https://lkml.kernel.org/r/160234368948.400560.15313384470765915015.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 754e3cf2df3a..c22a152ef0b4 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -284,6 +284,12 @@ trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node) if (tracing_set_tracer(tr, p) < 0) pr_err("Failed to set given tracer: %s\n", p); } + + /* Since tracer can free snapshot buffer, allocate snapshot here.*/ + if (xbc_node_find_value(node, "alloc_snapshot", NULL)) { + if (tracing_alloc_snapshot_instance(tr) < 0) + pr_err("Failed to allocate snapshot buffer\n"); + } } static void __init -- cgit v1.2.3 From 499f7bb0853570c5d9cbf2a2ecbed517852cacfa Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Sat, 10 Oct 2020 22:09:24 +0800 Subject: tracing: Fix some typos in comments s/wihin/within/ s/retrieven/retrieved/ s/suppport/support/ s/wil/will/ s/accidently/accidentally/ s/if the if the/if the/ Link: https://lkml.kernel.org/r/20201010140924.3809-1-hqjagain@gmail.com Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 ++-- kernel/trace/trace.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0806fa9f2815..63c97012ed39 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9465,7 +9465,7 @@ __init static int tracer_alloc_buffers(void) } /* - * Make sure we don't accidently add more trace options + * Make sure we don't accidentally add more trace options * than we have bits for. */ BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); @@ -9494,7 +9494,7 @@ __init static int tracer_alloc_buffers(void) /* * The prepare callbacks allocates some memory for the ring buffer. We - * don't free the buffer if the if the CPU goes down. If we were to free + * don't free the buffer if the CPU goes down. If we were to free * the buffer, then the user would lose any trace that was in the * buffer. The memory will be removed once the "instance" is removed. */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5b0e797cacdd..f777bb68e660 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -246,7 +246,7 @@ typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data); * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is * passed in turn to the cond_snapshot.update() function. That data * can be compared by the update() implementation with the cond_data - * contained wihin the struct cond_snapshot instance associated with + * contained within the struct cond_snapshot instance associated with * the trace_array. Because the tr->max_lock is held throughout the * update() call, the update() function can directly retrieve the * cond_snapshot and cond_data associated with the per-instance @@ -271,7 +271,7 @@ typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data); * take the snapshot, by returning 'true' if so, 'false' if no * snapshot should be taken. Because the max_lock is held for * the duration of update(), the implementation is safe to - * directly retrieven and save any implementation data it needs + * directly retrieved and save any implementation data it needs * to in association with the snapshot. */ struct cond_snapshot { @@ -573,7 +573,7 @@ struct tracer { * The function callback, which can use the FTRACE bits to * check for recursion. * - * Now if the arch does not suppport a feature, and it calls + * Now if the arch does not support a feature, and it calls * the global list function which calls the ftrace callback * all three of these steps will do a recursion protection. * There's no reason to do one if the previous caller already @@ -1479,7 +1479,7 @@ __trace_event_discard_commit(struct trace_buffer *buffer, /* * Helper function for event_trigger_unlock_commit{_regs}(). * If there are event triggers attached to this event that requires - * filtering against its fields, then they wil be called as the + * filtering against its fields, then they will be called as the * entry already holds the field information of the current event. * * It also checks if the event should be discarded or not. -- cgit v1.2.3 From 7d27adf575e7e917666f4eeca62188353a358060 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 13 Oct 2020 09:17:52 -0500 Subject: tracing: Don't show dynamic string internals in synthetic event description For synthetic event dynamic fields, the type contains "__data_loc", which is basically an internal part of the type which is only meant to be displayed in the format, not in the event description itself, which is confusing to users since they can't use __data_loc on the command-line to define an event field, which printing it would lead them to believe. So filter it out from the description, while leaving it in the type. Link: https://lkml.kernel.org/r/b3b7baf7813298a5ede4ff02e2e837b91c05a724.1602598160.git.zanussi@kernel.org Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 3b2dcc42b8ee..b19e2f4159ab 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1867,14 +1867,22 @@ static int __synth_event_show(struct seq_file *m, struct synth_event *event) { struct synth_field *field; unsigned int i; + char *type, *t; seq_printf(m, "%s\t", event->name); for (i = 0; i < event->n_fields; i++) { field = event->fields[i]; + type = field->type; + t = strstr(type, "__data_loc"); + if (t) { /* __data_loc belongs in format but not event desc */ + t += sizeof("__data_loc"); + type = t; + } + /* parameter values */ - seq_printf(m, "%s %s%s", field->type, field->name, + seq_printf(m, "%s %s%s", type, field->name, i == event->n_fields - 1 ? "" : "; "); } -- cgit v1.2.3 From 42d120e2dda5724ea789461413b8691abc315ba1 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 13 Oct 2020 09:17:53 -0500 Subject: tracing: Move is_good_name() from trace_probe.h to trace.h is_good_name() is useful for other trace infrastructure, such as synthetic events, so make it available via trace.h. Link: https://lkml.kernel.org/r/cc6d6a2d7da6957fcbe1e2922e76d18d2bb459b4.1602598160.git.zanussi@kernel.org Acked-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 13 +++++++++++++ kernel/trace/trace_probe.h | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f777bb68e660..34e0c4d5a6e7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_FTRACE_SYSCALLS #include /* For NR_SYSCALLS */ @@ -2090,4 +2091,16 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) iter->pos = -1; } +/* Check the name is good for event/group/fields */ +static inline bool is_good_name(const char *name) +{ + if (!isalpha(*name) && *name != '_') + return false; + while (*++name != '\0') { + if (!isalpha(*name) && !isdigit(*name) && *name != '_') + return false; + } + return true; +} + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 04d00987da69..2f703a20c724 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -348,18 +347,6 @@ bool trace_probe_match_command_args(struct trace_probe *tp, #define trace_probe_for_each_link_rcu(pos, tp) \ list_for_each_entry_rcu(pos, &(tp)->event->files, list) -/* Check the name is good for event/group/fields */ -static inline bool is_good_name(const char *name) -{ - if (!isalpha(*name) && *name != '_') - return false; - while (*++name != '\0') { - if (!isalpha(*name) && !isdigit(*name) && *name != '_') - return false; - } - return true; -} - #define TPARG_FL_RETURN BIT(0) #define TPARG_FL_KERNEL BIT(1) #define TPARG_FL_FENTRY BIT(2) -- cgit v1.2.3 From 9bbb33291f8e44819aaed32d367f702303ff663e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 13 Oct 2020 09:17:54 -0500 Subject: tracing: Check that the synthetic event and field names are legal Call the is_good_name() function used by probe events to make sure synthetic event and field names don't contain illegal characters and cause unexpected parsing of synthetic event commands. Link: https://lkml.kernel.org/r/c4d4bb59d3ac39bcbd70fba0cf837d6b1cedb015.1602598160.git.zanussi@kernel.org Fixes: 4b147936fa50 (tracing: Add support for 'synthetic' events) Reported-by: Masami Hiramatsu Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index b19e2f4159ab..8c9d6e464da0 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -572,6 +572,10 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, ret = -ENOMEM; goto free; } + if (!is_good_name(field->name)) { + ret = -EINVAL; + goto free; + } if (field_type[0] == ';') field_type++; @@ -1112,6 +1116,11 @@ static int __create_synth_event(int argc, const char *name, const char **argv) mutex_lock(&event_mutex); + if (!is_good_name(name)) { + ret = -EINVAL; + goto out; + } + event = find_synth_event(name); if (event) { ret = -EEXIST; -- cgit v1.2.3 From d4d704637d935ef5e588b0610b647376dd9f37d4 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 13 Oct 2020 09:17:55 -0500 Subject: tracing: Add synthetic event error logging Add support for synthetic event error logging, which entails adding a logging function for it, a way to save the synthetic event command, and a set of specific synthetic event parse error strings and handling. Link: https://lkml.kernel.org/r/ed099c66df13b40cfc633aaeb17f66c37a923066.1602598160.git.zanussi@kernel.org [ : wrote save_cmdstr() seq_buf implementation. ] Tested-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 92 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 8c9d6e464da0..f77851018121 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -20,6 +20,48 @@ #include "trace_synth.h" +#undef ERRORS +#define ERRORS \ + C(BAD_NAME, "Illegal name"), \ + C(CMD_INCOMPLETE, "Incomplete command"), \ + C(EVENT_EXISTS, "Event already exists"), \ + C(TOO_MANY_FIELDS, "Too many fields"), \ + C(INCOMPLETE_TYPE, "Incomplete type"), \ + C(INVALID_TYPE, "Invalid type"), \ + C(INVALID_FIELD, "Invalid field"), \ + C(CMD_TOO_LONG, "Command too long"), + +#undef C +#define C(a, b) SYNTH_ERR_##a + +enum { ERRORS }; + +#undef C +#define C(a, b) b + +static const char *err_text[] = { ERRORS }; + +static char last_cmd[MAX_FILTER_STR_VAL]; + +static int errpos(const char *str) +{ + return err_pos(last_cmd, str); +} + +static void last_cmd_set(char *str) +{ + if (!str) + return; + + strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1); +} + +static void synth_err(u8 err_type, u8 err_pos) +{ + tracing_log_err(NULL, "synthetic_events", last_cmd, err_text, + err_type, err_pos); +} + static int create_synth_event(int argc, const char **argv); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); @@ -545,8 +587,10 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, field_type++; if (!strcmp(field_type, "unsigned")) { - if (argc < 3) + if (argc < 3) { + synth_err(SYNTH_ERR_INCOMPLETE_TYPE, errpos(field_type)); return ERR_PTR(-EINVAL); + } prefix = "unsigned "; field_type = argv[1]; field_name = argv[2]; @@ -573,6 +617,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, goto free; } if (!is_good_name(field->name)) { + synth_err(SYNTH_ERR_BAD_NAME, errpos(field_name)); ret = -EINVAL; goto free; } @@ -601,6 +646,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, size = synth_field_size(field->type); if (size < 0) { + synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); ret = -EINVAL; goto free; } else if (size == 0) { @@ -621,6 +667,7 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, field->is_dynamic = true; size = sizeof(u64); } else { + synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); ret = -EINVAL; goto free; } @@ -1098,12 +1145,47 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, } EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); +static int save_cmdstr(int argc, const char *name, const char **argv) +{ + struct seq_buf s; + char *buf; + int i; + + buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + seq_buf_init(&s, buf, MAX_DYNEVENT_CMD_LEN); + + seq_buf_puts(&s, name); + + for (i = 0; i < argc; i++) { + seq_buf_putc(&s, ' '); + seq_buf_puts(&s, argv[i]); + } + + if (!seq_buf_buffer_left(&s)) { + synth_err(SYNTH_ERR_CMD_TOO_LONG, 0); + kfree(buf); + return -EINVAL; + } + buf[s.len] = 0; + last_cmd_set(buf); + + kfree(buf); + return 0; +} + static int __create_synth_event(int argc, const char *name, const char **argv) { struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; struct synth_event *event = NULL; int i, consumed = 0, n_fields = 0, ret = 0; + ret = save_cmdstr(argc, name, argv); + if (ret) + return ret; + /* * Argument syntax: * - Add synthetic event: field[;field] ... @@ -1111,18 +1193,22 @@ static int __create_synth_event(int argc, const char *name, const char **argv) * where 'field' = type field_name */ - if (name[0] == '\0' || argc < 1) + if (name[0] == '\0' || argc < 1) { + synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); return -EINVAL; + } mutex_lock(&event_mutex); if (!is_good_name(name)) { + synth_err(SYNTH_ERR_BAD_NAME, errpos(name)); ret = -EINVAL; goto out; } event = find_synth_event(name); if (event) { + synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name)); ret = -EEXIST; goto out; } @@ -1131,6 +1217,7 @@ static int __create_synth_event(int argc, const char *name, const char **argv) if (strcmp(argv[i], ";") == 0) continue; if (n_fields == SYNTH_FIELDS_MAX) { + synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); ret = -EINVAL; goto err; } @@ -1145,6 +1232,7 @@ static int __create_synth_event(int argc, const char *name, const char **argv) } if (i < argc && strcmp(argv[i], ";") != 0) { + synth_err(SYNTH_ERR_INVALID_FIELD, errpos(argv[i])); ret = -EINVAL; goto err; } -- cgit v1.2.3 From 10819e25799aae564005b6049a45e9808797b3bb Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 13 Oct 2020 09:17:57 -0500 Subject: tracing: Handle synthetic event array field type checking correctly Since synthetic event array types are derived from the field name, there may be a semicolon at the end of the type which should be stripped off. If there are more characters following that, normal type string checking will result in an invalid type. Without this patch, you can end up with an invalid field type string that gets displayed in both the synthetic event description and the event format: Before: # echo 'myevent char str[16]; int v' >> synthetic_events # cat synthetic_events myevent char[16]; str; int v name: myevent ID: 1936 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:char str[16];; offset:8; size:16; signed:1; field:int v; offset:40; size:4; signed:1; print fmt: "str=%s, v=%d", REC->str, REC->v After: # echo 'myevent char str[16]; int v' >> synthetic_events # cat synthetic_events myevent char[16] str; int v # cat events/synthetic/myevent/format name: myevent ID: 1936 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:char str[16]; offset:8; size:16; signed:1; field:int v; offset:40; size:4; signed:1; print fmt: "str=%s, v=%d", REC->str, REC->v Link: https://lkml.kernel.org/r/6587663b56c2d45ab9d8c8472a2110713cdec97d.1602598160.git.zanussi@kernel.org [ : wrote parse_synth_field() snippet. ] Fixes: 4b147936fa50 (tracing: Add support for 'synthetic' events) Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index f77851018121..d239f0e2af8f 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -174,7 +174,7 @@ static int synth_field_string_size(char *type) start += sizeof("char[") - 1; end = strchr(type, ']'); - if (!end || end < start) + if (!end || end < start || type + strlen(type) > end + 1) return -EINVAL; len = end - start; @@ -625,8 +625,14 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, if (field_type[0] == ';') field_type++; len = strlen(field_type) + 1; - if (array) - len += strlen(array); + + if (array) { + int l = strlen(array); + + if (l && array[l - 1] == ';') + l--; + len += l; + } if (prefix) len += strlen(prefix); -- cgit v1.2.3 From 6107742d15832011cd0396d821f3225b52551f1f Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Fri, 9 Oct 2020 15:05:23 -0700 Subject: tracing: support "bool" type in synthetic trace events It's common [1] to define tracepoint fields as "bool" when they contain a true / false value. Currently, defining a synthetic event with a "bool" field yields EINVAL. It's possible to work around this by using e.g. u8 (assuming sizeof(bool) is 1, and bool is unsigned; if either of these properties don't match, you get EINVAL [2]). Supporting "bool" explicitly makes hooking this up easier and more portable for userspace. [1]: grep -r "bool" include/trace/events/ [2]: check_synth_field() in kernel/trace/trace_events_hist.c Link: https://lkml.kernel.org/r/20201009220524.485102-2-axelrasmussen@google.com Acked-by: Michel Lespinasse Acked-by: David Rientjes Signed-off-by: Axel Rasmussen Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index d239f0e2af8f..3212e2c653b3 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -229,6 +229,8 @@ static int synth_field_size(char *type) size = sizeof(long); else if (strcmp(type, "unsigned long") == 0) size = sizeof(unsigned long); + else if (strcmp(type, "bool") == 0) + size = sizeof(bool); else if (strcmp(type, "pid_t") == 0) size = sizeof(pid_t); else if (strcmp(type, "gfp_t") == 0) @@ -271,6 +273,8 @@ static const char *synth_field_fmt(char *type) fmt = "%ld"; else if (strcmp(type, "unsigned long") == 0) fmt = "%lu"; + else if (strcmp(type, "bool") == 0) + fmt = "%d"; else if (strcmp(type, "pid_t") == 0) fmt = "%d"; else if (strcmp(type, "gfp_t") == 0) -- cgit v1.2.3