From 9da73974eb9c965dd9989befb593b8c8da9e4bdc Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Wed, 22 Apr 2020 11:45:06 +0530 Subject: tracing: Fix memory leaks in trace_events_hist.c kmemleak report 1: [<9092c50b>] kmem_cache_alloc_trace+0x138/0x270 [<05a2c9ed>] create_field_var+0xcf/0x180 [<528a2d68>] action_create+0xe2/0xc80 [<63f50b61>] event_hist_trigger_func+0x15b5/0x1920 [<28ea5d3d>] trigger_process_regex+0x7b/0xc0 [<3138e86f>] event_trigger_write+0x4d/0xb0 [] __vfs_write+0x30/0x200 [<4f424a0d>] vfs_write+0x96/0x1b0 [] ksys_write+0x53/0xc0 [<3717101a>] __ia32_sys_write+0x15/0x20 [] do_fast_syscall_32+0x70/0x250 [<46e2629c>] entry_SYSENTER_32+0xaf/0x102 This is because save_vars[] of struct hist_trigger_data are not destroyed kmemleak report 2: [<9092c50b>] kmem_cache_alloc_trace+0x138/0x270 [<6e5e97c5>] create_var+0x3c/0x110 [] create_field_var+0xaf/0x180 [<528a2d68>] action_create+0xe2/0xc80 [<63f50b61>] event_hist_trigger_func+0x15b5/0x1920 [<28ea5d3d>] trigger_process_regex+0x7b/0xc0 [<3138e86f>] event_trigger_write+0x4d/0xb0 [] __vfs_write+0x30/0x200 [<4f424a0d>] vfs_write+0x96/0x1b0 [] ksys_write+0x53/0xc0 [<3717101a>] __ia32_sys_write+0x15/0x20 [] do_fast_syscall_32+0x70/0x250 [<46e2629c>] entry_SYSENTER_32+0xaf/0x102 struct hist_field allocated through create_var() do not initialize "ref" field to 1. The code in __destroy_hist_field() does not destroy object if "ref" is initialized to zero, the condition if (--hist_field->ref > 1) always passes since unsigned int wraps. kmemleak report 3: [] __kmalloc_track_caller+0x139/0x2b0 [] kstrdup+0x27/0x50 [<39d70006>] init_var_ref+0x58/0xd0 [<8ca76370>] create_var_ref+0x89/0xe0 [] action_create+0x38f/0xc80 [<7c146821>] event_hist_trigger_func+0x15b5/0x1920 [<07de3f61>] trigger_process_regex+0x7b/0xc0 [] event_trigger_write+0x4d/0xb0 [<19bf1512>] __vfs_write+0x30/0x200 [<64ce4d27>] vfs_write+0x96/0x1b0 [] ksys_write+0x53/0xc0 [<7d4230cd>] __ia32_sys_write+0x15/0x20 [<8eadca00>] do_fast_syscall_32+0x70/0x250 [<235cf985>] entry_SYSENTER_32+0xaf/0x102 hist_fields (system & event_name) are not freed Link: http://lkml.kernel.org/r/20200422061503.GA5151@cosmos Signed-off-by: Vamshi K Sthambamkadi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5f6834a2bf41..fcab11cc6833 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3320,6 +3320,9 @@ static void __destroy_hist_field(struct hist_field *hist_field) kfree(hist_field->name); kfree(hist_field->type); + kfree(hist_field->system); + kfree(hist_field->event_name); + kfree(hist_field); } @@ -4382,6 +4385,7 @@ static struct hist_field *create_var(struct hist_trigger_data *hist_data, goto out; } + var->ref = 1; var->flags = HIST_FIELD_FL_VAR; var->var.idx = idx; var->var.hist_data = var->hist_data = hist_data; @@ -5011,6 +5015,9 @@ static void destroy_field_vars(struct hist_trigger_data *hist_data) for (i = 0; i < hist_data->n_field_vars; i++) destroy_field_var(hist_data->field_vars[i]); + + for (i = 0; i < hist_data->n_save_vars; i++) + destroy_field_var(hist_data->save_vars[i]); } static void save_field_var(struct hist_trigger_data *hist_data, -- cgit v1.2.3 From 353da87921a5ec654e7e9024e083f099f1b33c97 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 22 Apr 2020 21:38:45 -0400 Subject: ftrace: Fix memory leak caused by not freeing entry in unregister_ftrace_direct() kmemleak reported the following: unreferenced object 0xffff90d47127a920 (size 32): comm "modprobe", pid 1766, jiffies 4294792031 (age 162.568s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 22 01 00 00 00 00 ad de ........"....... 00 78 12 a7 ff ff ff ff 00 00 b6 c0 ff ff ff ff .x.............. backtrace: [<00000000bb79e72e>] register_ftrace_direct+0xcb/0x3a0 [<00000000295e4f79>] do_one_initcall+0x72/0x340 [<00000000873ead18>] do_init_module+0x5a/0x220 [<00000000974d9de5>] load_module+0x2235/0x2550 [<0000000059c3d6ce>] __do_sys_finit_module+0xc0/0x120 [<000000005a8611b4>] do_syscall_64+0x60/0x230 [<00000000a0cdc49e>] entry_SYSCALL_64_after_hwframe+0x49/0xb3 The entry used to save the direct descriptor needs to be freed when unregistering. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 041694a1eb74..bd030b1b9514 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5165,6 +5165,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) list_del_rcu(&direct->next); synchronize_rcu_tasks(); kfree(direct); + kfree(entry); ftrace_direct_func_count--; } } -- cgit v1.2.3 From d013496f99c5608e0f80afd67acb1ba93c4144ea Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Apr 2020 15:33:12 +0800 Subject: tracing: Convert local functions in tracing_map.c to static Fix the following sparse warning: kernel/trace/tracing_map.c:286:6: warning: symbol 'tracing_map_array_clear' was not declared. Should it be static? kernel/trace/tracing_map.c:297:6: warning: symbol 'tracing_map_array_free' was not declared. Should it be static? kernel/trace/tracing_map.c:319:26: warning: symbol 'tracing_map_array_alloc' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20200410073312.38855-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/tracing_map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 9e31bfc818ff..74738c9856f1 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -283,7 +283,7 @@ int tracing_map_add_key_field(struct tracing_map *map, return idx; } -void tracing_map_array_clear(struct tracing_map_array *a) +static void tracing_map_array_clear(struct tracing_map_array *a) { unsigned int i; @@ -294,7 +294,7 @@ void tracing_map_array_clear(struct tracing_map_array *a) memset(a->pages[i], 0, PAGE_SIZE); } -void tracing_map_array_free(struct tracing_map_array *a) +static void tracing_map_array_free(struct tracing_map_array *a) { unsigned int i; @@ -316,7 +316,7 @@ void tracing_map_array_free(struct tracing_map_array *a) kfree(a); } -struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, +static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, unsigned int entry_size) { struct tracing_map_array *a; -- cgit v1.2.3 From dcbd21c9fca5e954fd4e3d91884907eb6d47187e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:09 +0900 Subject: tracing/kprobes: Fix a double initialization typo Fix a typo that resulted in an unnecessary double initialization to addr. Link: http://lkml.kernel.org/r/158779374968.6082.2337484008464939919.stgit@devnote2 Cc: Tom Zanussi Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: c7411a1a126f ("tracing/kprobe: Check whether the non-suffixed symbol is notrace") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d0568af4a0ef..0d9300c3b084 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -453,7 +453,7 @@ static bool __within_notrace_func(unsigned long addr) static bool within_notrace_func(struct trace_kprobe *tk) { - unsigned long addr = addr = trace_kprobe_address(tk); + unsigned long addr = trace_kprobe_address(tk); char symname[KSYM_NAME_LEN], *p; if (!__within_notrace_func(addr)) -- cgit v1.2.3 From da0f1f4167e3af69e1d8b32d6d65195ddd2bfb64 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:17 +0900 Subject: tracing/boottime: Fix kprobe event API usage Fix boottime kprobe events to use API correctly for multiple events. For example, when we set a multiprobe kprobe events in bootconfig like below, ftrace.event.kprobes.myevent { probes = "vfs_read $arg1 $arg2", "vfs_write $arg1 $arg2" } This cause an error; trace_boot: Failed to add probe: p:kprobes/myevent (null) vfs_read $arg1 $arg2 vfs_write $arg1 $arg2 This shows the 1st argument becomes NULL and multiprobes are merged to 1 probe. Link: http://lkml.kernel.org/r/158779375766.6082.201939936008972838.stgit@devnote2 Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 29a154810546 ("tracing: Change trace_boot to use kprobe_event interface") Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 06d7feb5255f..9de29bb45a27 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -95,24 +95,20 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) struct xbc_node *anode; char buf[MAX_BUF_LEN]; const char *val; - int ret; + int ret = 0; - kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + xbc_node_for_each_array_value(node, "probes", anode, val) { + kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); - ret = kprobe_event_gen_cmd_start(&cmd, event, NULL); - if (ret) - return ret; + ret = kprobe_event_gen_cmd_start(&cmd, event, val); + if (ret) + break; - xbc_node_for_each_array_value(node, "probes", anode, val) { - ret = kprobe_event_add_field(&cmd, val); + ret = kprobe_event_gen_cmd_end(&cmd); if (ret) - return ret; + pr_err("Failed to add probe: %s\n", buf); } - ret = kprobe_event_gen_cmd_end(&cmd); - if (ret) - pr_err("Failed to add probe: %s\n", buf); - return ret; } #else -- cgit v1.2.3 From 5b4dcd2d201a395ad4054067bfae4a07554fbd65 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:26 +0900 Subject: tracing/kprobes: Reject new event if loc is NULL Reject the new event which has NULL location for kprobes. For kprobes, user must specify at least the location. Link: http://lkml.kernel.org/r/158779376597.6082.1411212055469099461.stgit@devnote2 Cc: Tom Zanussi Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 2a588dd1d5d6 ("tracing: Add kprobe event command generation functions") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0d9300c3b084..35989383ae11 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -940,6 +940,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); * complete command or only the first part of it; in the latter case, * kprobe_event_add_fields() can be used to add more fields following this. * + * Unlikely the synth_event_gen_cmd_start(), @loc must be specified. This + * returns -EINVAL if @loc == NULL. + * * Return: 0 if successful, error otherwise. */ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, @@ -953,6 +956,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, if (cmd->type != DYNEVENT_TYPE_KPROBE) return -EINVAL; + if (!loc) + return -EINVAL; + if (kretprobe) snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name); else -- cgit v1.2.3 From d16a8c31077e75ecb9427fbfea59b74eed00f698 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 May 2020 10:20:10 -0400 Subject: tracing: Wait for preempt irq delay thread to finish Running on a slower machine, it is possible that the preempt delay kernel thread may still be executing if the module was immediately removed after added, and this can cause the kernel to crash as the kernel thread might be executing after its code has been removed. There's no reason that the caller of the code shouldn't just wait for the delay thread to finish, as the thread can also be created by a trigger in the sysfs code, which also has the same issues. Link: http://lore.kernel.org/r/5EA2B0C8.2080706@cn.fujitsu.com Cc: stable@vger.kernel.org Fixes: 793937236d1ee ("lib: Add module for testing preemptoff/irqsoff latency tracers") Reported-by: Xiao Yang Reviewed-by: Xiao Yang Reviewed-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/preemptirq_delay_test.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 31c0fad4cb9e..c4c86de63cf9 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -113,22 +113,42 @@ static int preemptirq_delay_run(void *data) for (i = 0; i < s; i++) (testfuncs[i])(i); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + + __set_current_state(TASK_RUNNING); + return 0; } -static struct task_struct *preemptirq_start_test(void) +static int preemptirq_run_test(void) { + struct task_struct *task; + char task_name[50]; snprintf(task_name, sizeof(task_name), "%s_test", test_mode); - return kthread_run(preemptirq_delay_run, NULL, task_name); + task = kthread_run(preemptirq_delay_run, NULL, task_name); + if (IS_ERR(task)) + return PTR_ERR(task); + if (task) + kthread_stop(task); + return 0; } static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - preemptirq_start_test(); + ssize_t ret; + + ret = preemptirq_run_test(); + if (ret) + return ret; return count; } @@ -148,11 +168,9 @@ static struct kobject *preemptirq_delay_kobj; static int __init preemptirq_delay_init(void) { - struct task_struct *test_task; int retval; - test_task = preemptirq_start_test(); - retval = PTR_ERR_OR_ZERO(test_task); + retval = preemptirq_run_test(); if (retval != 0) return retval; -- cgit v1.2.3 From 11f5efc3ab66284f7aaacc926e9351d658e2577b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 May 2020 10:36:18 -0400 Subject: tracing: Add a vmalloc_sync_mappings() for safe measure x86_64 lazily maps in the vmalloc pages, and the way this works with per_cpu areas can be complex, to say the least. Mappings may happen at boot up, and if nothing synchronizes the page tables, those page mappings may not be synced till they are used. This causes issues for anything that might touch one of those mappings in the path of the page fault handler. When one of those unmapped mappings is touched in the page fault handler, it will cause another page fault, which in turn will cause a page fault, and leave us in a loop of page faults. Commit 763802b53a42 ("x86/mm: split vmalloc_sync_all()") split vmalloc_sync_all() into vmalloc_sync_unmappings() and vmalloc_sync_mappings(), as on system exit, it did not need to do a full sync on x86_64 (although it still needed to be done on x86_32). By chance, the vmalloc_sync_all() would synchronize the page mappings done at boot up and prevent the per cpu area from being a problem for tracing in the page fault handler. But when that synchronization in the exit of a task became a nop, it caused the problem to appear. Link: https://lore.kernel.org/r/20200429054857.66e8e333@oasis.local.home Cc: stable@vger.kernel.org Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Reported-by: "Tzvetomir Stoyanov (VMware)" Suggested-by: Joerg Roedel Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d2b98812625..9ed6d92768af 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8525,6 +8525,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) */ allocate_snapshot = false; #endif + + /* + * Because of some magic with the way alloc_percpu() works on + * x86_64, we need to synchronize the pgd of all the tables, + * otherwise the trace events that happen in x86_64 page fault + * handlers can't cope with accessing the chance that a + * alloc_percpu()'d memory might be touched in the page fault trace + * event. Oh, and we need to audit all other alloc_percpu() and vmalloc() + * calls in tracing, because something might get triggered within a + * page fault trace event! + */ + vmalloc_sync_mappings(); + return 0; } -- cgit v1.2.3 From 192b7993b3ff92b62b687e940e5e88fa0218d764 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 23 Apr 2020 12:08:25 +0800 Subject: tracing: Make tracing_snapshot_instance_cond() static Fix the following sparse warning: kernel/trace/trace.c:950:6: warning: symbol 'tracing_snapshot_instance_cond' was not declared. Should it be static? Link: http://lkml.kernel.org/r/1587614905-48692-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ed6d92768af..29615f15a820 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -947,7 +947,8 @@ int __trace_bputs(unsigned long ip, const char *str) EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT -void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) +static void tracing_snapshot_instance_cond(struct trace_array *tr, + void *cond_data) { struct tracer *tracer = tr->current_trace; unsigned long flags; -- cgit v1.2.3 From 78a5255ffb6a1af189a83e493d916ba1c54d8c75 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 13:57:10 -0700 Subject: Stop the ad-hoc games with -Wno-maybe-initialized We have some rather random rules about when we accept the "maybe-initialized" warnings, and when we don't. For example, we consider it unreliable for gcc versions < 4.9, but also if -O3 is enabled, or if optimizing for size. And then various kernel config options disabled it, because they know that they trigger that warning by confusing gcc sufficiently (ie PROFILE_ALL_BRANCHES). And now gcc-10 seems to be introducing a lot of those warnings too, so it falls under the same heading as 4.9 did. At the same time, we have a very straightforward way to _enable_ that warning when wanted: use "W=2" to enable more warnings. So stop playing these ad-hoc games, and just disable that warning by default, with the known and straight-forward "if you want to work on the extra compiler warnings, use W=123". Would it be great to have code that is always so obvious that it never confuses the compiler whether a variable is used initialized or not? Yes, it would. In a perfect world, the compilers would be smarter, and our source code would be simpler. That's currently not the world we live in, though. Signed-off-by: Linus Torvalds --- kernel/trace/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 402eef84c859..743647005f64 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -466,7 +466,6 @@ config PROFILE_ANNOTATED_BRANCHES config PROFILE_ALL_BRANCHES bool "Profile all if conditionals" if !FORTIFY_SOURCE select TRACE_BRANCH_PROFILING - imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives help This tracer profiles all branch conditions. Every if () taken in the kernel is recorded whether it hit or miss. -- cgit v1.2.3