From bd95b88d9e51fcbf392a7e90338a8fcc3499cbd6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 16 Oct 2008 09:31:27 -0400 Subject: ftrace: release functions from hash The x86 architecture uses a static recording of mcount caller locations and is not affected by this patch. For architectures still using the dynamic ftrace daemon, this patch is critical. It removes the race between the recording of a function that calls mcount, the unloading of a module, and the ftrace daemon updating the call sites. This patch adds the releasing of the hash functions that the daemon uses to update the mcount call sites. When a module is unloaded, not only are the replaced call site table update, but now so is the hash recorded functions that the ftrace daemon will use. Again, architectures that implement MCOUNT_RECORD are not affected by this (which currently only x86 has). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4dda4f60a2a9..1f54a94189fe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -164,10 +164,14 @@ static DEFINE_SPINLOCK(ftrace_hash_lock); #define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags) #define ftrace_hash_unlock(flags) \ spin_unlock_irqrestore(&ftrace_hash_lock, flags) +static void ftrace_release_hash(unsigned long start, unsigned long end); #else /* This is protected via the ftrace_lock with MCOUNT_RECORD. */ #define ftrace_hash_lock(flags) do { (void)(flags); } while (0) #define ftrace_hash_unlock(flags) do { } while(0) +static inline void ftrace_release_hash(unsigned long start, unsigned long end) +{ +} #endif /* @@ -347,6 +351,7 @@ void ftrace_release(void *start, unsigned long size) } spin_unlock(&ftrace_lock); + ftrace_release_hash(s, e); } static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) @@ -1659,6 +1664,44 @@ void __init ftrace_init(void) ftrace_disabled = 1; } #else /* CONFIG_FTRACE_MCOUNT_RECORD */ + +static void ftrace_release_hash(unsigned long start, unsigned long end) +{ + struct dyn_ftrace *rec; + struct hlist_node *t, *n; + struct hlist_head *head, temp_list; + unsigned long flags; + int i, cpu; + + preempt_disable_notrace(); + + /* disable incase we call something that calls mcount */ + cpu = raw_smp_processor_id(); + per_cpu(ftrace_shutdown_disable_cpu, cpu)++; + + ftrace_hash_lock(flags); + + for (i = 0; i < FTRACE_HASHSIZE; i++) { + INIT_HLIST_HEAD(&temp_list); + head = &ftrace_hash[i]; + + /* all CPUS are stopped, we are safe to modify code */ + hlist_for_each_entry_safe(rec, t, n, head, node) { + if (rec->flags & FTRACE_FL_FREE) + continue; + + if ((rec->ip >= start) && (rec->ip < end)) + ftrace_free_rec(rec); + } + } + + ftrace_hash_unlock(flags); + + per_cpu(ftrace_shutdown_disable_cpu, cpu)--; + preempt_enable_notrace(); + +} + static int ftraced(void *ignore) { unsigned long usecs; -- cgit v1.2.3 From c2db8054c1eaf99983d8deee347876b01c26c2cf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:11 -0400 Subject: ftrace: fix depends A lot of tracers have HAVE_FTRACE as a dependent config where it really should not. The HAVE_FTRACE is a misnomer (soon to be fixed) and describes if the architecture has the function tracer (mcount) implemented. The ftrace infrastructure is implemented in all archs. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1cb3e1f616af..5866edbc2ed1 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -49,7 +49,6 @@ config IRQSOFF_TRACER default n depends on TRACE_IRQFLAGS_SUPPORT depends on GENERIC_TIME - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACE_IRQFLAGS select TRACING @@ -73,7 +72,6 @@ config PREEMPT_TRACER default n depends on GENERIC_TIME depends on PREEMPT - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING select TRACER_MAX_TRACE @@ -101,7 +99,6 @@ config SYSPROF_TRACER config SCHED_TRACER bool "Scheduling Latency Tracer" - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING select CONTEXT_SWITCH_TRACER @@ -112,7 +109,6 @@ config SCHED_TRACER config CONTEXT_SWITCH_TRACER bool "Trace process context switches" - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING select MARKERS @@ -122,7 +118,6 @@ config CONTEXT_SWITCH_TRACER config BOOT_TRACER bool "Trace boot initcalls" - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING help -- cgit v1.2.3 From 606576ce816603d9fe1fb453a88bc6eea16ca709 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:12 -0400 Subject: ftrace: rename FTRACE to FUNCTION_TRACER Due to confusion between the ftrace infrastructure and the gcc profiling tracer "ftrace", this patch renames the config options from FTRACE to FUNCTION_TRACER. The other two names that are offspring from FTRACE DYNAMIC_FTRACE and FTRACE_MCOUNT_RECORD will stay the same. This patch was generated mostly by script, and partially by hand. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 17 +++++++++-------- kernel/trace/Makefile | 6 +++--- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 +- kernel/trace/trace_irqsoff.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 4 ++-- kernel/trace/trace_selftest.c | 4 ++-- 7 files changed, 20 insertions(+), 19 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5866edbc2ed1..3533c583df47 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -1,11 +1,12 @@ # -# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: +# Architectures that offer an FUNCTION_TRACER implementation should +# select HAVE_FUNCTION_TRACER: # config NOP_TRACER bool -config HAVE_FTRACE +config HAVE_FUNCTION_TRACER bool select NOP_TRACER @@ -28,9 +29,9 @@ config TRACING select STACKTRACE select TRACEPOINTS -config FTRACE +config FUNCTION_TRACER bool "Kernel Function Tracer" - depends on HAVE_FTRACE + depends on HAVE_FUNCTION_TRACER depends on DEBUG_KERNEL select FRAME_POINTER select TRACING @@ -136,9 +137,9 @@ config BOOT_TRACER config STACK_TRACER bool "Trace max stack" - depends on HAVE_FTRACE + depends on HAVE_FUNCTION_TRACER depends on DEBUG_KERNEL - select FTRACE + select FUNCTION_TRACER select STACKTRACE help This special tracer records the maximum stack footprint of the @@ -155,7 +156,7 @@ config STACK_TRACER config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" - depends on FTRACE + depends on FUNCTION_TRACER depends on HAVE_DYNAMIC_FTRACE depends on DEBUG_KERNEL default y @@ -165,7 +166,7 @@ config DYNAMIC_FTRACE with a No-Op instruction) as they are called. A table is created to dynamically enable them again. - This way a CONFIG_FTRACE kernel is slightly larger, but otherwise + This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise has native performance as long as no tracing is active. The changes to the code are done by a kernel thread that diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index a85dfba88ba0..c8228b1a49e9 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,7 +1,7 @@ # Do not instrument the tracer itself: -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) @@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif -obj-$(CONFIG_FTRACE) += libftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o -obj-$(CONFIG_FTRACE) += trace_functions.o +obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d345d649d073..aeb2f2505bc5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -851,7 +851,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) preempt_enable_notrace(); } -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER static void function_trace_call(unsigned long ip, unsigned long parent_ip) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f1f99572cde7..6889ca48f1f1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -335,7 +335,7 @@ void update_max_tr_single(struct trace_array *tr, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER void tracing_start_function_trace(void); void tracing_stop_function_trace(void); #else diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a7db7f040ae0..9c74071c10e0 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -63,7 +63,7 @@ irq_trace(void) */ static __cacheline_aligned_in_smp unsigned long max_sequence; -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* * irqsoff uses its own tracer function to keep the overhead down: */ @@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, }; -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ /* * Should this new latency be reported/recorded? diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index fe4a252c2363..3ae93f16b565 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock = static void __wakeup_reset(struct trace_array *tr); -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* * irqsoff uses its own tracer function to keep the overhead down: */ @@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, }; -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ /* * Should this new latency be reported/recorded? diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 09cf230d7eca..95815d26a041 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) return ret; } -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE @@ -226,7 +226,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) return ret; } -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_IRQSOFF_TRACER int -- cgit v1.2.3 From 3ce83aea86bf46fd1bff59d2e6d16f48fdce22fc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:13 -0400 Subject: ftrace: rename the ftrace tracer to function To avoid further confusion between the ftrace infrastructure and the function tracer. This patch renames the "ftrace" function tracer to "function". Now in available_tracers, instead of "ftrace" there will be "function". This makes more sense, since people will not know exactly what the "ftrace" tracer does. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_functions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index e90eb0c2c56c..0f85a64003d3 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr) static struct tracer function_trace __read_mostly = { - .name = "ftrace", + .name = "function", .init = function_trace_init, .reset = function_trace_reset, .ctrl_update = function_trace_ctrl_update, -- cgit v1.2.3 From 81520a1b0649d0701205b818714a8c1e1cfbbb5b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 21:24:18 -0400 Subject: ftrace: stack tracer only record when on stack The stack trace API does not record if the stack is not on the current task's stack. That is, if the stack is the interrupt stack or NMI stack, the output does not show. Also, the size of those stacks are not consistent with the size of the thread stack, this makes the calculation of the stack size usually bogus. This all confuses the stack tracer. I unfortunately do not have time to fix all these problems, but this patch does record the worst stack when the stack pointer is on the tasks stack (instead of bogus numbers). The patch simply returns if the stack pointer is not on the task's stack. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_stack.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 74c5d9a3afae..be682b62fe58 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -44,6 +44,10 @@ static inline void check_stack(void) if (this_size <= max_stack_size) return; + /* we do not handle interrupt stacks yet */ + if (!object_is_on_stack(&this_size)) + return; + raw_local_irq_save(flags); __raw_spin_lock(&max_stack_lock); -- cgit v1.2.3 From 17d80fd07d35ae1d231b3378ee4f00ace54f9d31 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Oct 2008 16:31:18 +0200 Subject: tracing: create tracers menu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We seem to have plenty tracers, lets create a menu and not clutter the already cluttered debug menu more. Signed-off-by: Peter Zijlstra Acked-by: Frédéric Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3533c583df47..bc535cb91de9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -29,6 +29,8 @@ config TRACING select STACKTRACE select TRACEPOINTS +menu "Tracers" + config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER @@ -191,3 +193,5 @@ config FTRACE_STARTUP_TEST a series of tests are made to verify that the tracer is functioning properly. It will do tests on all the configured tracers of ftrace. + +endmenu -- cgit v1.2.3 From 6ae2a0765ab764da11cc305058ee5333810228f4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Oct 2008 10:22:06 -0400 Subject: ring-buffer: fix free page The pages of a buffer was originally pointing to the page struct, it now points to the page address. The freeing of the page still uses the page frame free "__free_page" instead of the correct free_page to the address. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 94af1fe56bb4..091aeefe321e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -130,7 +130,7 @@ struct buffer_page { static inline void free_buffer_page(struct buffer_page *bpage) { if (bpage->page) - __free_page(bpage->page); + free_page((unsigned long)bpage->page); kfree(bpage); } -- cgit v1.2.3 From 593eb8a2d63e95772a5f22d746f18a997c5ee463 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:32:59 -0400 Subject: ftrace: return error on failed modified text. Have the ftrace_modify_code return error values: -EFAULT on error of reading the address -EINVAL if what is read does not match what it expected -EPERM if the write fails to update after a successful match. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1f54a94189fe..b2de8de77356 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -596,22 +596,22 @@ ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; unsigned char *nop, *call; - int failed; + int ret; ip = rec->ip; nop = ftrace_nop_replace(); call = ftrace_call_replace(ip, mcount_addr); - failed = ftrace_modify_code(ip, call, nop); - if (failed) { - switch (failed) { - case 1: + ret = ftrace_modify_code(ip, call, nop); + if (ret) { + switch (ret) { + case -EFAULT: WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); print_ip_sym(ip); break; - case 2: + case -EINVAL: WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); @@ -620,6 +620,15 @@ ftrace_code_disable(struct dyn_ftrace *rec) print_ip_ins(" replace: ", nop); printk(KERN_CONT "\n"); break; + case -EPERM: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on writing "); + print_ip_sym(ip); + break; + default: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on unknown error "); + print_ip_sym(ip); } rec->flags |= FTRACE_FL_FAILED; -- cgit v1.2.3 From 81adbdc029ecc416d56563e7f159100181dd711d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:02 -0400 Subject: ftrace: only have ftrace_kill atomic When an anomaly is detected, we need a way to completely disable ftrace. Right now we have two functions: ftrace_kill and ftrace_kill_atomic. The ftrace_kill tries to do it in a "nice" way by converting everything back to a nop. The "nice" way is dangerous itself, so this patch removes it and only has the "atomic" version, which is all that is needed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 42 ++---------------------------------------- kernel/trace/trace.c | 2 +- 2 files changed, 3 insertions(+), 41 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b2de8de77356..93245ae046e1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1549,22 +1549,6 @@ int ftrace_force_update(void) return ret; } -static void ftrace_force_shutdown(void) -{ - struct task_struct *task; - int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC; - - mutex_lock(&ftraced_lock); - task = ftraced_task; - ftraced_task = NULL; - ftraced_suspend = -1; - ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); - - if (task) - kthread_stop(task); -} - static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1795,17 +1779,16 @@ core_initcall(ftrace_dynamic_init); # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) -# define ftrace_force_shutdown() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** - * ftrace_kill_atomic - kill ftrace from critical sections + * ftrace_kill - kill ftrace * * This function should be used by panic code. It stops ftrace * but in a not so nice way. If you need to simply kill ftrace * from a non-atomic section, use ftrace_kill. */ -void ftrace_kill_atomic(void) +void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; @@ -1815,27 +1798,6 @@ void ftrace_kill_atomic(void) clear_ftrace_function(); } -/** - * ftrace_kill - totally shutdown ftrace - * - * This is a safety measure. If something was detected that seems - * wrong, calling this function will keep ftrace from doing - * any more modifications, and updates. - * used when something went wrong. - */ -void ftrace_kill(void) -{ - mutex_lock(&ftrace_sysctl_lock); - ftrace_disabled = 1; - ftrace_enabled = 0; - - clear_ftrace_function(); - mutex_unlock(&ftrace_sysctl_lock); - - /* Try to totally disable ftrace */ - ftrace_force_shutdown(); -} - /** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aeb2f2505bc5..333a5162149b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3097,7 +3097,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ - ftrace_kill_atomic(); + ftrace_kill(); for_each_tracing_cpu(cpu) { atomic_inc(&global_trace.data[cpu]->disabled); -- cgit v1.2.3 From 6912896e994ddaf06cc0f6d3f2098bc4b59bdd84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:03 -0400 Subject: ftrace: add ftrace warn on to disable ftrace Add ftrace warn on to disable ftrace as well as report a warning. [ Thanks to Andrew Morton for suggesting using the WARN_ON return value ] Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 93245ae046e1..b08996ca561d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -32,6 +32,18 @@ #include "trace.h" +#define FTRACE_WARN_ON(cond) \ + do { \ + if (WARN_ON(cond)) \ + ftrace_kill(); \ + } while (0) + +#define FTRACE_WARN_ON_ONCE(cond) \ + do { \ + if (WARN_ON_ONCE(cond)) \ + ftrace_kill(); \ + } while (0) + /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; @@ -363,10 +375,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) rec = ftrace_free_records; if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); ftrace_free_records = NULL; - ftrace_disabled = 1; - ftrace_enabled = 0; return NULL; } @@ -415,7 +425,7 @@ ftrace_record_ip(unsigned long ip) key = hash_long(ip, FTRACE_HASHBITS); - WARN_ON_ONCE(key >= FTRACE_HASHSIZE); + FTRACE_WARN_ON_ONCE(key >= FTRACE_HASHSIZE); if (ftrace_ip_in_hash(ip, key)) goto out; @@ -607,12 +617,12 @@ ftrace_code_disable(struct dyn_ftrace *rec) if (ret) { switch (ret) { case -EFAULT: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); print_ip_sym(ip); break; case -EINVAL: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); print_ip_ins(" expected: ", call); @@ -621,12 +631,12 @@ ftrace_code_disable(struct dyn_ftrace *rec) printk(KERN_CONT "\n"); break; case -EPERM: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on writing "); print_ip_sym(ip); break; default: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on unknown error "); print_ip_sym(ip); } @@ -1722,8 +1732,7 @@ static int ftraced(void *ignore) ftrace_update_cnt != 1 ? "s" : "", ftrace_update_tot_cnt, usecs, usecs != 1 ? "s" : ""); - ftrace_disabled = 1; - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); } } mutex_unlock(&ftraced_lock); -- cgit v1.2.3 From cb7be3b2fc2cf089ee52b16f0fd9ebb29e9944e1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:05 -0400 Subject: ftrace: remove daemon The ftrace daemon is complex and error prone. This patch strips it out of the code. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 322 ++++-------------------------------------- kernel/trace/trace_selftest.c | 14 -- 2 files changed, 28 insertions(+), 308 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b08996ca561d..e758cab0836f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -165,25 +165,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) } #ifdef CONFIG_DYNAMIC_FTRACE - #ifndef CONFIG_FTRACE_MCOUNT_RECORD -/* - * The hash lock is only needed when the recording of the mcount - * callers are dynamic. That is, by the caller themselves and - * not recorded via the compilation. - */ -static DEFINE_SPINLOCK(ftrace_hash_lock); -#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags) -#define ftrace_hash_unlock(flags) \ - spin_unlock_irqrestore(&ftrace_hash_lock, flags) -static void ftrace_release_hash(unsigned long start, unsigned long end); -#else -/* This is protected via the ftrace_lock with MCOUNT_RECORD. */ -#define ftrace_hash_lock(flags) do { (void)(flags); } while (0) -#define ftrace_hash_unlock(flags) do { } while(0) -static inline void ftrace_release_hash(unsigned long start, unsigned long end) -{ -} +# error Dynamic ftrace depends on MCOUNT_RECORD #endif /* @@ -194,8 +177,6 @@ static inline void ftrace_release_hash(unsigned long start, unsigned long end) */ static unsigned long mcount_addr = MCOUNT_ADDR; -static struct task_struct *ftraced_task; - enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -212,7 +193,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); -static DEFINE_MUTEX(ftraced_lock); static DEFINE_MUTEX(ftrace_regex_lock); struct ftrace_page { @@ -230,10 +210,6 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static int ftraced_trigger; -static int ftraced_suspend; -static int ftraced_stop; - static int ftrace_record_suspend; static struct dyn_ftrace *ftrace_free_records; @@ -398,7 +374,6 @@ static void ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *node; - unsigned long flags; unsigned long key; int resched; int cpu; @@ -430,24 +405,18 @@ ftrace_record_ip(unsigned long ip) if (ftrace_ip_in_hash(ip, key)) goto out; - ftrace_hash_lock(flags); - /* This ip may have hit the hash before the lock */ if (ftrace_ip_in_hash(ip, key)) - goto out_unlock; + goto out; node = ftrace_alloc_dyn_node(ip); if (!node) - goto out_unlock; + goto out; node->ip = ip; ftrace_add_hash(node, key); - ftraced_trigger = 1; - - out_unlock: - ftrace_hash_unlock(flags); out: per_cpu(ftrace_shutdown_disable_cpu, cpu)--; @@ -647,7 +616,7 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } -static int __ftrace_update_code(void *ignore); +static int ftrace_update_code(void *ignore); static int __ftrace_modify_code(void *data) { @@ -659,7 +628,7 @@ static int __ftrace_modify_code(void *data) * Update any recorded ips now that we have the * machine stopped */ - __ftrace_update_code(NULL); + ftrace_update_code(NULL); ftrace_replace_code(1); tracing_on = 1; } else if (*command & FTRACE_DISABLE_CALLS) { @@ -686,26 +655,9 @@ static void ftrace_run_update_code(int command) stop_machine(__ftrace_modify_code, &command, NULL); } -void ftrace_disable_daemon(void) -{ - /* Stop the daemon from calling kstop_machine */ - mutex_lock(&ftraced_lock); - ftraced_stop = 1; - mutex_unlock(&ftraced_lock); - - ftrace_force_update(); -} - -void ftrace_enable_daemon(void) -{ - mutex_lock(&ftraced_lock); - ftraced_stop = 0; - mutex_unlock(&ftraced_lock); - - ftrace_force_update(); -} - static ftrace_func_t saved_ftrace_func; +static int ftrace_start; +static DEFINE_MUTEX(ftrace_start_lock); static void ftrace_startup(void) { @@ -714,9 +666,9 @@ static void ftrace_startup(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); - ftraced_suspend++; - if (ftraced_suspend == 1) + mutex_lock(&ftrace_start_lock); + ftrace_start++; + if (ftrace_start == 1) command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -729,7 +681,7 @@ static void ftrace_startup(void) ftrace_run_update_code(command); out: - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static void ftrace_shutdown(void) @@ -739,9 +691,9 @@ static void ftrace_shutdown(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); - ftraced_suspend--; - if (!ftraced_suspend) + mutex_lock(&ftrace_start_lock); + ftrace_start--; + if (!ftrace_start) command |= FTRACE_DISABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -754,7 +706,7 @@ static void ftrace_shutdown(void) ftrace_run_update_code(command); out: - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static void ftrace_startup_sysctl(void) @@ -764,15 +716,15 @@ static void ftrace_startup_sysctl(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); + mutex_lock(&ftrace_start_lock); /* Force update next time */ saved_ftrace_func = NULL; - /* ftraced_suspend is true if we want ftrace running */ - if (ftraced_suspend) + /* ftrace_start is true if we want ftrace running */ + if (ftrace_start) command |= FTRACE_ENABLE_CALLS; ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static void ftrace_shutdown_sysctl(void) @@ -782,20 +734,20 @@ static void ftrace_shutdown_sysctl(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); - /* ftraced_suspend is true if ftrace is running */ - if (ftraced_suspend) + mutex_lock(&ftrace_start_lock); + /* ftrace_start is true if ftrace is running */ + if (ftrace_start) command |= FTRACE_DISABLE_CALLS; ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int __ftrace_update_code(void *ignore) +static int ftrace_update_code(void *ignore) { int i, save_ftrace_enabled; cycle_t start, stop; @@ -869,7 +821,6 @@ static int __ftrace_update_code(void *ignore) stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - ftraced_trigger = 0; ftrace_enabled = save_ftrace_enabled; ftrace_record_suspend--; @@ -877,17 +828,6 @@ static int __ftrace_update_code(void *ignore) return 0; } -static int ftrace_update_code(void) -{ - if (unlikely(ftrace_disabled) || - !ftrace_enabled || !ftraced_trigger) - return 0; - - stop_machine(__ftrace_update_code, NULL, NULL); - - return 1; -} - static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) { struct ftrace_page *pg; @@ -1425,10 +1365,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) } mutex_lock(&ftrace_sysctl_lock); - mutex_lock(&ftraced_lock); - if (iter->filtered && ftraced_suspend && ftrace_enabled) + mutex_lock(&ftrace_start_lock); + if (iter->filtered && ftrace_start && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); kfree(iter); @@ -1448,55 +1388,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file) return ftrace_regex_release(inode, file, 0); } -static ssize_t -ftraced_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - /* don't worry about races */ - char *buf = ftraced_stop ? "disabled\n" : "enabled\n"; - int r = strlen(buf); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -ftraced_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - long val; - int ret; - - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - if (strncmp(buf, "enable", 6) == 0) - val = 1; - else if (strncmp(buf, "disable", 7) == 0) - val = 0; - else { - buf[cnt] = 0; - - ret = strict_strtoul(buf, 10, &val); - if (ret < 0) - return ret; - - val = !!val; - } - - if (val) - ftrace_enable_daemon(); - else - ftrace_disable_daemon(); - - filp->f_pos += cnt; - - return cnt; -} - static struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -1527,38 +1418,6 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; -static struct file_operations ftraced_fops = { - .open = tracing_open_generic, - .read = ftraced_read, - .write = ftraced_write, -}; - -/** - * ftrace_force_update - force an update to all recording ftrace functions - */ -int ftrace_force_update(void) -{ - int ret = 0; - - if (unlikely(ftrace_disabled)) - return -ENODEV; - - mutex_lock(&ftrace_sysctl_lock); - mutex_lock(&ftraced_lock); - - /* - * If ftraced_trigger is not set, then there is nothing - * to update. - */ - if (ftraced_trigger && !ftrace_update_code()) - ret = -EBUSY; - - mutex_unlock(&ftraced_lock); - mutex_unlock(&ftrace_sysctl_lock); - - return ret; -} - static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1589,17 +1448,11 @@ static __init int ftrace_init_debugfs(void) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); - entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer, - NULL, &ftraced_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'ftraced_enabled' entry\n"); return 0; } fs_initcall(ftrace_init_debugfs); -#ifdef CONFIG_FTRACE_MCOUNT_RECORD static int ftrace_convert_nops(unsigned long *start, unsigned long *end) { @@ -1619,7 +1472,7 @@ static int ftrace_convert_nops(unsigned long *start, /* p is ignored */ local_irq_save(flags); - __ftrace_update_code(p); + ftrace_update_code(p); local_irq_restore(flags); return 0; @@ -1666,122 +1519,6 @@ void __init ftrace_init(void) failed: ftrace_disabled = 1; } -#else /* CONFIG_FTRACE_MCOUNT_RECORD */ - -static void ftrace_release_hash(unsigned long start, unsigned long end) -{ - struct dyn_ftrace *rec; - struct hlist_node *t, *n; - struct hlist_head *head, temp_list; - unsigned long flags; - int i, cpu; - - preempt_disable_notrace(); - - /* disable incase we call something that calls mcount */ - cpu = raw_smp_processor_id(); - per_cpu(ftrace_shutdown_disable_cpu, cpu)++; - - ftrace_hash_lock(flags); - - for (i = 0; i < FTRACE_HASHSIZE; i++) { - INIT_HLIST_HEAD(&temp_list); - head = &ftrace_hash[i]; - - /* all CPUS are stopped, we are safe to modify code */ - hlist_for_each_entry_safe(rec, t, n, head, node) { - if (rec->flags & FTRACE_FL_FREE) - continue; - - if ((rec->ip >= start) && (rec->ip < end)) - ftrace_free_rec(rec); - } - } - - ftrace_hash_unlock(flags); - - per_cpu(ftrace_shutdown_disable_cpu, cpu)--; - preempt_enable_notrace(); - -} - -static int ftraced(void *ignore) -{ - unsigned long usecs; - - while (!kthread_should_stop()) { - - set_current_state(TASK_INTERRUPTIBLE); - - /* check once a second */ - schedule_timeout(HZ); - - if (unlikely(ftrace_disabled)) - continue; - - mutex_lock(&ftrace_sysctl_lock); - mutex_lock(&ftraced_lock); - if (!ftraced_suspend && !ftraced_stop && - ftrace_update_code()) { - usecs = nsecs_to_usecs(ftrace_update_time); - if (ftrace_update_tot_cnt > 100000) { - ftrace_update_tot_cnt = 0; - pr_info("hm, dftrace overflow: %lu change%s" - " (%lu total) in %lu usec%s\n", - ftrace_update_cnt, - ftrace_update_cnt != 1 ? "s" : "", - ftrace_update_tot_cnt, - usecs, usecs != 1 ? "s" : ""); - FTRACE_WARN_ON_ONCE(1); - } - } - mutex_unlock(&ftraced_lock); - mutex_unlock(&ftrace_sysctl_lock); - - ftrace_shutdown_replenish(); - } - __set_current_state(TASK_RUNNING); - return 0; -} - -static int __init ftrace_dynamic_init(void) -{ - struct task_struct *p; - unsigned long addr; - int ret; - - addr = (unsigned long)ftrace_record_ip; - - stop_machine(ftrace_dyn_arch_init, &addr, NULL); - - /* ftrace_dyn_arch_init places the return code in addr */ - if (addr) { - ret = (int)addr; - goto failed; - } - - ret = ftrace_dyn_table_alloc(NR_TO_INIT); - if (ret) - goto failed; - - p = kthread_run(ftraced, NULL, "ftraced"); - if (IS_ERR(p)) { - ret = -1; - goto failed; - } - - last_ftrace_enabled = ftrace_enabled = 1; - ftraced_task = p; - - return 0; - - failed: - ftrace_disabled = 1; - return ret; -} - -core_initcall(ftrace_dynamic_init); -#endif /* CONFIG_FTRACE_MCOUNT_RECORD */ #else # define ftrace_startup() do { } while (0) @@ -1801,9 +1538,6 @@ void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; -#ifdef CONFIG_DYNAMIC_FTRACE - ftraced_suspend = -1; -#endif clear_ftrace_function(); } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 95815d26a041..90bc752a7580 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -99,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* passed in by parameter to fool gcc from optimizing */ func(); - /* update the records */ - ret = ftrace_force_update(); - if (ret) { - printk(KERN_CONT ".. ftraced failed .. "); - return ret; - } - /* * Some archs *cough*PowerPC*cough* add charachters to the * start of the function names. We simply put a '*' to @@ -183,13 +176,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* make sure msleep has been recorded */ msleep(1); - /* force the recorded functions to be traced */ - ret = ftrace_force_update(); - if (ret) { - printk(KERN_CONT ".. ftraced failed .. "); - return ret; - } - /* start the tracing */ ftrace_enabled = 1; tracer_enabled = 1; -- cgit v1.2.3 From 4d296c24326783bff1282ac72f310d8bac8df413 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:06 -0400 Subject: ftrace: remove mcount set The arch dependent function ftrace_mcount_set was only used by the daemon start up code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e758cab0836f..226fd9132d53 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -620,7 +620,6 @@ static int ftrace_update_code(void *ignore); static int __ftrace_modify_code(void *data) { - unsigned long addr; int *command = data; if (*command & FTRACE_ENABLE_CALLS) { @@ -639,14 +638,6 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); - if (*command & FTRACE_ENABLE_MCOUNT) { - addr = (unsigned long)ftrace_record_ip; - ftrace_mcount_set(&addr); - } else if (*command & FTRACE_DISABLE_MCOUNT) { - addr = (unsigned long)ftrace_stub; - ftrace_mcount_set(&addr); - } - return 0; } -- cgit v1.2.3 From 08f5ac906d2c0faf96d608c54a0b03177376da8d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:07 -0400 Subject: ftrace: remove ftrace hash The ftrace hash was used by the ftrace_daemon code. The record ip function would place the calling address (ip) into the hash. The daemon would later read the hash and modify that code. The hash complicates the code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 243 ++++++++------------------------------------------ kernel/trace/trace.c | 3 - 2 files changed, 35 insertions(+), 211 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 226fd9132d53..07762c08a944 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include @@ -189,9 +188,7 @@ static int ftrace_filtered; static int tracing_on; static int frozen_record_count; -static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; - -static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); +static LIST_HEAD(ftrace_new_addrs); static DEFINE_MUTEX(ftrace_regex_lock); @@ -210,8 +207,6 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static int ftrace_record_suspend; - static struct dyn_ftrace *ftrace_free_records; @@ -242,72 +237,6 @@ static inline int record_frozen(struct dyn_ftrace *rec) # define record_frozen(rec) ({ 0; }) #endif /* CONFIG_KPROBES */ -int skip_trace(unsigned long ip) -{ - unsigned long fl; - struct dyn_ftrace *rec; - struct hlist_node *t; - struct hlist_head *head; - - if (frozen_record_count == 0) - return 0; - - head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)]; - hlist_for_each_entry_rcu(rec, t, head, node) { - if (rec->ip == ip) { - if (record_frozen(rec)) { - if (rec->flags & FTRACE_FL_FAILED) - return 1; - - if (!(rec->flags & FTRACE_FL_CONVERTED)) - return 1; - - if (!tracing_on || !ftrace_enabled) - return 1; - - if (ftrace_filtered) { - fl = rec->flags & (FTRACE_FL_FILTER | - FTRACE_FL_NOTRACE); - if (!fl || (fl & FTRACE_FL_NOTRACE)) - return 1; - } - } - break; - } - } - - return 0; -} - -static inline int -ftrace_ip_in_hash(unsigned long ip, unsigned long key) -{ - struct dyn_ftrace *p; - struct hlist_node *t; - int found = 0; - - hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { - if (p->ip == ip) { - found = 1; - break; - } - } - - return found; -} - -static inline void -ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) -{ - hlist_add_head_rcu(&node->node, &ftrace_hash[key]); -} - -/* called from kstop_machine */ -static inline void ftrace_del_hash(struct dyn_ftrace *node) -{ - hlist_del(&node->node); -} - static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->ip = (unsigned long)ftrace_free_records; @@ -362,69 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) } if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) - return NULL; + if (!ftrace_pages->next) { + /* allocate another page */ + ftrace_pages->next = + (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages->next) + return NULL; + } ftrace_pages = ftrace_pages->next; } return &ftrace_pages->records[ftrace_pages->index++]; } -static void +static struct dyn_ftrace * ftrace_record_ip(unsigned long ip) { - struct dyn_ftrace *node; - unsigned long key; - int resched; - int cpu; + struct dyn_ftrace *rec; if (!ftrace_enabled || ftrace_disabled) - return; - - resched = need_resched(); - preempt_disable_notrace(); - - /* - * We simply need to protect against recursion. - * Use the the raw version of smp_processor_id and not - * __get_cpu_var which can call debug hooks that can - * cause a recursive crash here. - */ - cpu = raw_smp_processor_id(); - per_cpu(ftrace_shutdown_disable_cpu, cpu)++; - if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1) - goto out; - - if (unlikely(ftrace_record_suspend)) - goto out; - - key = hash_long(ip, FTRACE_HASHBITS); - - FTRACE_WARN_ON_ONCE(key >= FTRACE_HASHSIZE); - - if (ftrace_ip_in_hash(ip, key)) - goto out; - - /* This ip may have hit the hash before the lock */ - if (ftrace_ip_in_hash(ip, key)) - goto out; - - node = ftrace_alloc_dyn_node(ip); - if (!node) - goto out; + return NULL; - node->ip = ip; + rec = ftrace_alloc_dyn_node(ip); + if (!rec) + return NULL; - ftrace_add_hash(node, key); + rec->ip = ip; - out: - per_cpu(ftrace_shutdown_disable_cpu, cpu)--; + list_add(&rec->list, &ftrace_new_addrs); - /* prevent recursion with scheduler */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + return rec; } #define FTRACE_ADDR ((long)(ftrace_caller)) @@ -543,7 +439,6 @@ static void ftrace_replace_code(int enable) rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { - ftrace_del_hash(rec); ftrace_free_rec(rec); } } @@ -551,15 +446,6 @@ static void ftrace_replace_code(int enable) } } -static void ftrace_shutdown_replenish(void) -{ - if (ftrace_pages->next) - return; - - /* allocate another page */ - ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); -} - static void print_ip_ins(const char *fmt, unsigned char *p) { int i; @@ -616,18 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } -static int ftrace_update_code(void *ignore); - static int __ftrace_modify_code(void *data) { int *command = data; if (*command & FTRACE_ENABLE_CALLS) { - /* - * Update any recorded ips now that we have the - * machine stopped - */ - ftrace_update_code(NULL); ftrace_replace_code(1); tracing_on = 1; } else if (*command & FTRACE_DISABLE_CALLS) { @@ -738,84 +617,34 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(void *ignore) +static int ftrace_update_code(void) { - int i, save_ftrace_enabled; + struct dyn_ftrace *p, *t; cycle_t start, stop; - struct dyn_ftrace *p; - struct hlist_node *t, *n; - struct hlist_head *head, temp_list; - - /* Don't be recording funcs now */ - ftrace_record_suspend++; - save_ftrace_enabled = ftrace_enabled; - ftrace_enabled = 0; start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; - /* No locks needed, the machine is stopped! */ - for (i = 0; i < FTRACE_HASHSIZE; i++) { - INIT_HLIST_HEAD(&temp_list); - head = &ftrace_hash[i]; + list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { - /* all CPUS are stopped, we are safe to modify code */ - hlist_for_each_entry_safe(p, t, n, head, node) { - /* Skip over failed records which have not been - * freed. */ - if (p->flags & FTRACE_FL_FAILED) - continue; + /* If something went wrong, bail without enabling anything */ + if (unlikely(ftrace_disabled)) + return -1; - /* Unconverted records are always at the head of the - * hash bucket. Once we encounter a converted record, - * simply skip over to the next bucket. Saves ftraced - * some processor cycles (ftrace does its bid for - * global warming :-p ). */ - if (p->flags & (FTRACE_FL_CONVERTED)) - break; + list_del_init(&p->list); - /* Ignore updates to this record's mcount site. - * Reintroduce this record at the head of this - * bucket to attempt to "convert" it again if - * the kprobe on it is unregistered before the - * next run. */ - if (get_kprobe((void *)p->ip)) { - ftrace_del_hash(p); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, &temp_list); - freeze_record(p); - continue; - } else { - unfreeze_record(p); - } - - /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(p)) { - p->flags |= FTRACE_FL_CONVERTED; - ftrace_update_cnt++; - } else { - if ((system_state == SYSTEM_BOOTING) || - !core_kernel_text(p->ip)) { - ftrace_del_hash(p); - ftrace_free_rec(p); - } - } - } - - hlist_for_each_entry_safe(p, t, n, &temp_list, node) { - hlist_del(&p->node); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, head); - } + /* convert record (i.e, patch mcount-call with NOP) */ + if (ftrace_code_disable(p)) { + p->flags |= FTRACE_FL_CONVERTED; + ftrace_update_cnt++; + } else + ftrace_free_rec(p); } stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - ftrace_enabled = save_ftrace_enabled; - ftrace_record_suspend--; - return 0; } @@ -847,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) pg = ftrace_pages = ftrace_pages_start; cnt = num_to_init / ENTRIES_PER_PAGE; - pr_info("ftrace: allocating %ld hash entries in %d pages\n", + pr_info("ftrace: allocating %ld entries in %d pages\n", num_to_init, cnt); for (i = 0; i < cnt; i++) { @@ -1451,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start, unsigned long addr; unsigned long flags; + mutex_lock(&ftrace_start_lock); p = start; while (p < end) { addr = ftrace_call_adjust(*p++); - /* should not be called from interrupt context */ - spin_lock(&ftrace_lock); ftrace_record_ip(addr); - spin_unlock(&ftrace_lock); - ftrace_shutdown_replenish(); } - /* p is ignored */ + /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(p); + ftrace_update_code(); local_irq_restore(flags); + mutex_unlock(&ftrace_start_lock); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 333a5162149b..06951e229443 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -865,9 +865,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!ftrace_function_enabled)) return; - if (skip_trace(ip)) - return; - pc = preempt_count(); resched = need_resched(); preempt_disable_notrace(); -- cgit v1.2.3 From 66b0de3569b00f61978782b9f97aa4803dbec0fb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Oct 2008 16:11:03 +0200 Subject: ftrace: fix build failure fix: kernel/trace/ftrace.c: In function 'ftrace_release': kernel/trace/ftrace.c:271: error: implicit declaration of function 'ftrace_release_hash' release_hash is not needed without dftraced. Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 07762c08a944..27212321eb0d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -267,8 +267,6 @@ void ftrace_release(void *start, unsigned long size) } } spin_unlock(&ftrace_lock); - - ftrace_release_hash(s, e); } static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) -- cgit v1.2.3 From f17845e5d97ead8fbdadfd40039e058ec7cf4a42 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Oct 2008 12:47:10 +0200 Subject: ftrace: warning in kernel/trace/ftrace.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning: kernel/trace/ftrace.c:189: warning: ‘frozen_record_count’ defined but not used triggers because frozen_record_count is only used in the KCONFIG_MARKERS case. Move the variable it there. Alas, this frozen-record facility seems to have little use. The frozen_record_count variable is not used by anything, nor the flags. So this section might need a bit of dead-code-removal care as well. Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 27212321eb0d..7618c528756b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -186,7 +186,6 @@ enum { static int ftrace_filtered; static int tracing_on; -static int frozen_record_count; static LIST_HEAD(ftrace_new_addrs); @@ -211,6 +210,9 @@ static struct dyn_ftrace *ftrace_free_records; #ifdef CONFIG_KPROBES + +static int frozen_record_count; + static inline void freeze_record(struct dyn_ftrace *rec) { if (!(rec->flags & FTRACE_FL_FROZEN)) { @@ -1443,3 +1445,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_unlock(&ftrace_sysctl_lock); return ret; } + -- cgit v1.2.3 From e2862c9470beb842d3f1c1965b03a2112114c160 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 27 Oct 2008 17:43:28 +1100 Subject: trace: fix printk warning for u64 A powerpc ppc64_defconfig build produces these warnings: kernel/trace/ring_buffer.c: In function 'rb_add_time_stamp': kernel/trace/ring_buffer.c:969: warning: format '%llu' expects type 'long long unsigned int', but argument 2 has type 'u64' kernel/trace/ring_buffer.c:969: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type 'u64' kernel/trace/ring_buffer.c:969: warning: format '%llu' expects type 'long long unsigned int', but argument 4 has type 'u64' Just cast the u64s to unsigned long long like we do everywhere else. Signed-off-by: Stephen Rothwell Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 091aeefe321e..cedf4e268285 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -966,7 +966,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, if (unlikely(*delta > (1ULL << 59) && !once++)) { printk(KERN_WARNING "Delta way too big! %llu" " ts=%llu write stamp = %llu\n", - *delta, *ts, cpu_buffer->write_stamp); + (unsigned long long)*delta, + (unsigned long long)*ts, + (unsigned long long)cpu_buffer->write_stamp); WARN_ON(1); } -- cgit v1.2.3 From ea31e72d753e5817a97de552f152d0cb55c7defc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Oct 2008 19:26:23 +0200 Subject: tracing/ftrace: make boot tracer select the sched_switch tracer Impact: build fix If the boot tracer is selected but not the sched_switch, there will be a build failure: kernel/built-in.o: In function `boot_trace_init': trace_boot.c:(.text+0x5ee38): undefined reference to `sched_switch_trace' kernel/built-in.o: In function `disable_boot_trace': (.text+0x5eee1): undefined reference to `tracing_stop_cmdline_record' kernel/built-in.o: In function `enable_boot_trace': (.text+0x5ef11): undefined reference to `tracing_start_cmdline_record' This patch fixes it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index bc535cb91de9..e0cea282e0c5 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -123,6 +123,7 @@ config BOOT_TRACER bool "Trace boot initcalls" depends on DEBUG_KERNEL select TRACING + select CONTEXT_SWITCH_TRACER help This tracer helps developers to optimize boot times: it records the timings of the initcalls and traces key events and the identity -- cgit v1.2.3 From 21798a84ab383cdac0e7ee3368e0792b718b867d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 28 Oct 2008 09:43:26 +0100 Subject: tracing: fix a build error on alpha Impact: build fix on Alpha When tracing is enabled, some arch have included on their but others like alpha or m68k don't. Build error on alpha: kernel/trace/trace.c: In function 'tracing_cpumask_write': kernel/trace/trace.c:2145: error: implicit declaration of function 'raw_local_irq_disable' kernel/trace/trace.c:2162: error: implicit declaration of function 'raw_local_irq_enable' Tested on Alpha through a cross-compiler (should correct a similar issue on m68k). Reported-by: Alexey Dobriyan Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06951e229443..bc577dcc0e47 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -34,6 +34,7 @@ #include #include +#include #include "trace.h" -- cgit v1.2.3 From 60063a66236c15f5613f91390631e06718689782 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Oct 2008 10:44:24 -0400 Subject: ftrace: fix current_tracer error return The commit (in linux-tip) c2931e05ec5965597cbfb79ad332d4a29aeceb23 ( ftrace: return an error when setting a nonexistent tracer ) added useful code that would error when a bad tracer was written into the current_tracer file. But this had a bug if the amount written was more than the amount read by that code. The first iteration would set the tracer correctly, but since it did not consume the rest of what was written (usually whitespace), the userspace utility would continue to write what was not consumed. This second iteration would fail to find a tracer and return -EINVAL. Funny thing is that the tracer would have already been set. This patch just consumes all the data that is written to the file. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bc577dcc0e47..a610ca771558 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2377,9 +2377,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, int i; size_t ret; + ret = cnt; + if (cnt > max_tracer_type_len) cnt = max_tracer_type_len; - ret = cnt; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -2412,8 +2413,8 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, out: mutex_unlock(&trace_types_lock); - if (ret == cnt) - filp->f_pos += cnt; + if (ret > 0) + filp->f_pos += ret; return ret; } -- cgit v1.2.3 From 0b6e4d56bf71866a2b58daa8323cf747988ce7e4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 28 Oct 2008 20:17:38 +0100 Subject: ftrace: perform an initialization for ftrace to enable it Impact: corrects a bug which made the non-dyn function tracer not functional With latest git, the non-dynamic function tracer didn't get any trace. The problem was the fact that ftrace_enabled wasn't initialized to 1 because ftrace hasn't any init function when DYNAMIC_FTRACE is disabled. So when a tracer tries to register an ftrace_ops struct, __register_ftrace_function failed to set the hook. This patch corrects it by setting an init function to initialize ftrace during the boot. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7618c528756b..4a39d24568c8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1339,6 +1339,14 @@ void __init ftrace_init(void) } #else + +static int __init ftrace_nodyn_init(void) +{ + ftrace_enabled = 1; + return 0; +} +device_initcall(ftrace_nodyn_init); + # define ftrace_startup() do { } while (0) # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) -- cgit v1.2.3 From f3384b28a05624783b53836ccfed95ecde66a7ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Oct 2008 11:15:57 -0400 Subject: ftrace: fix trace_nop config select Impact: build fix on non-function-tracing architectures The trace_nop is the tracer that is defined when no tracer is set in the ftrace infrastructure. The trace_nop was mistakenly selected by HAVE_FTRACE due to the confusion between ftrace infrastructure and the ftrace function tracer (which has been solved by renaming the function tracer). This patch changes the select to the approriate TRACING. This patch should fix compile errors on architectures that do not define the FUNCTION_TRACER. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e0cea282e0c5..b58f43bec363 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -8,7 +8,6 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool - select NOP_TRACER config HAVE_DYNAMIC_FTRACE bool @@ -28,6 +27,7 @@ config TRACING select RING_BUFFER select STACKTRACE select TRACEPOINTS + select NOP_TRACER menu "Tracers" -- cgit v1.2.3 From 9244489a7b69fe0746dc7cb3957f02e05bd1ceb0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Oct 2008 09:42:59 -0400 Subject: ftrace: handle archs that do not support irqs_disabled_flags Impact: build fix on non-lockdep architectures Some architectures do not support a way to read the irq flags that is set from "local_irq_save(flags)" to determine if interrupts were disabled or enabled. Ftrace uses this information to display to the user if the trace occurred with interrupts enabled or disabled. Besides the fact that those archs that do not support this will fail to compile, unless they fix it, we do not want to have the trace simply say interrupts were not disabled or they were enabled, without knowing the real answer. This patch adds a 'X' in the output to let the user know that the architecture they are running on does not support a way for the tracer to determine if interrupts were enabled or disabled. It also lets those same archs compile with tracing enabled. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 7 ++++++- kernel/trace/trace.h | 20 +++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a610ca771558..8a499e2adaec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -656,7 +656,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; entry->flags = +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | +#else + TRACE_FLAG_IRQS_NOSUPPORT | +#endif ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); @@ -1244,7 +1248,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); trace_seq_printf(s, "%3d", cpu); trace_seq_printf(s, "%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6889ca48f1f1..8465ad052707 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -120,18 +120,20 @@ struct trace_boot { /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: - * IRQS_OFF - interrupts were disabled - * NEED_RESCED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler - * CONT - multiple entries hold the trace item + * IRQS_OFF - interrupts were disabled + * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags + * NEED_RESCED - reschedule is requested + * HARDIRQ - inside an interrupt handler + * SOFTIRQ - inside a softirq handler + * CONT - multiple entries hold the trace item */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_NEED_RESCHED = 0x02, - TRACE_FLAG_HARDIRQ = 0x04, - TRACE_FLAG_SOFTIRQ = 0x08, - TRACE_FLAG_CONT = 0x10, + TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_CONT = 0x20, }; #define TRACE_BUF_SIZE 1024 -- cgit v1.2.3 From c2c80529460095035752bf0ecc1af82c1e0f6e0f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 Oct 2008 19:50:41 +0000 Subject: tracing, alpha: undefined reference to `save_stack_trace' Impact: build fix on !stacktrace architectures only select STACKTRACE on architectures that have STACKTRACE_SUPPORT ... since we also need to ifdef out the guts of ftrace_trace_stack(). We also want to disallow setting TRACE_ITER_STACKTRACE in trace_flags on such configs, but that can wait. Signed-off-by: Al Viro Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 2 +- kernel/trace/trace.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b58f43bec363..33dbefd471e8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -25,7 +25,7 @@ config TRACING bool select DEBUG_FS select RING_BUFFER - select STACKTRACE + select STACKTRACE if STACKTRACE_SUPPORT select TRACEPOINTS select NOP_TRACER diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8a499e2adaec..85bee775a03e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -705,6 +705,7 @@ static void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { +#ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -730,6 +731,7 @@ static void ftrace_trace_stack(struct trace_array *tr, save_stack_trace(&trace); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +#endif } void __trace_stack(struct trace_array *tr, -- cgit v1.2.3 From b3aa557722b3d5858f14ca559e03461c24125aaf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 15:44:07 -0400 Subject: ftrace: use kretprobe trampoline name to test in output Impact: ia64+tracing build fix When a function is kprobed, the return address is set to the kprobe_trampoline, or something similar. This caused the output of the trace to look confusing when the parent seemed to be this "kprobe_trampoline" function. To fix this, Abhishek Sagar added a test of the instruction pointer of the parent to see if it matched the kprobe_trampoline. If it did, the output would print a "[unknown/kretprobe'd]" instead. Unfortunately, not all archs do this the same way, and the trampoline function may not be exported, which causes failures in builds. This patch will compare the name instead of the pointer to see if it matches. This prevents us from depending on a function from being exported, and should work on all archs. The worst that can happen is that an arch might use a different name and then we go back to the confusing output. At least the arch will still build. Reported-by: Abhishek Sagar Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Tested-by: Abhishek Sagar Acked-by: Abhishek Sagar --- kernel/trace/trace.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85bee775a03e..9f3b478f9171 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1088,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } -#define KRETPROBE_MSG "[unknown/kretprobe'd]" - #ifdef CONFIG_KRETPROBES -static inline int kretprobed(unsigned long addr) +static inline const char *kretprobed(const char *name) { - return addr == (unsigned long)kretprobe_trampoline; + static const char tramp_name[] = "kretprobe_trampoline"; + int size = sizeof(tramp_name); + + if (strncmp(tramp_name, name, size) == 0) + return "[unknown/kretprobe'd]"; + return name; } #else -static inline int kretprobed(unsigned long addr) +static inline const char *kretprobed(const char *name) { - return 0; + return name; } #endif /* CONFIG_KRETPROBES */ @@ -1107,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; + const char *name; kallsyms_lookup(address, NULL, NULL, NULL, str); - return trace_seq_printf(s, fmt, str); + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); #endif return 1; } @@ -1121,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; + const char *name; sprint_symbol(str, address); - return trace_seq_printf(s, fmt, str); + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); #endif return 1; } @@ -1377,10 +1386,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) seq_print_ip_sym(s, field->ip, sym_flags); trace_seq_puts(s, " ("); - if (kretprobed(field->parent_ip)) - trace_seq_puts(s, KRETPROBE_MSG); - else - seq_print_ip_sym(s, field->parent_ip, sym_flags); + seq_print_ip_sym(s, field->parent_ip, sym_flags); trace_seq_puts(s, ")\n"); break; } @@ -1496,12 +1502,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) ret = trace_seq_printf(s, " <-"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - if (kretprobed(field->parent_ip)) - ret = trace_seq_puts(s, KRETPROBE_MSG); - else - ret = seq_print_ip_sym(s, - field->parent_ip, - sym_flags); + ret = seq_print_ip_sym(s, + field->parent_ip, + sym_flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } -- cgit v1.2.3 From 818e3dd30a4ff34fff6d90e87ae59c73f6a53691 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 09:58:35 -0400 Subject: tracing, ring-buffer: add paranoid checks for loops While writing a new tracer, I had a bug where I caused the ring-buffer to recurse in a bad way. The bug was with the tracer I was writing and not the ring-buffer itself. But it took a long time to find the problem. This patch adds paranoid checks into the ring-buffer infrastructure that will catch bugs of this nature. Note: I put the bug back in the tracer and this patch showed the error nicely and prevented the lockup. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cedf4e268285..3f3380638646 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1022,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event; u64 ts, delta; int commit = 0; + int nr_loops = 0; again: + /* + * We allow for interrupts to reenter here and do a trace. + * If one does, it will cause this original code to loop + * back here. Even with heavy interrupts happening, this + * should only happen a few times in a row. If this happens + * 1000 times in a row, there must be either an interrupt + * storm or we have something buggy. + * Bail! + */ + if (unlikely(++nr_loops > 1000)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + ts = ring_buffer_time_stamp(cpu_buffer->cpu); /* @@ -1532,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = NULL; unsigned long flags; + int nr_loops = 0; spin_lock_irqsave(&cpu_buffer->lock, flags); again: + /* + * This should normally only loop twice. But because the + * start of the reader inserts an empty page, it causes + * a case where we will loop three times. There should be no + * reason to loop four times (that I know of). + */ + if (unlikely(++nr_loops > 3)) { + RB_WARN_ON(cpu_buffer, 1); + reader = NULL; + goto out; + } + reader = cpu_buffer->reader_page; /* If there's more to read, return this page */ @@ -1665,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; struct buffer_page *reader; + int nr_loops = 0; if (!cpu_isset(cpu, buffer->cpumask)) return NULL; @@ -1672,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) cpu_buffer = buffer->buffers[cpu]; again: + /* + * We repeat when a timestamp is encountered. It is possible + * to get multiple timestamps from an interrupt entering just + * as one timestamp is about to be written. The max times + * that this can happen is the number of nested interrupts we + * can have. Nesting 10 deep of interrupts is clearly + * an anomaly. + */ + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + reader = rb_get_reader_page(cpu_buffer); if (!reader) return NULL; @@ -1722,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; + int nr_loops = 0; if (ring_buffer_iter_empty(iter)) return NULL; @@ -1730,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) buffer = cpu_buffer->buffer; again: + /* + * We repeat when a timestamp is encountered. It is possible + * to get multiple timestamps from an interrupt entering just + * as one timestamp is about to be written. The max times + * that this can happen is the number of nested interrupts we + * can have. Nesting 10 deep of interrupts is clearly + * an anomaly. + */ + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + if (rb_per_cpu_empty(cpu_buffer)) return NULL; -- cgit v1.2.3 From 072ba49838b42c873c496d72c91bb237914cf3b6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 26 Oct 2008 15:26:57 -0700 Subject: ftrace: fix breakage in bin_fmt results In 777e208d40d0953efc6fb4ab58590da3f7d8f02d we changed from outputting field->cpu (a char) to iter->cpu (unsigned int), increasing the resulting structure size by 3 bytes. Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f9171..974973e39e87 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, iter->cpu); + SEQ_PUT_FIELD_RET(s, entry->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); switch (entry->type) { -- cgit v1.2.3 From bf5e6519b85b3853f2d0bb4f17a4e2eaeffeb574 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 10 Nov 2008 21:46:00 -0500 Subject: ftrace: disable tracing on resize Impact: fix for bug on resize This patch addresses the bug found here: http://bugzilla.kernel.org/show_bug.cgi?id=11996 When ftrace converted to the new unified trace buffer, the resizing of the buffer was not protected as much as it was originally. If tracing is performed while the resize occurs, then the buffer can be corrupted. This patch disables all ftrace buffer modifications before a resize takes place. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f9171..abfa8103d046 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; - int ret; + int ret, cpu; struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) @@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, goto out; } + /* disable all cpu buffers */ + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_inc(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_inc(&max_tr.data[cpu]->disabled); + } + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { @@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) cnt = -ENOMEM; out: + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_dec(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_dec(&max_tr.data[cpu]->disabled); + } + max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); -- cgit v1.2.3 From 4143c5cb36331155a1823af8b3a8c761a59fed71 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 10 Nov 2008 21:46:01 -0500 Subject: ring-buffer: prevent infinite looping on time stamping Impact: removal of unnecessary looping The lockless part of the ring buffer allows for reentry into the code from interrupts. A timestamp is taken, a test is preformed and if it detects that an interrupt occurred that did tracing, it tries again. The problem arises if the timestamp code itself causes a trace. The detection will detect this and loop again. The difference between this and an interrupt doing tracing, is that this will fail every time, and cause an infinite loop. Currently, we test if the loop happens 1000 times, and if so, it will produce a warning and disable the ring buffer. The problem with this approach is that it makes it difficult to perform some types of tracing (tracing the timestamp code itself). Each trace entry has a delta timestamp from the previous entry. If a trace entry is reserved but and interrupt occurs and traces before the previous entry is commited, the delta timestamp for that entry will be zero. This actually makes sense in terms of tracing, because the interrupt entry happened before the preempted entry was commited, so one may consider the two happening at the same time. The order is still preserved in the buffer. With this idea, instead of trying to get a new timestamp if an interrupt made it in between the timestamp and the test, the entry could simply make the delta zero and continue. This will prevent interrupts or tracers in the timer code from causing the above loop. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3f3380638646..2f76193c3489 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1060,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, /* Did the write stamp get updated already? */ if (unlikely(ts < cpu_buffer->write_stamp)) - goto again; + delta = 0; if (test_time_stamp(delta)) { -- cgit v1.2.3 From a358324466b171e145df20bdb74fe81759906de6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 11 Nov 2008 15:01:42 -0500 Subject: ring-buffer: buffer record on/off switch Impact: enable/disable ring buffer recording API added Several kernel developers have requested that there be a way to stop recording into the ring buffers with a simple switch that can also be enabled from userspace. This patch addes a new kernel API to the ring buffers called: tracing_on() tracing_off() When tracing_off() is called, all ring buffers will not be able to record into their buffers. tracing_on() will enable the ring buffers again. These two act like an on/off switch. That is, there is no counting of the number of times tracing_off or tracing_on has been called. A new file is added to the debugfs/tracing directory called tracing_on This allows for userspace applications to also flip the switch. echo 0 > debugfs/tracing/tracing_on disables the tracing. echo 1 > /debugfs/tracing/tracing_on enables it. Note, this does not disable or enable any tracers. It only sets or clears a flag that needs to be set in order for the ring buffers to write to their buffers. It is a global flag, and affects all ring buffers. The buffers start out with tracing_on enabled. There are now three flags that control recording into the buffers: tracing_on: which affects all ring buffer tracers. buffer->record_disabled: which affects an allocated buffer, which may be set if an anomaly is detected, and tracing is disabled. cpu_buffer->record_disabled: which is set by tracing_stop() or if an anomaly is detected. tracing_start can not reenable this if an anomaly occurred. The userspace debugfs/tracing/tracing_enabled is implemented with tracing_stop() but the user space code can not enable it if the kernel called tracing_stop(). Userspace can enable the tracing_on even if the kernel disabled it. It is just a switch used to stop tracing if a condition was hit. tracing_on is not for protecting critical areas in the kernel nor is it for stopping tracing if an anomaly occurred. This is because userspace can reenable it at any time. Side effect: With this patch, I discovered a dead variable in ftrace.c called tracing_on. This patch removes it. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 +--- kernel/trace/ring_buffer.c | 101 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4a39d24568c8..14fa52297b28 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -185,7 +185,6 @@ enum { }; static int ftrace_filtered; -static int tracing_on; static LIST_HEAD(ftrace_new_addrs); @@ -506,13 +505,10 @@ static int __ftrace_modify_code(void *data) { int *command = data; - if (*command & FTRACE_ENABLE_CALLS) { + if (*command & FTRACE_ENABLE_CALLS) ftrace_replace_code(1); - tracing_on = 1; - } else if (*command & FTRACE_DISABLE_CALLS) { + else if (*command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); - tracing_on = 0; - } if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2f76193c3489..b08ee9f00c8d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -16,6 +16,35 @@ #include #include +#include "trace.h" + +/* Global flag to disable all recording to ring buffers */ +static int ring_buffers_off __read_mostly; + +/** + * tracing_on - enable all tracing buffers + * + * This function enables all tracing buffers that may have been + * disabled with tracing_off. + */ +void tracing_on(void) +{ + ring_buffers_off = 0; +} + +/** + * tracing_off - turn off all tracing buffers + * + * This function stops all tracing buffers from recording data. + * It does not disable any overhead the tracers themselves may + * be causing. This function simply causes all recording to + * the ring buffers to fail. + */ +void tracing_off(void) +{ + ring_buffers_off = 1; +} + /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -1133,6 +1162,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, struct ring_buffer_event *event; int cpu, resched; + if (ring_buffers_off) + return NULL; + if (atomic_read(&buffer->record_disabled)) return NULL; @@ -1249,6 +1281,9 @@ int ring_buffer_write(struct ring_buffer *buffer, int ret = -EBUSY; int cpu, resched; + if (ring_buffers_off) + return -EBUSY; + if (atomic_read(&buffer->record_disabled)) return -EBUSY; @@ -2070,3 +2105,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, return 0; } +static ssize_t +rb_simple_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int *p = filp->private_data; + char buf[64]; + int r; + + /* !ring_buffers_off == tracing_on */ + r = sprintf(buf, "%d\n", !*p); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +rb_simple_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int *p = filp->private_data; + char buf[64]; + long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + /* !ring_buffers_off == tracing_on */ + *p = !val; + + (*ppos)++; + + return cnt; +} + +static struct file_operations rb_simple_fops = { + .open = tracing_open_generic, + .read = rb_simple_read, + .write = rb_simple_write, +}; + + +static __init int rb_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("tracing_on", 0644, d_tracer, + &ring_buffers_off, &rb_simple_fops); + if (!entry) + pr_warning("Could not create debugfs 'tracing_on' entry\n"); + + return 0; +} + +fs_initcall(rb_init_debugfs); -- cgit v1.2.3 From 47e74f2ba8fbf9fb1378e2524e6cfdc2fb37f160 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 00:01:27 -0500 Subject: ring-buffer: no preempt for sched_clock() Impact: disable preemption when calling sched_clock() The ring_buffer_time_stamp still uses sched_clock as its counter. But it is a bug to call it with preemption enabled. This requirement should not be pushed to the ring_buffer_time_stamp callers, so the ring_buffer_time_stamp needs to disable preemption when calling sched_clock. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b08ee9f00c8d..231db209fa82 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -51,8 +51,14 @@ void tracing_off(void) /* FIXME!!! */ u64 ring_buffer_time_stamp(int cpu) { + u64 time; + + preempt_disable_notrace(); /* shift to debug/test normalization and TIME_EXTENTS */ - return sched_clock() << DEBUG_SHIFT; + time = sched_clock() << DEBUG_SHIFT; + preempt_enable_notrace(); + + return time; } void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) -- cgit v1.2.3 From ee51a1de7e3837577412be269e0100038068e691 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 13 Nov 2008 14:58:31 +0100 Subject: tracing: fix mmiotrace resizing crash Pekka reported a crash when resizing the mmiotrace tracer (if only mmiotrace is enabled). This happens because in that case we do not allocate the max buffer, but we try to use it. Make ring_buffer_resize() idempotent against NULL buffers. Reported-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 231db209fa82..036456cbb4f7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -538,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) LIST_HEAD(pages); int i, cpu; + /* + * Always succeed at resizing a non-existent buffer: + */ + if (!buffer) + return size; + size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); size *= BUF_PAGE_SIZE; buffer_size = buffer->pages * BUF_PAGE_SIZE; -- cgit v1.2.3 From 5821e1b74f0d08952cb5da4bfd2d9a388d8df58e Mon Sep 17 00:00:00 2001 From: walimis Date: Sat, 15 Nov 2008 15:19:06 +0800 Subject: function tracing: fix wrong pos computing when read buffer has been fulfilled Impact: make output of available_filter_functions complete phenomenon: The first value of dyn_ftrace_total_info is not equal with `cat available_filter_functions | wc -l`, but they should be equal. root cause: When printing functions with seq_printf in t_show, if the read buffer is just overflowed by current function record, then this function won't be printed to user space through read buffer, it will just be dropped. So we can't see this function printing. So, every time the last function to fill the read buffer, if overflowed, will be dropped. This also applies to set_ftrace_filter if set_ftrace_filter has more bytes than read buffer. fix: Through checking return value of seq_printf, if less than 0, we know this function doesn't be printed. Then we decrease position to force this function to be printed next time, in next read buffer. Another little fix is to show correct allocating pages count. Signed-off-by: walimis Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 14fa52297b28..e60205722d0c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -673,7 +673,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) cnt = num_to_init / ENTRIES_PER_PAGE; pr_info("ftrace: allocating %ld entries in %d pages\n", - num_to_init, cnt); + num_to_init, cnt + 1); for (i = 0; i < cnt; i++) { pg->next = (void *)get_zeroed_page(GFP_KERNEL); @@ -753,13 +753,11 @@ static void *t_start(struct seq_file *m, loff_t *pos) void *p = NULL; loff_t l = -1; - if (*pos != iter->pos) { - for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) - ; - } else { - l = *pos; - p = t_next(m, p, &l); - } + if (*pos > iter->pos) + *pos = iter->pos; + + l = *pos; + p = t_next(m, p, &l); return p; } @@ -770,15 +768,21 @@ static void t_stop(struct seq_file *m, void *p) static int t_show(struct seq_file *m, void *v) { + struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec = v; char str[KSYM_SYMBOL_LEN]; + int ret = 0; if (!rec) return 0; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); - seq_printf(m, "%s\n", str); + ret = seq_printf(m, "%s\n", str); + if (ret < 0) { + iter->pos--; + iter->idx--; + } return 0; } @@ -804,7 +808,7 @@ ftrace_avail_open(struct inode *inode, struct file *file) return -ENOMEM; iter->pg = ftrace_pages_start; - iter->pos = -1; + iter->pos = 0; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -891,7 +895,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; - iter->pos = -1; + iter->pos = 0; iter->flags = enable ? FTRACE_ITER_FILTER : FTRACE_ITER_NOTRACE; -- cgit v1.2.3 From 0bb943c7a2136716757a263f604d26309fd98042 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 14 Nov 2008 19:05:31 +0100 Subject: tracing: kernel/trace/trace.c: introduce missing kfree() Impact: fix memory leak Error handling code following a kzalloc should free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 697eda36b86a..d86e3252f300 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1936,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) ring_buffer_read_finish(iter->buffer_iter[cpu]); } mutex_unlock(&trace_types_lock); + kfree(iter); return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From 641d2f63cfe24539e154efa2f932937934c27dde Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 18 Nov 2008 19:22:13 +0100 Subject: trace: introduce missing mutex_unlock() Impact: fix tracing buffer mutex leak in case of allocation failure This error was spotted by this semantic patch: http://www.emn.fr/x-info/coccinelle/mut.html It looks correct as far as I can tell. Please review. Signed-off-by: Vegard Nossum Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 036456cbb4f7..f780e9552f91 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -617,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) list_del_init(&page->list); free_buffer_page(page); } + mutex_unlock(&buffer->mutex); return -ENOMEM; } -- cgit v1.2.3 From f10ed36ec1118c6f9523cd7e53cb0aadb53efe9f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: fix set_ftrace_filter Impact: fix of output of set_ftrace_filter The commit "ftrace: do not show freed records in available_filter_functions" Removed a bit too much from the set_ftrace_filter code, where we now see all functions in the set_ftrace_filter file even when we set a filter. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4a39d24568c8..dcac7418f688 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -738,6 +738,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) ((iter->flags & FTRACE_ITER_FAILURES) && !(rec->flags & FTRACE_FL_FAILED)) || + ((iter->flags & FTRACE_ITER_FILTER) && + !(rec->flags & FTRACE_FL_FILTER)) || + ((iter->flags & FTRACE_ITER_NOTRACE) && !(rec->flags & FTRACE_FL_NOTRACE))) { rec = NULL; -- cgit v1.2.3 From 820432783190b4096499e38a4a4d7095c511913d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 18 Nov 2008 23:57:14 -0500 Subject: ftrace: make filtered functions effective on setting Impact: fix filter selection to apply when set It can be confusing when the set_filter_functions is set (or cleared) and the functions being recorded by the dynamic tracer does not match. This patch causes the code to be updated if the function tracer is enabled and the filter is changed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dcac7418f688..5cbddb59e99f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1189,7 +1189,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftrace_start_lock); - if (iter->filtered && ftrace_start && ftrace_enabled) + if (ftrace_start && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); -- cgit v1.2.3 From 32464779a1b8c15e9aa9aa0306b2f735080df9d8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 18 Nov 2008 20:33:02 -0500 Subject: ftrace: fix dyn ftrace filter selection Impact: clean up and fix for dyn ftrace filter selection The previous logic of the dynamic ftrace selection of enabling or disabling functions was complex and incorrect. This patch simplifies the code and corrects the usage. This simplification also makes the code more robust. Here is the correct logic: Given a function that can be traced by dynamic ftrace: If the function is not to be traced, disable it if it was enabled. (this is if the function is in the set_ftrace_notrace file) (filter is on if there exists any functions in set_ftrace_filter file) If the filter is on, and we are enabling functions: If the function is in set_ftrace_filter, enable it if it is not already enabled. If the function is not in set_ftrace_filter, disable it if it is not already disabled. Otherwise, if the filter is off and we are enabling function tracing: Enable the function if it is not already enabled. Otherwise, if we are disabling function tracing: Disable the function if it is not already disabled. This code now sets or clears the ENABLED flag in the record, and at the end it will enable the function if the flag is set, or disable the function if the flag is cleared. The parameters for the function that does the above logic is also simplified. Instead of passing in confusing "new" and "old" where they might be swapped if the "enabled" flag is not set. The old logic even had one of the above always NULL and had to be filled in. The new logic simply passes in one parameter called "nop". A "call" is calculated in the code, and at the end of the logic, when we know we need to either disable or enable the function, we can then use the "nop" and "call" properly. This code is more robust than the previous version. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 108 +++++++++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 58 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5cbddb59e99f..fdaab04a0282 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -327,96 +327,89 @@ ftrace_record_ip(unsigned long ip) static int __ftrace_replace_code(struct dyn_ftrace *rec, - unsigned char *old, unsigned char *new, int enable) + unsigned char *nop, int enable) { unsigned long ip, fl; + unsigned char *call, *old, *new; ip = rec->ip; - if (ftrace_filtered && enable) { + /* + * If this record is not to be traced and + * it is not enabled then do nothing. + * + * If this record is not to be traced and + * it is enabled then disabled it. + * + */ + if (rec->flags & FTRACE_FL_NOTRACE) { + if (rec->flags & FTRACE_FL_ENABLED) + rec->flags &= ~FTRACE_FL_ENABLED; + else + return 0; + + } else if (ftrace_filtered && enable) { /* - * If filtering is on: - * - * If this record is set to be filtered and - * is enabled then do nothing. - * - * If this record is set to be filtered and - * it is not enabled, enable it. - * - * If this record is not set to be filtered - * and it is not enabled do nothing. - * - * If this record is set not to trace then - * do nothing. - * - * If this record is set not to trace and - * it is enabled then disable it. - * - * If this record is not set to be filtered and - * it is enabled, disable it. + * Filtering is on: */ - fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | - FTRACE_FL_ENABLED); + fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); - if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || - (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || - !fl || (fl == FTRACE_FL_NOTRACE)) + /* Record is filtered and enabled, do nothing */ + if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) return 0; - /* - * If it is enabled disable it, - * otherwise enable it! - */ - if (fl & FTRACE_FL_ENABLED) { - /* swap new and old */ - new = old; - old = ftrace_call_replace(ip, FTRACE_ADDR); + /* Record is not filtered and is not enabled do nothing */ + if (!fl) + return 0; + + /* Record is not filtered but enabled, disable it */ + if (fl == FTRACE_FL_ENABLED) rec->flags &= ~FTRACE_FL_ENABLED; - } else { - new = ftrace_call_replace(ip, FTRACE_ADDR); + else + /* Otherwise record is filtered but not enabled, enable it */ rec->flags |= FTRACE_FL_ENABLED; - } } else { + /* Disable or not filtered */ if (enable) { - /* - * If this record is set not to trace and is - * not enabled, do nothing. - */ - fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); - if (fl == FTRACE_FL_NOTRACE) - return 0; - - new = ftrace_call_replace(ip, FTRACE_ADDR); - } else - old = ftrace_call_replace(ip, FTRACE_ADDR); - - if (enable) { + /* if record is enabled, do nothing */ if (rec->flags & FTRACE_FL_ENABLED) return 0; + rec->flags |= FTRACE_FL_ENABLED; + } else { + + /* if record is not enabled do nothing */ if (!(rec->flags & FTRACE_FL_ENABLED)) return 0; + rec->flags &= ~FTRACE_FL_ENABLED; } } + call = ftrace_call_replace(ip, FTRACE_ADDR); + + if (rec->flags & FTRACE_FL_ENABLED) { + old = nop; + new = call; + } else { + old = call; + new = nop; + } + return ftrace_modify_code(ip, old, new); } static void ftrace_replace_code(int enable) { int i, failed; - unsigned char *new = NULL, *old = NULL; + unsigned char *nop = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; - if (enable) - old = ftrace_nop_replace(); - else - new = ftrace_nop_replace(); + nop = ftrace_nop_replace(); for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -434,7 +427,7 @@ static void ftrace_replace_code(int enable) unfreeze_record(rec); } - failed = __ftrace_replace_code(rec, old, new, enable); + failed = __ftrace_replace_code(rec, nop, enable); if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || @@ -538,8 +531,7 @@ static void ftrace_startup(void) mutex_lock(&ftrace_start_lock); ftrace_start++; - if (ftrace_start == 1) - command |= FTRACE_ENABLE_CALLS; + command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; -- cgit v1.2.3 From 522a110b42b306d696cf84e34c677ed0e7080194 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Fri, 21 Nov 2008 11:00:18 +0800 Subject: function tracing: fix wrong position computing of stack_trace Impact: make output of stack_trace complete if buffer overruns When read buffer overruns, the output of stack_trace isn't complete. When printing records with seq_printf in t_show, if the read buffer has overruned by the current record, then this record won't be printed to user space through read buffer, it will just be dropped in this printing. When next printing, t_start should return the "*pos"th record, which is the one dropped by previous printing, but it just returns (m->private + *pos)th record. Here we use a more sane method to implement seq_operations which can be found in kernel code. Thus we needn't initialize m->private. About testing, it's not easy to overrun read buffer, but we can use seq_printf to print more padding bytes in t_show, then it's easy to check whether or not records are lost. This commit has been tested on both condition of overrun and non overrun. Signed-off-by: Liming Wang Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_stack.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index be682b62fe58..3bdb44bde4b7 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -184,11 +184,16 @@ static struct file_operations stack_max_size_fops = { static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - long i = (long)m->private; + long i; (*pos)++; - i++; + if (v == SEQ_START_TOKEN) + i = 0; + else { + i = *(long *)v; + i++; + } if (i >= max_stack_trace.nr_entries || stack_dump_trace[i] == ULONG_MAX) @@ -201,12 +206,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - void *t = &m->private; + void *t = SEQ_START_TOKEN; loff_t l = 0; local_irq_disable(); __raw_spin_lock(&max_stack_lock); + if (*pos == 0) + return SEQ_START_TOKEN; + for (; t && l < *pos; t = t_next(m, t, &l)) ; @@ -235,10 +243,10 @@ static int trace_lookup_stack(struct seq_file *m, long i) static int t_show(struct seq_file *m, void *v) { - long i = *(long *)v; + long i; int size; - if (i < 0) { + if (v == SEQ_START_TOKEN) { seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", @@ -246,6 +254,8 @@ static int t_show(struct seq_file *m, void *v) return 0; } + i = *(long *)v; + if (i >= max_stack_trace.nr_entries || stack_dump_trace[i] == ULONG_MAX) return 0; @@ -275,10 +285,6 @@ static int stack_trace_open(struct inode *inode, struct file *file) int ret; ret = seq_open(file, &stack_trace_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = (void *)-1; - } return ret; } -- cgit v1.2.3 From 7ee1768ddb3075ae3a0801cc2d0ea4195530a7db Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 23 Nov 2008 21:24:30 +0200 Subject: x86, mmiotrace: fix buffer overrun detection Impact: fix mmiotrace overrun tracing When ftrace framework moved to use the ring buffer facility, the buffer overrun detection was broken after 2.6.27 by commit | commit 3928a8a2d98081d1bc3c0a84a2d70e29b90ecf1c | Author: Steven Rostedt | Date: Mon Sep 29 23:02:41 2008 -0400 | | ftrace: make work with new ring buffer | | This patch ports ftrace over to the new ring buffer. The detection is now fixed by using the ring buffer API. When mmiotrace detects a buffer overrun, it will report the number of lost events. People reading an mmiotrace log must know if something was missed, otherwise the data may not make sense. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_mmiotrace.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index f28484618ff0..e62cbf78eab6 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -18,12 +18,14 @@ struct header_iter { static struct trace_array *mmio_trace_array; static bool overrun_detected; +static unsigned long prev_overruns; static void mmio_reset_data(struct trace_array *tr) { int cpu; overrun_detected = false; + prev_overruns = 0; tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) @@ -128,16 +130,12 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { - int cpu; unsigned long cnt = 0; -/* FIXME: */ -#if 0 - for_each_online_cpu(cpu) { - cnt += iter->overrun[cpu]; - iter->overrun[cpu] = 0; - } -#endif - (void)cpu; + unsigned long over = ring_buffer_overruns(iter->tr->buffer); + + if (over > prev_overruns) + cnt = over - prev_overruns; + prev_overruns = over; return cnt; } -- cgit v1.2.3 From 4f5a7f40ddbae98569acbb99118a98570315579c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 27 Nov 2008 10:21:46 +0800 Subject: ftrace: prevent recursion Impact: prevent unnecessary stack recursion if the resched flag was set before we entered, then don't reschedule. Signed-off-by: Lai Jiangshan Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f780e9552f91..668bbb5ef2bd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1215,7 +1215,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, out: if (resched) - preempt_enable_notrace(); + preempt_enable_no_resched_notrace(); else preempt_enable_notrace(); return NULL; -- cgit v1.2.3