From 07d777fe8c3985bc83428c2866713c2d1b3d4129 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Sep 2011 14:01:55 -0400 Subject: tracing: Add percpu buffers for trace_printk() Currently, trace_printk() uses a single buffer to write into to calculate the size and format needed to save the trace. To do this safely in an SMP environment, a spin_lock() is taken to only allow one writer at a time to the buffer. But this could also affect what is being traced, and add synchronization that would not be there otherwise. Ideally, using percpu buffers would be useful, but since trace_printk() is only used in development, having per cpu buffers for something never used is a waste of space. Thus, the use of the trace_bprintk() format section is changed to be used for static fmts as well as dynamic ones. Then at boot up, we can check if the section that holds the trace_printk formats is non-empty, and if it does contain something, then we know a trace_printk() has been added to the kernel. At this time the trace_printk per cpu buffers are allocated. A check is also done at module load time in case a module is added that contains a trace_printk(). Once the buffers are allocated, they are never freed. If you use a trace_printk() then you should know what you are doing. A buffer is made for each type of context: normal softirq irq nmi The context is checked and the appropriate buffer is used. This allows for totally lockless usage of trace_printk(), and they no longer even disable interrupts. Requested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 645231c373c8..c0d34420a913 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -480,15 +480,16 @@ do { \ #define trace_printk(fmt, args...) \ do { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ __trace_printk_check_format(fmt, ##args); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ \ + if (__builtin_constant_p(fmt)) \ __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(_THIS_IP_, fmt, ##args); \ + else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) extern __printf(2, 3) -- cgit v1.2.3 From 438ced1720b584000a9e8a4349d1f6bb7ee3ad6d Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Thu, 2 Feb 2012 12:00:41 -0800 Subject: ring-buffer: Add per_cpu ring buffer control files Add a debugfs entry under per_cpu/ folder for each cpu called buffer_size_kb to control the ring buffer size for each CPU independently. If the global file buffer_size_kb is used to set size, the individual ring buffers will be adjusted to the given size. The buffer_size_kb will report the common size to maintain backward compatibility. If the buffer_size_kb file under the per_cpu/ directory is used to change buffer size for a specific CPU, only the size of the respective ring buffer is updated. When tracing/buffer_size_kb is read, it reports 'X' to indicate that sizes of per_cpu ring buffers are not equivalent. Link: http://lkml.kernel.org/r/1328212844-11889-1-git-send-email-vnagarnaik@google.com Cc: Frederic Weisbecker Cc: Michael Rubin Cc: David Sharp Cc: Justin Teravest Signed-off-by: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 7be2e88f23fd..6c8835f74f79 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -96,9 +96,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +#define RING_BUFFER_ALL_CPUS -1 + void ring_buffer_free(struct ring_buffer *buffer); -int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); +int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu); void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val); @@ -129,7 +131,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); void ring_buffer_iter_reset(struct ring_buffer_iter *iter); int ring_buffer_iter_empty(struct ring_buffer_iter *iter); -unsigned long ring_buffer_size(struct ring_buffer *buffer); +unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu); void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_reset(struct ring_buffer *buffer); -- cgit v1.2.3 From 08d636b6d4fb80647fe8869ea1cd97b2c26a4751 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 16 Aug 2011 09:57:10 -0400 Subject: ftrace/x86: Have arch x86_64 use breakpoints instead of stop machine This method changes x86 to add a breakpoint to the mcount locations instead of calling stop machine. Now that iret can be handled by NMIs, we perform the following to update code: 1) Add a breakpoint to all locations that will be modified 2) Sync all cores 3) Update all locations to be either a nop or call (except breakpoint op) 4) Sync all cores 5) Remove the breakpoint with the new code. 6) Sync all cores [ Added updates that Masami suggested: Use unlikely(modifying_ftrace_code) in int3 trap to keep kprobes efficient. Don't use NOTIFY_* in ftrace handler in int3 as it is not a notifier. ] Cc: H. Peter Anvin Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 72a6cabb4d5b..0b5590330bca 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -286,6 +286,12 @@ struct ftrace_rec_iter *ftrace_rec_iter_start(void); struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter); struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); +#define for_ftrace_rec_iter(iter) \ + for (iter = ftrace_rec_iter_start(); \ + iter; \ + iter = ftrace_rec_iter_next(iter)) + + int ftrace_update_record(struct dyn_ftrace *rec, int enable); int ftrace_test_record(struct dyn_ftrace *rec, int enable); void ftrace_run_stop_machine(int command); -- cgit v1.2.3 From b02ee9a33b65bcc4ad13c12a0b04afdaab3ddd8d Mon Sep 17 00:00:00 2001 From: Minho Ban Date: Mon, 7 May 2012 11:36:00 +0900 Subject: tracing: Prevent wasting time evaluating parameters in trace_preempt_on/off This fixes spending time for evaluating parameters in trace_preempt_on/off when the tracer config is off. The patch mainly inspired by Steven Rostedt, thanks Steven. Link: http://lkml.kernel.org/r/4FA73510.7070705@samsung.com Cc: Ingo Molnar Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Turner Cc: Thomas Gleixner Cc: Hidetoshi Seto Cc: Paul E. McKenney Cc: Josh Triplett Signed-off-by: Minho Ban Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0b5590330bca..d32cc5e4b0cc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -491,8 +491,12 @@ static inline void __ftrace_enabled_restore(int enabled) extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else - static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { } - static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { } +/* + * Use defines instead of static inlines because some arches will make code out + * of the CALLER_ADDR, when we really want these to be a real nop. + */ +# define trace_preempt_on(a0, a1) do { } while (0) +# define trace_preempt_off(a0, a1) do { } while (0) #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -- cgit v1.2.3 From fd0d000b2c34aa43d4e92dcf0dfaeda7e123008a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 2 Apr 2012 20:19:08 +0200 Subject: perf: Pass last sampling period to perf_sample_data_init() We always need to pass the last sample period to perf_sample_data_init(), otherwise the event distribution will be wrong. Thus, modifiyng the function interface with the required period as argument. So basically a pattern like this: perf_sample_data_init(&data, ~0ULL); data.period = event->hw.last_period; will now be like that: perf_sample_data_init(&data, ~0ULL, event->hw.last_period); Avoids unininitialized data.period and simplifies code. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1333390758-10893-3-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ddbb6a901f65..f32578634d9d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1132,11 +1132,14 @@ struct perf_sample_data { struct perf_branch_stack *br_stack; }; -static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +static inline void perf_sample_data_init(struct perf_sample_data *data, + u64 addr, u64 period) { + /* remaining struct members initialized in perf_prepare_sample() */ data->addr = addr; data->raw = NULL; data->br_stack = NULL; + data->period = period; } extern void perf_output_sample(struct perf_output_handle *handle, -- cgit v1.2.3 From cb04ff9ac424d0e689d9b612e9f73cb443ab4b7e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 May 2012 18:56:04 +0200 Subject: sched, perf: Use a single callback into the scheduler We can easily use a single callback for both sched-in and sched-out. This reduces the code footprint in the scheduler path as well as removes the PMU black spot otherwise present between the out and in callback. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-o56ajxp1edwqg6x9d31wb805@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f32578634d9d..8adf70e9e3cc 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1084,10 +1084,8 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); extern const char *perf_pmu_name(void); -extern void __perf_event_task_sched_in(struct task_struct *prev, - struct task_struct *task); -extern void __perf_event_task_sched_out(struct task_struct *prev, - struct task_struct *next); +extern void __perf_event_task_sched(struct task_struct *prev, + struct task_struct *next); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1207,20 +1205,13 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) extern struct static_key_deferred perf_sched_events; -static inline void perf_event_task_sched_in(struct task_struct *prev, +static inline void perf_event_task_sched(struct task_struct *prev, struct task_struct *task) -{ - if (static_key_false(&perf_sched_events.key)) - __perf_event_task_sched_in(prev, task); -} - -static inline void perf_event_task_sched_out(struct task_struct *prev, - struct task_struct *next) { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); if (static_key_false(&perf_sched_events.key)) - __perf_event_task_sched_out(prev, next); + __perf_event_task_sched(prev, task); } extern void perf_event_mmap(struct vm_area_struct *vma); @@ -1295,11 +1286,8 @@ extern void perf_event_disable(struct perf_event *event); extern void perf_event_task_tick(void); #else static inline void -perf_event_task_sched_in(struct task_struct *prev, - struct task_struct *task) { } -static inline void -perf_event_task_sched_out(struct task_struct *prev, - struct task_struct *next) { } +perf_event_task_sched(struct task_struct *prev, + struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } -- cgit v1.2.3 From f0cf973a224a3e3c1dec3395af3ba01cf14b1ff4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Apr 2012 14:39:54 -0400 Subject: ftrace: Return record ip addr for ftrace_location() ftrace_location() is passed an addr, and returns 1 if the addr is on a ftrace nop (or caller to ftrace_caller), and 0 otherwise. To let kprobes know if it should move a breakpoint or not, it must return the actual addr that is the start of the ftrace nop. This way a kprobe placed on the location of a ftrace nop, can instead be placed on the instruction after the nop. Even if the probe addr is on the second or later byte of the nop, it can simply be moved forward. Cc: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d32cc5e4b0cc..609948eb2b0a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -295,7 +295,7 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); int ftrace_update_record(struct dyn_ftrace *rec, int enable); int ftrace_test_record(struct dyn_ftrace *rec, int enable); void ftrace_run_stop_machine(int command); -int ftrace_location(unsigned long ip); +unsigned long ftrace_location(unsigned long ip); extern ftrace_func_t ftrace_trace_function; -- cgit v1.2.3 From 8ed3e2cfe40ffe43630fd8efa34fc97c95b4c298 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Apr 2012 14:59:43 -0400 Subject: ftrace: Make ftrace_modify_all_code() global for archs to use Rename __ftrace_modify_code() to ftrace_modify_all_code() and make it global for all archs to use. This will remove the duplication of code, as archs that can modify code without stop_machine() can use it directly outside of the stop_machine() call. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 609948eb2b0a..cd72ace7ade3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -319,6 +319,8 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +void ftrace_modify_all_code(int command); + #ifndef FTRACE_ADDR #define FTRACE_ADDR ((unsigned long)ftrace_caller) #endif -- cgit v1.2.3 From e4f5d5440bb860a3e8942ca8f7277a7f31798965 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Apr 2012 09:13:18 -0400 Subject: ftrace/x86: Have x86 ftrace use the ftrace_modify_all_code() To remove duplicate code, have the ftrace arch_ftrace_update_code() use the generic ftrace_modify_all_code(). This requires that the default ftrace_replace_code() becomes a weak function so that an arch may override it. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index cd72ace7ade3..55e6d63d46d0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -314,6 +314,7 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern int ftrace_dyn_arch_init(void *data); +extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); -- cgit v1.2.3