From 07d777fe8c3985bc83428c2866713c2d1b3d4129 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 22 Sep 2011 14:01:55 -0400
Subject: tracing: Add percpu buffers for trace_printk()

Currently, trace_printk() uses a single buffer to write into
to calculate the size and format needed to save the trace. To
do this safely in an SMP environment, a spin_lock() is taken
to only allow one writer at a time to the buffer. But this could
also affect what is being traced, and add synchronization that
would not be there otherwise.

Ideally, using percpu buffers would be useful, but since trace_printk()
is only used in development, having per cpu buffers for something
never used is a waste of space. Thus, the use of the trace_bprintk()
format section is changed to be used for static fmts as well as dynamic ones.
Then at boot up, we can check if the section that holds the trace_printk
formats is non-empty, and if it does contain something, then we
know a trace_printk() has been added to the kernel. At this time
the trace_printk per cpu buffers are allocated. A check is also
done at module load time in case a module is added that contains a
trace_printk().

Once the buffers are allocated, they are never freed. If you use
a trace_printk() then you should know what you are doing.

A buffer is made for each type of context:

  normal
  softirq
  irq
  nmi

The context is checked and the appropriate buffer is used.
This allows for totally lockless usage of trace_printk(),
and they no longer even disable interrupts.

Requested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 645231c373c8..c0d34420a913 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -480,15 +480,16 @@ do {									\
 
 #define trace_printk(fmt, args...)					\
 do {									\
+	static const char *trace_printk_fmt				\
+		__attribute__((section("__trace_printk_fmt"))) =	\
+		__builtin_constant_p(fmt) ? fmt : NULL;			\
+									\
 	__trace_printk_check_format(fmt, ##args);			\
-	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
-		  __attribute__((section("__trace_printk_fmt"))) =	\
-			__builtin_constant_p(fmt) ? fmt : NULL;		\
 									\
+	if (__builtin_constant_p(fmt))					\
 		__trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args);	\
-	} else								\
-		__trace_printk(_THIS_IP_, fmt, ##args);		\
+	else								\
+		__trace_printk(_THIS_IP_, fmt, ##args);			\
 } while (0)
 
 extern __printf(2, 3)
-- 
cgit v1.2.3


From 438ced1720b584000a9e8a4349d1f6bb7ee3ad6d Mon Sep 17 00:00:00 2001
From: Vaibhav Nagarnaik <vnagarnaik@google.com>
Date: Thu, 2 Feb 2012 12:00:41 -0800
Subject: ring-buffer: Add per_cpu ring buffer control files

Add a debugfs entry under per_cpu/ folder for each cpu called
buffer_size_kb to control the ring buffer size for each CPU
independently.

If the global file buffer_size_kb is used to set size, the individual
ring buffers will be adjusted to the given size. The buffer_size_kb will
report the common size to maintain backward compatibility.

If the buffer_size_kb file under the per_cpu/ directory is used to
change buffer size for a specific CPU, only the size of the respective
ring buffer is updated. When tracing/buffer_size_kb is read, it reports
'X' to indicate that sizes of per_cpu ring buffers are not equivalent.

Link: http://lkml.kernel.org/r/1328212844-11889-1-git-send-email-vnagarnaik@google.com

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Michael Rubin <mrubin@google.com>
Cc: David Sharp <dhsharp@google.com>
Cc: Justin Teravest <teravest@google.com>
Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 7be2e88f23fd..6c8835f74f79 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -96,9 +96,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
+#define RING_BUFFER_ALL_CPUS -1
+
 void ring_buffer_free(struct ring_buffer *buffer);
 
-int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu);
 
 void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val);
 
@@ -129,7 +131,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
 void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
 int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
 
-unsigned long ring_buffer_size(struct ring_buffer *buffer);
+unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu);
 
 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_reset(struct ring_buffer *buffer);
-- 
cgit v1.2.3


From 08d636b6d4fb80647fe8869ea1cd97b2c26a4751 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 16 Aug 2011 09:57:10 -0400
Subject: ftrace/x86: Have arch x86_64 use breakpoints instead of stop machine

This method changes x86 to add a breakpoint to the mcount locations
instead of calling stop machine.

Now that iret can be handled by NMIs, we perform the following to
update code:

1) Add a breakpoint to all locations that will be modified

2) Sync all cores

3) Update all locations to be either a nop or call (except breakpoint
   op)

4) Sync all cores

5) Replace the breakpoint with the new code.

6) Sync all cores

[
  Added updates that Masami suggested:
   Use unlikely(modifying_ftrace_code) in int3 trap to keep kprobes efficient.
   Don't use NOTIFY_* in ftrace handler in int3 as it is not a notifier.
]

Cc: H. Peter Anvin <hpa@zytor.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 72a6cabb4d5b..0b5590330bca 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -286,6 +286,12 @@ struct ftrace_rec_iter *ftrace_rec_iter_start(void);
 struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
 struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
 
+#define for_ftrace_rec_iter(iter)		\
+	for (iter = ftrace_rec_iter_start();	\
+	     iter;				\
+	     iter = ftrace_rec_iter_next(iter))
+
+
 int ftrace_update_record(struct dyn_ftrace *rec, int enable);
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
-- 
cgit v1.2.3


From b02ee9a33b65bcc4ad13c12a0b04afdaab3ddd8d Mon Sep 17 00:00:00 2001
From: Minho Ban <mhban@samsung.com>
Date: Mon, 7 May 2012 11:36:00 +0900
Subject: tracing: Prevent wasting time evaluating parameters in
 trace_preempt_on/off

This avoids spending time evaluating the parameters of trace_preempt_on/off
when the tracer config is off.

The patch was mainly inspired by Steven Rostedt; thanks, Steven.

Link: http://lkml.kernel.org/r/4FA73510.7070705@samsung.com

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Minho Ban <mhban@samsung.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0b5590330bca..d32cc5e4b0cc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -491,8 +491,12 @@ static inline void __ftrace_enabled_restore(int enabled)
   extern void trace_preempt_on(unsigned long a0, unsigned long a1);
   extern void trace_preempt_off(unsigned long a0, unsigned long a1);
 #else
-  static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { }
-  static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { }
+/*
+ * Use defines instead of static inlines because some arches will make code out
+ * of the CALLER_ADDR, when we really want these to be a real nop.
+ */
+# define trace_preempt_on(a0, a1) do { } while (0)
+# define trace_preempt_off(a0, a1) do { } while (0)
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
-- 
cgit v1.2.3


From fd0d000b2c34aa43d4e92dcf0dfaeda7e123008a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 2 Apr 2012 20:19:08 +0200
Subject: perf: Pass last sampling period to perf_sample_data_init()

We always need to pass the last sample period to
perf_sample_data_init(), otherwise the event distribution will be
wrong. Thus, modify the function interface to take the required period
as an argument. So basically a pattern like this:

        perf_sample_data_init(&data, ~0ULL);
        data.period = event->hw.last_period;

will now be like that:

        perf_sample_data_init(&data, ~0ULL, event->hw.last_period);

This avoids an uninitialized data.period and simplifies the code.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1333390758-10893-3-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ddbb6a901f65..f32578634d9d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1132,11 +1132,14 @@ struct perf_sample_data {
 	struct perf_branch_stack	*br_stack;
 };
 
-static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
+static inline void perf_sample_data_init(struct perf_sample_data *data,
+					 u64 addr, u64 period)
 {
+	/* remaining struct members initialized in perf_prepare_sample() */
 	data->addr = addr;
 	data->raw  = NULL;
 	data->br_stack = NULL;
+	data->period	= period;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
-- 
cgit v1.2.3


From cb04ff9ac424d0e689d9b612e9f73cb443ab4b7e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 8 May 2012 18:56:04 +0200
Subject: sched, perf: Use a single callback into the scheduler

We can easily use a single callback for both sched-in and sched-out. This
reduces the code footprint in the scheduler path as well as removes
the PMU black spot otherwise present between the out and in callback.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-o56ajxp1edwqg6x9d31wb805@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f32578634d9d..8adf70e9e3cc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1084,10 +1084,8 @@ extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
 extern const char *perf_pmu_name(void);
-extern void __perf_event_task_sched_in(struct task_struct *prev,
-				       struct task_struct *task);
-extern void __perf_event_task_sched_out(struct task_struct *prev,
-					struct task_struct *next);
+extern void __perf_event_task_sched(struct task_struct *prev,
+				    struct task_struct *next);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -1207,20 +1205,13 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 
 extern struct static_key_deferred perf_sched_events;
 
-static inline void perf_event_task_sched_in(struct task_struct *prev,
+static inline void perf_event_task_sched(struct task_struct *prev,
 					    struct task_struct *task)
-{
-	if (static_key_false(&perf_sched_events.key))
-		__perf_event_task_sched_in(prev, task);
-}
-
-static inline void perf_event_task_sched_out(struct task_struct *prev,
-					     struct task_struct *next)
 {
 	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
 
 	if (static_key_false(&perf_sched_events.key))
-		__perf_event_task_sched_out(prev, next);
+		__perf_event_task_sched(prev, task);
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1295,11 +1286,8 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else
 static inline void
-perf_event_task_sched_in(struct task_struct *prev,
-			 struct task_struct *task)			{ }
-static inline void
-perf_event_task_sched_out(struct task_struct *prev,
-			  struct task_struct *next)			{ }
+perf_event_task_sched(struct task_struct *prev,
+		      struct task_struct *task)				{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
-- 
cgit v1.2.3


From f0cf973a224a3e3c1dec3395af3ba01cf14b1ff4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 25 Apr 2012 14:39:54 -0400
Subject: ftrace: Return record ip addr for ftrace_location()

ftrace_location() is passed an addr, and returns 1 if the addr is
on a ftrace nop (or caller to ftrace_caller), and 0 otherwise.

To let kprobes know if it should move a breakpoint or not, it
must return the actual addr that is the start of the ftrace nop.
This way a kprobe placed on the location of a ftrace nop, can
instead be placed on the instruction after the nop. Even if the
probe addr is on the second or later byte of the nop, it can
simply be moved forward.

Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d32cc5e4b0cc..609948eb2b0a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -295,7 +295,7 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
 int ftrace_update_record(struct dyn_ftrace *rec, int enable);
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
-int ftrace_location(unsigned long ip);
+unsigned long ftrace_location(unsigned long ip);
 
 extern ftrace_func_t ftrace_trace_function;
 
-- 
cgit v1.2.3


From 8ed3e2cfe40ffe43630fd8efa34fc97c95b4c298 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 26 Apr 2012 14:59:43 -0400
Subject: ftrace: Make ftrace_modify_all_code() global for archs to use

Rename __ftrace_modify_code() to ftrace_modify_all_code() and make
it global for all archs to use. This will remove the duplication
of code, as archs that can modify code without stop_machine()
can use it directly outside of the stop_machine() call.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 609948eb2b0a..cd72ace7ade3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -319,6 +319,8 @@ extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
 
+void ftrace_modify_all_code(int command);
+
 #ifndef FTRACE_ADDR
 #define FTRACE_ADDR ((unsigned long)ftrace_caller)
 #endif
-- 
cgit v1.2.3


From e4f5d5440bb860a3e8942ca8f7277a7f31798965 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 27 Apr 2012 09:13:18 -0400
Subject: ftrace/x86: Have x86 ftrace use the ftrace_modify_all_code()

To remove duplicate code, have the ftrace arch_ftrace_update_code()
use the generic ftrace_modify_all_code(). This requires that the
default ftrace_replace_code() becomes a weak function so that an
arch may override it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index cd72ace7ade3..55e6d63d46d0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -314,6 +314,7 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
+extern void ftrace_replace_code(int enable);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
 extern void ftrace_call(void);
-- 
cgit v1.2.3