From 4e267db135c44d0b18e553899fe7df32b89211a5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Apr 2016 07:38:13 -0400
Subject: tracing: Make the pid filtering helper functions global

Make the functions used for pid filtering global for tracing, such that the
function tracer can use the pid code as well.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.h        |  9 +++++++++
 kernel/trace/trace_events.c | 34 +++++++++++++++++-----------------
 2 files changed, 26 insertions(+), 17 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5167c366d6b7..172330891c6d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -628,6 +628,15 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_thresh;
 
+/* PID filtering */
+bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
+			     pid_t search_pid);
+bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
+			    struct task_struct *task);
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+				  struct task_struct *self,
+				  struct task_struct *task);
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3d4155892a1e..b5e514c4dada 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -503,8 +503,8 @@ static void ftrace_clear_events(struct trace_array *tr)
 extern int pid_max;
 
 /* Returns true if found in filter */
-static bool
-find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
+bool
+trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 {
 	/*
 	 * If pid_max changed after filtered_pids was created, we
@@ -516,8 +516,8 @@ find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 	return test_bit(search_pid, filtered_pids->pids);
 }
 
-static bool
-ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
+bool
+trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
 {
 	/*
 	 * Return false, because if filtered_pids does not exist,
@@ -526,19 +526,19 @@ ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
 	if (!filtered_pids)
 		return false;
 
-	return !find_filtered_pid(filtered_pids, task->pid);
+	return !trace_find_filtered_pid(filtered_pids, task->pid);
 }
 
-static void filter_add_remove_task(struct trace_pid_list *pid_list,
-				   struct task_struct *self,
-				   struct task_struct *task)
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+				  struct task_struct *self,
+				  struct task_struct *task)
 {
 	if (!pid_list)
 		return;
 
 	/* For forks, we only add if the forking task is listed */
 	if (self) {
-		if (!find_filtered_pid(pid_list, self->pid))
+		if (!trace_find_filtered_pid(pid_list, self->pid))
 			return;
 	}
 
@@ -560,7 +560,7 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
 	struct trace_array *tr = data;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
-	filter_add_remove_task(pid_list, NULL, task);
+	trace_filter_add_remove_task(pid_list, NULL, task);
 }
 
 static void
@@ -572,7 +572,7 @@ event_filter_pid_sched_process_fork(void *data,
 	struct trace_array *tr = data;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
-	filter_add_remove_task(pid_list, self, task);
+	trace_filter_add_remove_task(pid_list, self, task);
 }
 
 void trace_event_follow_fork(struct trace_array *tr, bool enable)
@@ -600,8 +600,8 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
 
 	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       ignore_this_task(pid_list, prev) &&
-		       ignore_this_task(pid_list, next));
+		       trace_ignore_this_task(pid_list, prev) &&
+		       trace_ignore_this_task(pid_list, next));
 }
 
 static void
@@ -614,7 +614,7 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
 
 	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       ignore_this_task(pid_list, next));
+		       trace_ignore_this_task(pid_list, next));
 }
 
 static void
@@ -630,7 +630,7 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
 
 	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       ignore_this_task(pid_list, task));
+		       trace_ignore_this_task(pid_list, task));
 }
 
 static void
@@ -647,7 +647,7 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
 
 	/* Set tracing if current is enabled */
 	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       ignore_this_task(pid_list, current));
+		       trace_ignore_this_task(pid_list, current));
 }
 
 static void __ftrace_clear_event_pids(struct trace_array *tr)
@@ -1654,7 +1654,7 @@ static void ignore_task_cpu(void *data)
 					     mutex_is_locked(&event_mutex));
 
 	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       ignore_this_task(pid_list, current));
+		       trace_ignore_this_task(pid_list, current));
 }
 
 static ssize_t
-- 
cgit v1.2.3


From d8275c454dcdba296675221b4c12f19d1b6e0ee8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Apr 2016 12:15:22 -0400
Subject: tracing: Move filtered_pid helper functions into trace.c

As the filtered_pid functions are going to be used by function tracer as
well as trace_events, move the code into the generic trace.c file.

The functions moved are:

 trace_find_filtered_pid()
 trace_ignore_this_task()
 trace_filter_add_remove_task()

Kernel Doc text was also added.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_events.c | 51 -----------------------------
 2 files changed, 78 insertions(+), 51 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a4bd6b68a0b..0b87fe8e6d0b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -319,6 +319,84 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec,
 	return 0;
 }
 
+/**
+ * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
+ * @filtered_pids: The list of pids to check
+ * @search_pid: The PID to find in @filtered_pids
+ *
+ * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
+ */
+bool
+trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
+{
+	/*
+	 * If pid_max changed after filtered_pids was created, we
+	 * by default ignore all pids greater than the previous pid_max.
+	 */
+	if (search_pid >= filtered_pids->pid_max)
+		return false;
+
+	return test_bit(search_pid, filtered_pids->pids);
+}
+
+/**
+ * trace_ignore_this_task - should a task be ignored for tracing
+ * @filtered_pids: The list of pids to check
+ * @task: The task that should be ignored if not filtered
+ *
+ * Checks if @task should be traced or not from @filtered_pids.
+ * Returns true if @task should *NOT* be traced.
+ * Returns false if @task should be traced.
+ */
+bool
+trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
+{
+	/*
+	 * Return false, because if filtered_pids does not exist,
+	 * all pids are good to trace.
+	 */
+	if (!filtered_pids)
+		return false;
+
+	return !trace_find_filtered_pid(filtered_pids, task->pid);
+}
+
+/**
+ * trace_pid_filter_add_remove - Add or remove a task from a pid_list
+ * @pid_list: The list to modify
+ * @self: The current task for fork or NULL for exit
+ * @task: The task to add or remove
+ *
+ * If adding a task, if @self is defined, the task is only added if @self
+ * is also included in @pid_list. This happens on fork and tasks should
+ * only be added when the parent is listed. If @self is NULL, then the
+ * @task pid will be removed from the list, which would happen on exit
+ * of a task.
+ */
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+				  struct task_struct *self,
+				  struct task_struct *task)
+{
+	if (!pid_list)
+		return;
+
+	/* For forks, we only add if the forking task is listed */
+	if (self) {
+		if (!trace_find_filtered_pid(pid_list, self->pid))
+			return;
+	}
+
+	/* Sorry, but we don't support pid_max changing after setting */
+	if (task->pid >= pid_list->pid_max)
+		return;
+
+	/* "self" is set for forks, and NULL for exits */
+	if (self)
+		set_bit(task->pid, pid_list->pids);
+	else
+		clear_bit(task->pid, pid_list->pids);
+}
+
 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
 	u64 ts;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index b5e514c4dada..a11e6d9a3841 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -502,57 +502,6 @@ static void ftrace_clear_events(struct trace_array *tr)
 /* Shouldn't this be in a header? */
 extern int pid_max;
 
-/* Returns true if found in filter */
-bool
-trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
-{
-	/*
-	 * If pid_max changed after filtered_pids was created, we
-	 * by default ignore all pids greater than the previous pid_max.
-	 */
-	if (search_pid >= filtered_pids->pid_max)
-		return false;
-
-	return test_bit(search_pid, filtered_pids->pids);
-}
-
-bool
-trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
-{
-	/*
-	 * Return false, because if filtered_pids does not exist,
-	 * all pids are good to trace.
-	 */
-	if (!filtered_pids)
-		return false;
-
-	return !trace_find_filtered_pid(filtered_pids, task->pid);
-}
-
-void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
-				  struct task_struct *self,
-				  struct task_struct *task)
-{
-	if (!pid_list)
-		return;
-
-	/* For forks, we only add if the forking task is listed */
-	if (self) {
-		if (!trace_find_filtered_pid(pid_list, self->pid))
-			return;
-	}
-
-	/* Sorry, but we don't support pid_max changing after setting */
-	if (task->pid >= pid_list->pid_max)
-		return;
-
-	/* "self" is set for forks, and NULL for exits */
-	if (self)
-		set_bit(task->pid, pid_list->pids);
-	else
-		clear_bit(task->pid, pid_list->pids);
-}
-
 static void
 event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
 {
-- 
cgit v1.2.3


From 5cc8976bd52153678ca37cc1e3000833b20276f3 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 20 Apr 2016 15:19:54 -0400
Subject: tracing: Move the pid_list seq_file functions to be global

To allow other aspects of ftrace to use the pid_list logic, we need to reuse
the seq_file functions. Making the generic part into functions that can be
called by other files will help in this regard.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c        | 71 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h        |  3 ++
 kernel/trace/trace_events.c | 34 ++--------------------
 3 files changed, 77 insertions(+), 31 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0b87fe8e6d0b..7943e306cc7f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -397,6 +397,77 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 		clear_bit(task->pid, pid_list->pids);
 }
 
+/**
+ * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
+ * @pid_list: The pid list to show
+ * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
+ * @pos: The position of the file
+ *
+ * This is used by the seq_file "next" operation to iterate the pids
+ * listed in a trace_pid_list structure.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
+{
+	unsigned long pid = (unsigned long)v;
+
+	(*pos)++;
+
+	/* pid already is +1 of the actual prevous bit */
+	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
+
+	/* Return pid + 1 to allow zero to be represented */
+	if (pid < pid_list->pid_max)
+		return (void *)(pid + 1);
+
+	return NULL;
+}
+
+/**
+ * trace_pid_start - Used for seq_file to start reading pid lists
+ * @pid_list: The pid list to show
+ * @pos: The position of the file
+ *
+ * This is used by seq_file "start" operation to start the iteration
+ * of listing pids.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
+{
+	unsigned long pid;
+	loff_t l = 0;
+
+	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
+	if (pid >= pid_list->pid_max)
+		return NULL;
+
+	/* Return pid + 1 so that zero can be the exit value */
+	for (pid++; pid && l < *pos;
+	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
+		;
+	return (void *)pid;
+}
+
+/**
+ * trace_pid_show - show the current pid in seq_file processing
+ * @m: The seq_file structure to write into
+ * @v: A void pointer of the pid (+1) value to display
+ *
+ * Can be directly used by seq_file operations to display the current
+ * pid value.
+ */
+int trace_pid_show(struct seq_file *m, void *v)
+{
+	unsigned long pid = (unsigned long)v - 1;
+
+	seq_printf(m, "%lu\n", pid);
+	return 0;
+}
+
 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
 	u64 ts;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 172330891c6d..45442d5842f2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -636,6 +636,9 @@ bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 				  struct task_struct *self,
 				  struct task_struct *task);
+void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos);
+void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos);
+int trace_pid_show(struct seq_file *m, void *v);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index a11e6d9a3841..fd831a972bae 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -983,18 +983,8 @@ p_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct trace_array *tr = m->private;
 	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
-	unsigned long pid = (unsigned long)v;
 
-	(*pos)++;
-
-	/* pid already is +1 of the actual prevous bit */
-	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
-
-	/* Return pid + 1 to allow zero to be represented */
-	if (pid < pid_list->pid_max)
-		return (void *)(pid + 1);
-
-	return NULL;
+	return trace_pid_next(pid_list, v, pos);
 }
 
 static void *p_start(struct seq_file *m, loff_t *pos)
@@ -1002,8 +992,6 @@ static void *p_start(struct seq_file *m, loff_t *pos)
 {
 	struct trace_pid_list *pid_list;
 	struct trace_array *tr = m->private;
-	unsigned long pid;
-	loff_t l = 0;
 
 	/*
 	 * Grab the mutex, to keep calls to p_next() having the same
@@ -1019,15 +1007,7 @@ static void *p_start(struct seq_file *m, loff_t *pos)
 	if (!pid_list)
 		return NULL;
 
-	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
-	if (pid >= pid_list->pid_max)
-		return NULL;
-
-	/* Return pid + 1 so that zero can be the exit value */
-	for (pid++; pid && l < *pos;
-	     pid = (unsigned long)p_next(m, (void *)pid, &l))
-		;
-	return (void *)pid;
+	return trace_pid_start(pid_list, pos);
 }
 
 static void p_stop(struct seq_file *m, void *p)
@@ -1037,14 +1017,6 @@ static void p_stop(struct seq_file *m, void *p)
 	mutex_unlock(&event_mutex);
 }
 
-static int p_show(struct seq_file *m, void *v)
-{
-	unsigned long pid = (unsigned long)v - 1;
-
-	seq_printf(m, "%lu\n", pid);
-	return 0;
-}
-
 static ssize_t
 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 		  loff_t *ppos)
@@ -1795,7 +1767,7 @@ static const struct seq_operations show_set_event_seq_ops = {
 static const struct seq_operations show_set_pid_seq_ops = {
 	.start = p_start,
 	.next = p_next,
-	.show = p_show,
+	.show = trace_pid_show,
 	.stop = p_stop,
 };
 
-- 
cgit v1.2.3


From 76c813e26606d35ea9d8d6f96e646b3944c730a9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 21 Apr 2016 11:35:30 -0400
Subject: tracing: Move pid_list write processing into its own function

The addition of PIDs into a pid_list via the write operation of
set_event_pid is a bit complex. The same operation will be needed for
function tracing pids. Move the code into its own generic function in
trace.c, so that we can avoid duplication of this code.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c        | 109 ++++++++++++++++++++++++++++++++++++++++++-
 kernel/trace/trace.h        |   7 +++
 kernel/trace/trace_events.c | 110 ++++----------------------------------------
 3 files changed, 124 insertions(+), 102 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7943e306cc7f..a8bb7485fd1d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -25,7 +25,7 @@
 #include <linux/hardirq.h>
 #include <linux/linkage.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
+#include <linux/vmalloc.h>
 #include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
@@ -319,6 +319,12 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec,
 	return 0;
 }
 
+void trace_free_pid_list(struct trace_pid_list *pid_list)
+{
+	vfree(pid_list->pids);
+	kfree(pid_list);
+}
+
 /**
  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
  * @filtered_pids: The list of pids to check
@@ -468,6 +474,107 @@ int trace_pid_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+/* 128 should be much more than enough */
+#define PID_BUF_SIZE		127
+
+int trace_pid_write(struct trace_pid_list *filtered_pids,
+		    struct trace_pid_list **new_pid_list,
+		    const char __user *ubuf, size_t cnt)
+{
+	struct trace_pid_list *pid_list;
+	struct trace_parser parser;
+	unsigned long val;
+	int nr_pids = 0;
+	ssize_t read = 0;
+	ssize_t ret = 0;
+	loff_t pos;
+	pid_t pid;
+
+	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
+		return -ENOMEM;
+
+	/*
+	 * Always recreate a new array. The write is an all or nothing
+	 * operation. Always create a new array when adding new pids by
+	 * the user. If the operation fails, then the current list is
+	 * not modified.
+	 */
+	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
+	if (!pid_list)
+		return -ENOMEM;
+
+	pid_list->pid_max = READ_ONCE(pid_max);
+
+	/* Only truncating will shrink pid_max */
+	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
+		pid_list->pid_max = filtered_pids->pid_max;
+
+	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
+	if (!pid_list->pids) {
+		kfree(pid_list);
+		return -ENOMEM;
+	}
+
+	if (filtered_pids) {
+		/* copy the current bits to the new max */
+		pid = find_first_bit(filtered_pids->pids,
+				     filtered_pids->pid_max);
+		while (pid < filtered_pids->pid_max) {
+			set_bit(pid, pid_list->pids);
+			pid = find_next_bit(filtered_pids->pids,
+					    filtered_pids->pid_max,
+					    pid + 1);
+			nr_pids++;
+		}
+	}
+
+	while (cnt > 0) {
+
+		pos = 0;
+
+		ret = trace_get_user(&parser, ubuf, cnt, &pos);
+		if (ret < 0 || !trace_parser_loaded(&parser))
+			break;
+
+		read += ret;
+		ubuf += ret;
+		cnt -= ret;
+
+		parser.buffer[parser.idx] = 0;
+
+		ret = -EINVAL;
+		if (kstrtoul(parser.buffer, 0, &val))
+			break;
+		if (val >= pid_list->pid_max)
+			break;
+
+		pid = (pid_t)val;
+
+		set_bit(pid, pid_list->pids);
+		nr_pids++;
+
+		trace_parser_clear(&parser);
+		ret = 0;
+	}
+	trace_parser_put(&parser);
+
+	if (ret < 0) {
+		trace_free_pid_list(pid_list);
+		return ret;
+	}
+
+	if (!nr_pids) {
+		/* Cleared the list of pids */
+		trace_free_pid_list(pid_list);
+		read = ret;
+		pid_list = NULL;
+	}
+
+	*new_pid_list = pid_list;
+
+	return read;
+}
+
 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
 	u64 ts;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 45442d5842f2..a4dce1ef9e03 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -629,6 +629,9 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 extern unsigned long tracing_thresh;
 
 /* PID filtering */
+
+extern int pid_max;
+
 bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
 			     pid_t search_pid);
 bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
@@ -639,6 +642,10 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos);
 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos);
 int trace_pid_show(struct seq_file *m, void *v);
+void trace_free_pid_list(struct trace_pid_list *pid_list);
+int trace_pid_write(struct trace_pid_list *filtered_pids,
+		    struct trace_pid_list **new_pid_list,
+		    const char __user *ubuf, size_t cnt);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index fd831a972bae..fd449eb138cf 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,7 +15,6 @@
 #include <linux/kthread.h>
 #include <linux/tracefs.h>
 #include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/sort.h>
@@ -499,9 +498,6 @@ static void ftrace_clear_events(struct trace_array *tr)
 	mutex_unlock(&event_mutex);
 }
 
-/* Shouldn't this be in a header? */
-extern int pid_max;
-
 static void
 event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
 {
@@ -634,8 +630,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
 	/* Wait till all users are no longer using pid filtering */
 	synchronize_sched();
 
-	vfree(pid_list->pids);
-	kfree(pid_list);
+	trace_free_pid_list(pid_list);
 }
 
 static void ftrace_clear_event_pids(struct trace_array *tr)
@@ -1587,13 +1582,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
 	struct trace_pid_list *filtered_pids = NULL;
 	struct trace_pid_list *pid_list;
 	struct trace_event_file *file;
-	struct trace_parser parser;
-	unsigned long val;
-	loff_t this_pos;
-	ssize_t read = 0;
-	ssize_t ret = 0;
-	pid_t pid;
-	int nr_pids = 0;
+	ssize_t ret;
 
 	if (!cnt)
 		return 0;
@@ -1602,93 +1591,15 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
 	if (ret < 0)
 		return ret;
 
-	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
-		return -ENOMEM;
-
 	mutex_lock(&event_mutex);
+
 	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
 					     lockdep_is_held(&event_mutex));
 
-	/*
-	 * Always recreate a new array. The write is an all or nothing
-	 * operation. Always create a new array when adding new pids by
-	 * the user. If the operation fails, then the current list is
-	 * not modified.
-	 */
-	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
-	if (!pid_list) {
-		read = -ENOMEM;
-		goto out;
-	}
-	pid_list->pid_max = READ_ONCE(pid_max);
-	/* Only truncating will shrink pid_max */
-	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
-		pid_list->pid_max = filtered_pids->pid_max;
-	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
-	if (!pid_list->pids) {
-		kfree(pid_list);
-		read = -ENOMEM;
-		goto out;
-	}
-	if (filtered_pids) {
-		/* copy the current bits to the new max */
-		pid = find_first_bit(filtered_pids->pids,
-				     filtered_pids->pid_max);
-		while (pid < filtered_pids->pid_max) {
-			set_bit(pid, pid_list->pids);
-			pid = find_next_bit(filtered_pids->pids,
-					    filtered_pids->pid_max,
-					    pid + 1);
-			nr_pids++;
-		}
-	}
-
-	while (cnt > 0) {
-
-		this_pos = 0;
-
-		ret = trace_get_user(&parser, ubuf, cnt, &this_pos);
-		if (ret < 0 || !trace_parser_loaded(&parser))
-			break;
-
-		read += ret;
-		ubuf += ret;
-		cnt -= ret;
-
-		parser.buffer[parser.idx] = 0;
-
-		ret = -EINVAL;
-		if (kstrtoul(parser.buffer, 0, &val))
-			break;
-		if (val >= pid_list->pid_max)
-			break;
-
-		pid = (pid_t)val;
-
-		set_bit(pid, pid_list->pids);
-		nr_pids++;
-
-		trace_parser_clear(&parser);
-		ret = 0;
-	}
-	trace_parser_put(&parser);
-
-	if (ret < 0) {
-		vfree(pid_list->pids);
-		kfree(pid_list);
-		read = ret;
+	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
+	if (ret < 0)
 		goto out;
-	}
 
-	if (!nr_pids) {
-		/* Cleared the list of pids */
-		vfree(pid_list->pids);
-		kfree(pid_list);
-		read = ret;
-		if (!filtered_pids)
-			goto out;
-		pid_list = NULL;
-	}
 	rcu_assign_pointer(tr->filtered_pids, pid_list);
 
 	list_for_each_entry(file, &tr->events, list) {
@@ -1697,10 +1608,8 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
 
 	if (filtered_pids) {
 		synchronize_sched();
-
-		vfree(filtered_pids->pids);
-		kfree(filtered_pids);
-	} else {
+		trace_free_pid_list(filtered_pids);
+	} else if (pid_list) {
 		/*
 		 * Register a probe that is called before all other probes
 		 * to set ignore_pid if next or prev do not match.
@@ -1738,9 +1647,8 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
  out:
 	mutex_unlock(&event_mutex);
 
-	ret = read;
-	if (read > 0)
-		*ppos += read;
+	if (ret > 0)
+		*ppos += ret;
 
 	return ret;
 }
-- 
cgit v1.2.3


From 345ddcc882d8896dcbdcb3e0ee4a415fc23ec8b0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 22 Apr 2016 18:11:33 -0400
Subject: ftrace: Have set_ftrace_pid use the bitmap like events do

Convert set_ftrace_pid to use the bitmap like set_event_pid does. This
allows for instances to use the pid filtering as well, and will allow for
function-fork option to set if the children of a traced function should be
traced or not.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c                | 313 +++++++++++++++--------------------
 kernel/trace/trace.c                 |   1 +
 kernel/trace/trace.h                 |  15 +-
 kernel/trace/trace_functions.c       |   2 +-
 kernel/trace/trace_functions_graph.c |   2 +-
 5 files changed, 148 insertions(+), 185 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 900dbb1efff2..8b488f4dd8e8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -89,16 +89,16 @@ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
 /* What to set function_trace_op to */
 static struct ftrace_ops *set_function_trace_op;
 
-/* List for set_ftrace_pid's pids. */
-LIST_HEAD(ftrace_pids);
-struct ftrace_pid {
-	struct list_head list;
-	struct pid *pid;
-};
-
-static bool ftrace_pids_enabled(void)
+static bool ftrace_pids_enabled(struct ftrace_ops *ops)
 {
-	return !list_empty(&ftrace_pids);
+	struct trace_array *tr;
+
+	if (!(ops->flags & FTRACE_OPS_FL_PID) || !ops->private)
+		return false;
+
+	tr = ops->private;
+
+	return tr->function_pids != NULL;
 }
 
 static void ftrace_update_trampoline(struct ftrace_ops *ops);
@@ -179,7 +179,9 @@ int ftrace_nr_registered_ops(void)
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 			    struct ftrace_ops *op, struct pt_regs *regs)
 {
-	if (!test_tsk_trace_trace(current))
+	struct trace_array *tr = op->private;
+
+	if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid))
 		return;
 
 	op->saved_func(ip, parent_ip, op, regs);
@@ -417,7 +419,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	/* Always save the function, and reset at unregistering */
 	ops->saved_func = ops->func;
 
-	if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled())
+	if (ftrace_pids_enabled(ops))
 		ops->func = ftrace_pid_func;
 
 	ftrace_update_trampoline(ops);
@@ -450,7 +452,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 
 static void ftrace_update_pid_func(void)
 {
-	bool enabled = ftrace_pids_enabled();
 	struct ftrace_ops *op;
 
 	/* Only do something if we are tracing something */
@@ -459,8 +460,8 @@ static void ftrace_update_pid_func(void)
 
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
 		if (op->flags & FTRACE_OPS_FL_PID) {
-			op->func = enabled ? ftrace_pid_func :
-				op->saved_func;
+			op->func = ftrace_pids_enabled(op) ?
+				ftrace_pid_func : op->saved_func;
 			ftrace_update_trampoline(op);
 		}
 	} while_for_each_ftrace_op(op);
@@ -5324,179 +5325,99 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
 	return ops->func;
 }
 
-static void clear_ftrace_swapper(void)
+static void
+ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
+		    struct task_struct *prev, struct task_struct *next)
 {
-	struct task_struct *p;
-	int cpu;
+	struct trace_array *tr = data;
+	struct trace_pid_list *pid_list;
 
-	get_online_cpus();
-	for_each_online_cpu(cpu) {
-		p = idle_task(cpu);
-		clear_tsk_trace_trace(p);
-	}
-	put_online_cpus();
-}
-
-static void set_ftrace_swapper(void)
-{
-	struct task_struct *p;
-	int cpu;
+	pid_list = rcu_dereference_sched(tr->function_pids);
 
-	get_online_cpus();
-	for_each_online_cpu(cpu) {
-		p = idle_task(cpu);
-		set_tsk_trace_trace(p);
-	}
-	put_online_cpus();
+	this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid,
+		       trace_ignore_this_task(pid_list, next));
 }
 
-static void clear_ftrace_pid(struct pid *pid)
+static void clear_ftrace_pids(struct trace_array *tr)
 {
-	struct task_struct *p;
+	struct trace_pid_list *pid_list;
+	int cpu;
 
-	rcu_read_lock();
-	do_each_pid_task(pid, PIDTYPE_PID, p) {
-		clear_tsk_trace_trace(p);
-	} while_each_pid_task(pid, PIDTYPE_PID, p);
-	rcu_read_unlock();
+	pid_list = rcu_dereference_protected(tr->function_pids,
+					     lockdep_is_held(&ftrace_lock));
+	if (!pid_list)
+		return;
 
-	put_pid(pid);
-}
+	unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
 
-static void set_ftrace_pid(struct pid *pid)
-{
-	struct task_struct *p;
+	for_each_possible_cpu(cpu)
+		per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false;
 
-	rcu_read_lock();
-	do_each_pid_task(pid, PIDTYPE_PID, p) {
-		set_tsk_trace_trace(p);
-	} while_each_pid_task(pid, PIDTYPE_PID, p);
-	rcu_read_unlock();
-}
+	rcu_assign_pointer(tr->function_pids, NULL);
 
-static void clear_ftrace_pid_task(struct pid *pid)
-{
-	if (pid == ftrace_swapper_pid)
-		clear_ftrace_swapper();
-	else
-		clear_ftrace_pid(pid);
-}
+	/* Wait till all users are no longer using pid filtering */
+	synchronize_sched();
 
-static void set_ftrace_pid_task(struct pid *pid)
-{
-	if (pid == ftrace_swapper_pid)
-		set_ftrace_swapper();
-	else
-		set_ftrace_pid(pid);
+	trace_free_pid_list(pid_list);
 }
 
-static int ftrace_pid_add(int p)
+static void ftrace_pid_reset(struct trace_array *tr)
 {
-	struct pid *pid;
-	struct ftrace_pid *fpid;
-	int ret = -EINVAL;
-
 	mutex_lock(&ftrace_lock);
-
-	if (!p)
-		pid = ftrace_swapper_pid;
-	else
-		pid = find_get_pid(p);
-
-	if (!pid)
-		goto out;
-
-	ret = 0;
-
-	list_for_each_entry(fpid, &ftrace_pids, list)
-		if (fpid->pid == pid)
-			goto out_put;
-
-	ret = -ENOMEM;
-
-	fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
-	if (!fpid)
-		goto out_put;
-
-	list_add(&fpid->list, &ftrace_pids);
-	fpid->pid = pid;
-
-	set_ftrace_pid_task(pid);
+	clear_ftrace_pids(tr);
 
 	ftrace_update_pid_func();
-
 	ftrace_startup_all(0);
 
 	mutex_unlock(&ftrace_lock);
-	return 0;
-
-out_put:
-	if (pid != ftrace_swapper_pid)
-		put_pid(pid);
-
-out:
-	mutex_unlock(&ftrace_lock);
-	return ret;
 }
 
-static void ftrace_pid_reset(void)
-{
-	struct ftrace_pid *fpid, *safe;
-
-	mutex_lock(&ftrace_lock);
-	list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
-		struct pid *pid = fpid->pid;
-
-		clear_ftrace_pid_task(pid);
-
-		list_del(&fpid->list);
-		kfree(fpid);
-	}
-
-	ftrace_update_pid_func();
-	ftrace_startup_all(0);
-
-	mutex_unlock(&ftrace_lock);
-}
+/* Greater than any max PID */
+#define FTRACE_NO_PIDS		(void *)(PID_MAX_LIMIT + 1)
 
 static void *fpid_start(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
 {
+	struct trace_pid_list *pid_list;
+	struct trace_array *tr = m->private;
+
 	mutex_lock(&ftrace_lock);
+	rcu_read_lock_sched();
 
-	if (!ftrace_pids_enabled() && (!*pos))
-		return (void *) 1;
+	pid_list = rcu_dereference_sched(tr->function_pids);
 
-	return seq_list_start(&ftrace_pids, *pos);
+	if (!pid_list)
+		return !(*pos) ? FTRACE_NO_PIDS : NULL;
+
+	return trace_pid_start(pid_list, pos);
 }
 
 static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	if (v == (void *)1)
+	struct trace_array *tr = m->private;
+	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids);
+
+	if (v == FTRACE_NO_PIDS)
 		return NULL;
 
-	return seq_list_next(v, &ftrace_pids, pos);
+	return trace_pid_next(pid_list, v, pos);
 }
 
 static void fpid_stop(struct seq_file *m, void *p)
+	__releases(RCU)
 {
+	rcu_read_unlock_sched();
 	mutex_unlock(&ftrace_lock);
 }
 
 static int fpid_show(struct seq_file *m, void *v)
 {
-	const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
-
-	if (v == (void *)1) {
+	if (v == FTRACE_NO_PIDS) {
 		seq_puts(m, "no pid\n");
 		return 0;
 	}
 
-	if (fpid->pid == ftrace_swapper_pid)
-		seq_puts(m, "swapper tasks\n");
-	else
-		seq_printf(m, "%u\n", pid_vnr(fpid->pid));
-
-	return 0;
+	return trace_pid_show(m, v);
 }
 
 static const struct seq_operations ftrace_pid_sops = {
@@ -5509,58 +5430,103 @@ static const struct seq_operations ftrace_pid_sops = {
 static int
 ftrace_pid_open(struct inode *inode, struct file *file)
 {
+	struct trace_array *tr = inode->i_private;
+	struct seq_file *m;
 	int ret = 0;
 
+	if (trace_array_get(tr) < 0)
+		return -ENODEV;
+
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
-		ftrace_pid_reset();
+		ftrace_pid_reset(tr);
 
-	if (file->f_mode & FMODE_READ)
-		ret = seq_open(file, &ftrace_pid_sops);
+	ret = seq_open(file, &ftrace_pid_sops);
+	if (ret < 0) {
+		trace_array_put(tr);
+	} else {
+		m = file->private_data;
+		/* copy tr over to seq ops */
+		m->private = tr;
+	}
 
 	return ret;
 }
 
+static void ignore_task_cpu(void *data)
+{
+	struct trace_array *tr = data;
+	struct trace_pid_list *pid_list;
+
+	/*
+	 * This function is called by on_each_cpu() while the
+	 * event_mutex is held.
+	 */
+	pid_list = rcu_dereference_protected(tr->function_pids,
+					     mutex_is_locked(&ftrace_lock));
+
+	this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid,
+		       trace_ignore_this_task(pid_list, current));
+}
+
 static ssize_t
 ftrace_pid_write(struct file *filp, const char __user *ubuf,
 		   size_t cnt, loff_t *ppos)
 {
-	char buf[64], *tmp;
-	long val;
-	int ret;
+	struct seq_file *m = filp->private_data;
+	struct trace_array *tr = m->private;
+	struct trace_pid_list *filtered_pids = NULL;
+	struct trace_pid_list *pid_list;
+	ssize_t ret;
 
-	if (cnt >= sizeof(buf))
-		return -EINVAL;
+	if (!cnt)
+		return 0;
+
+	mutex_lock(&ftrace_lock);
+
+	filtered_pids = rcu_dereference_protected(tr->function_pids,
+					     lockdep_is_held(&ftrace_lock));
+
+	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
+	if (ret < 0)
+		goto out;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
+	rcu_assign_pointer(tr->function_pids, pid_list);
 
-	buf[cnt] = 0;
+	if (filtered_pids) {
+		synchronize_sched();
+		trace_free_pid_list(filtered_pids);
+	} else if (pid_list) {
+		/* Register a probe to set whether to ignore the tracing of a task */
+		register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
+	}
 
 	/*
-	 * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
-	 * to clean the filter quietly.
+	 * Ignoring of pids is done at task switch. But we have to
+	 * check for those tasks that are currently running.
+	 * Always do this in case a pid was appended or removed.
 	 */
-	tmp = strstrip(buf);
-	if (strlen(tmp) == 0)
-		return 1;
+	on_each_cpu(ignore_task_cpu, tr, 1);
 
-	ret = kstrtol(tmp, 10, &val);
-	if (ret < 0)
-		return ret;
+	ftrace_update_pid_func();
+	ftrace_startup_all(0);
+ out:
+	mutex_unlock(&ftrace_lock);
 
-	ret = ftrace_pid_add(val);
+	if (ret > 0)
+		*ppos += ret;
 
-	return ret ? ret : cnt;
+	return ret;
 }
 
 static int
 ftrace_pid_release(struct inode *inode, struct file *file)
 {
-	if (file->f_mode & FMODE_READ)
-		seq_release(inode, file);
+	struct trace_array *tr = inode->i_private;
 
-	return 0;
+	trace_array_put(tr);
+
+	return seq_release(inode, file);
 }
 
 static const struct file_operations ftrace_pid_fops = {
@@ -5571,24 +5537,17 @@ static const struct file_operations ftrace_pid_fops = {
 	.release	= ftrace_pid_release,
 };
 
-static __init int ftrace_init_tracefs(void)
+void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 {
-	struct dentry *d_tracer;
-
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
-		return 0;
-
-	ftrace_init_dyn_tracefs(d_tracer);
+	/* Only the top level directory has the dyn_tracefs and profile */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
+		ftrace_init_dyn_tracefs(d_tracer);
+		ftrace_profile_tracefs(d_tracer);
+	}
 
 	trace_create_file("set_ftrace_pid", 0644, d_tracer,
-			    NULL, &ftrace_pid_fops);
-
-	ftrace_profile_tracefs(d_tracer);
-
-	return 0;
+			    tr, &ftrace_pid_fops);
 }
-fs_initcall(ftrace_init_tracefs);
 
 /**
  * ftrace_kill - kill ftrace
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a8bb7485fd1d..aa240551fc5d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7233,6 +7233,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 	for_each_tracing_cpu(cpu)
 		tracing_init_tracefs_percpu(tr, cpu);
 
+	ftrace_init_tracefs(tr, d_tracer);
 }
 
 static struct vfsmount *trace_automount(void *ingore)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a4dce1ef9e03..eaee458755a4 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -156,6 +156,9 @@ struct trace_array_cpu {
 	char			comm[TASK_COMM_LEN];
 
 	bool			ignore_pid;
+#ifdef CONFIG_FUNCTION_TRACER
+	bool			ftrace_ignore_pid;
+#endif
 };
 
 struct tracer;
@@ -247,6 +250,7 @@ struct trace_array {
 	int			ref;
 #ifdef CONFIG_FUNCTION_TRACER
 	struct ftrace_ops	*ops;
+	struct trace_pid_list	__rcu *function_pids;
 	/* function tracing enabled */
 	int			function_enabled;
 #endif
@@ -840,12 +844,9 @@ extern struct list_head ftrace_pids;
 
 #ifdef CONFIG_FUNCTION_TRACER
 extern bool ftrace_filter_param __initdata;
-static inline int ftrace_trace_task(struct task_struct *task)
+static inline int ftrace_trace_task(struct trace_array *tr)
 {
-	if (list_empty(&ftrace_pids))
-		return 1;
-
-	return test_tsk_trace_trace(task);
+	return !this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid);
 }
 extern int ftrace_is_dead(void);
 int ftrace_create_function_files(struct trace_array *tr,
@@ -855,8 +856,9 @@ void ftrace_init_global_array_ops(struct trace_array *tr);
 void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
 void ftrace_reset_array_ops(struct trace_array *tr);
 int using_ftrace_ops_list_func(void);
+void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
 #else
-static inline int ftrace_trace_task(struct task_struct *task)
+static inline int ftrace_trace_task(struct trace_array *tr)
 {
 	return 1;
 }
@@ -871,6 +873,7 @@ static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
 static inline __init void
 ftrace_init_global_array_ops(struct trace_array *tr) { }
 static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
+static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { }
 /* ftace_func_t type is not defined, use macro instead of static inline */
 #define ftrace_init_array_ops(tr, func) do { } while (0)
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 5a095c2e4b69..0efa00d80623 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -43,7 +43,7 @@ static int allocate_ftrace_ops(struct trace_array *tr)
 
 	/* Currently only the non stack verision is supported */
 	ops->func = function_trace_call;
-	ops->flags = FTRACE_OPS_FL_RECURSION_SAFE;
+	ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;
 
 	tr->ops = ops;
 	ops->private = tr;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3a0244ff7ea8..67cce7896aeb 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -319,7 +319,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 	int cpu;
 	int pc;
 
-	if (!ftrace_trace_task(current))
+	if (!ftrace_trace_task(tr))
 		return 0;
 
 	/* trace it when it is-nested-in or is a function enabled. */
-- 
cgit v1.2.3


From 35abb67de744b5dbaec54381f2f9e0246089331d Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 8 Jun 2016 18:38:02 -0700
Subject: tracing: expose current->comm to [ku]probe events

ftrace is very quick to give up on saving the task command line (see
`trace_save_cmdline()`). The workaround for events which really care
about the command line is to explicitly assign it as part of the entry.
However, this doesn't work for kprobe events, as there's no
straightforward way to get access to current->comm. Add a kprobe/uprobe
event variable $comm which provides exactly that.

Link: http://lkml.kernel.org/r/f59b472033b943a370f5f48d0af37698f409108f.1465435894.git.osandov@fb.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c |  1 +
 kernel/trace/trace_probe.c  | 33 +++++++++++++++++++++++++++++++++
 kernel/trace/trace_probe.h  | 10 ++++++++++
 3 files changed, 44 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5546eec0505f..9aedb0b06683 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -587,6 +587,7 @@ static int create_trace_kprobe(int argc, char **argv)
 	 *  $retval	: fetch return value
 	 *  $stack	: fetch stack address
 	 *  $stackN	: fetch Nth of stack (N:0-)
+	 *  $comm       : fetch current task comm
 	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
 	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
 	 *  %REG	: fetch register REG
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1d372fa6fefb..74e80a582c28 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -218,6 +218,28 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)
 	kfree(data);
 }
 
+void FETCH_FUNC_NAME(comm, string)(struct pt_regs *regs,
+					  void *data, void *dest)
+{
+	int maxlen = get_rloc_len(*(u32 *)dest);
+	u8 *dst = get_rloc_data(dest);
+	long ret;
+
+	if (!maxlen)
+		return;
+
+	ret = strlcpy(dst, current->comm, maxlen);
+	*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string));
+
+void FETCH_FUNC_NAME(comm, string_size)(struct pt_regs *regs,
+					       void *data, void *dest)
+{
+	*(u32 *)dest = strlen(current->comm) + 1;
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string_size));
+
 static const struct fetch_type *find_fetch_type(const char *type,
 						const struct fetch_type *ftbl)
 {
@@ -348,6 +370,11 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
 			}
 		} else
 			ret = -EINVAL;
+	} else if (strcmp(arg, "comm") == 0) {
+		if (strcmp(t->name, "string") != 0 &&
+		    strcmp(t->name, "string_size") != 0)
+			return -EINVAL;
+		f->fn = t->fetch[FETCH_MTD_comm];
 	} else
 		ret = -EINVAL;
 
@@ -522,6 +549,12 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
 		arg[t - parg->comm] = '\0';
 		t++;
 	}
+	/*
+	 * The default type of $comm should be "string", and it can't be
+	 * dereferenced.
+	 */
+	if (!t && strcmp(arg, "$comm") == 0)
+		t = "string";
 	parg->type = find_fetch_type(t, ftbl);
 	if (!parg->type) {
 		pr_info("Unsupported type: %s\n", t);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f6398db09114..45400ca5ded1 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -102,6 +102,7 @@ enum {
 	FETCH_MTD_reg = 0,
 	FETCH_MTD_stack,
 	FETCH_MTD_retval,
+	FETCH_MTD_comm,
 	FETCH_MTD_memory,
 	FETCH_MTD_symbol,
 	FETCH_MTD_deref,
@@ -183,6 +184,14 @@ DECLARE_BASIC_FETCH_FUNCS(bitfield);
 #define fetch_bitfield_string			NULL
 #define fetch_bitfield_string_size		NULL
 
+/* comm only makes sense as a string */
+#define fetch_comm_u8		NULL
+#define fetch_comm_u16		NULL
+#define fetch_comm_u32		NULL
+#define fetch_comm_u64		NULL
+DECLARE_FETCH_FUNC(comm, string);
+DECLARE_FETCH_FUNC(comm, string_size);
+
 /*
  * Define macro for basic types - we don't need to define s* types, because
  * we have to care only about bitwidth at recording time.
@@ -213,6 +222,7 @@ DEFINE_FETCH_##method(u64)
 ASSIGN_FETCH_FUNC(reg, ftype),				\
 ASSIGN_FETCH_FUNC(stack, ftype),			\
 ASSIGN_FETCH_FUNC(retval, ftype),			\
+ASSIGN_FETCH_FUNC(comm, ftype),				\
 ASSIGN_FETCH_FUNC(memory, ftype),			\
 ASSIGN_FETCH_FUNC(symbol, ftype),			\
 ASSIGN_FETCH_FUNC(deref, ftype),			\
-- 
cgit v1.2.3


From e2ace001176dc9745a472fe8bda1f0b28a4d7351 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 26 May 2016 12:00:33 -0700
Subject: tracing: Choose static tp_printk buffer by explicit nesting count

Currently, the trace_printk code chooses which static buffer to use based
on what type of atomic context (NMI, IRQ, etc) it's in.  Simplify the
code and make it more robust: simply count the nesting depth and choose
a buffer based on the current nesting depth.

The new code will only drop an event if we nest more than 4 deep,
and the old code was guaranteed to malfunction if that happened.

Link: http://lkml.kernel.org/r/07ab03aecfba25fcce8f9a211b14c9c5e2865c58.1464289095.git.luto@kernel.org

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 83 +++++++++++++++-------------------------------------
 1 file changed, 24 insertions(+), 59 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index aa240551fc5d..45e6747589c6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2339,83 +2339,41 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 
 /* created for use with alloc_percpu */
 struct trace_buffer_struct {
-	char buffer[TRACE_BUF_SIZE];
+	int nesting;
+	char buffer[4][TRACE_BUF_SIZE];
 };
 
 static struct trace_buffer_struct *trace_percpu_buffer;
-static struct trace_buffer_struct *trace_percpu_sirq_buffer;
-static struct trace_buffer_struct *trace_percpu_irq_buffer;
-static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 
 /*
- * The buffer used is dependent on the context. There is a per cpu
- * buffer for normal context, softirq contex, hard irq context and
- * for NMI context. Thise allows for lockless recording.
- *
- * Note, if the buffers failed to be allocated, then this returns NULL
+ * Thise allows for lockless recording.  If we're nested too deeply, then
+ * this returns NULL.
  */
 static char *get_trace_buf(void)
 {
-	struct trace_buffer_struct *percpu_buffer;
-
-	/*
-	 * If we have allocated per cpu buffers, then we do not
-	 * need to do any locking.
-	 */
-	if (in_nmi())
-		percpu_buffer = trace_percpu_nmi_buffer;
-	else if (in_irq())
-		percpu_buffer = trace_percpu_irq_buffer;
-	else if (in_softirq())
-		percpu_buffer = trace_percpu_sirq_buffer;
-	else
-		percpu_buffer = trace_percpu_buffer;
+	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
 
-	if (!percpu_buffer)
+	if (!buffer || buffer->nesting >= 4)
 		return NULL;
 
-	return this_cpu_ptr(&percpu_buffer->buffer[0]);
+	return &buffer->buffer[buffer->nesting++][0];
+}
+
+static void put_trace_buf(void)
+{
+	this_cpu_dec(trace_percpu_buffer->nesting);
 }
 
 static int alloc_percpu_trace_buffer(void)
 {
 	struct trace_buffer_struct *buffers;
-	struct trace_buffer_struct *sirq_buffers;
-	struct trace_buffer_struct *irq_buffers;
-	struct trace_buffer_struct *nmi_buffers;
 
 	buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!buffers)
-		goto err_warn;
-
-	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!sirq_buffers)
-		goto err_sirq;
-
-	irq_buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!irq_buffers)
-		goto err_irq;
-
-	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
-	if (!nmi_buffers)
-		goto err_nmi;
+	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
+		return -ENOMEM;
 
 	trace_percpu_buffer = buffers;
-	trace_percpu_sirq_buffer = sirq_buffers;
-	trace_percpu_irq_buffer = irq_buffers;
-	trace_percpu_nmi_buffer = nmi_buffers;
-
 	return 0;
-
- err_nmi:
-	free_percpu(irq_buffers);
- err_irq:
-	free_percpu(sirq_buffers);
- err_sirq:
-	free_percpu(buffers);
- err_warn:
-	WARN(1, "Could not allocate percpu trace_printk buffer");
-	return -ENOMEM;
 }
 
 static int buffers_allocated;
@@ -2506,7 +2464,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	tbuffer = get_trace_buf();
 	if (!tbuffer) {
 		len = 0;
-		goto out;
+		goto out_nobuffer;
 	}
 
 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
@@ -2532,6 +2490,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	}
 
 out:
+	put_trace_buf();
+
+out_nobuffer:
 	preempt_enable_notrace();
 	unpause_graph_tracing();
 
@@ -2563,7 +2524,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,
 	tbuffer = get_trace_buf();
 	if (!tbuffer) {
 		len = 0;
-		goto out;
+		goto out_nobuffer;
 	}
 
 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
@@ -2582,7 +2543,11 @@ __trace_array_vprintk(struct ring_buffer *buffer,
 		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
 	}
- out:
+
+out:
+	put_trace_buf();
+
+out_nobuffer:
 	preempt_enable_notrace();
 	unpause_graph_tracing();
 
-- 
cgit v1.2.3


From e947841c0dce9db675a957182214ef8091ac3d61 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 17 Jun 2016 17:40:58 -0400
Subject: tracing: Show the preempt count of when the event was called

Because tracepoint callbacks are done with preemption enabled, the trace
events are always called with preempt disable due to the
rcu_read_lock_sched_notrace() in __DO_TRACE(). This causes the preempt count
shown in the recorded trace event to be inaccurate. It is always one more
that what the preempt_count was when the tracepoint was called.

If CONFIG_PREEMPT is enabled, subtract 1 from the preempt_count before
recording it in the trace buffer.

Link: http://lkml.kernel.org/r/20160525132537.GA10808@linutronix.de

Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index fd449eb138cf..03c0a48c3ac4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -261,6 +261,14 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 
 	local_save_flags(fbuffer->flags);
 	fbuffer->pc = preempt_count();
+	/*
+	 * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
+	 * preemption (adding one to the preempt_count). Since we are
+	 * interested in the preempt_count at the time the tracepoint was
+	 * hit, we need to subtract one to offset the increment.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		fbuffer->pc--;
 	fbuffer->trace_file = trace_file;
 
 	fbuffer->event =
-- 
cgit v1.2.3


From 9a51933e360897d9b3867c9b09dd5ccf7493e97e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 11 May 2016 14:06:57 -0500
Subject: tracing: Expose CPU physical addresses (resource values) for PCI
 devices

Previously, mmio_print_pcidev() put "user" addresses in the trace buffer.
On most architectures, these are the same as CPU physical addresses, but on
microblaze, mips, powerpc, and sparc, they may be something else, typically
a raw BAR value (a bus address as opposed to a CPU address).

Always expose the CPU physical address to avoid this arch-dependent
behavior.

This change should have no user-visible effect because this file currently
depends on CONFIG_HAVE_MMIOTRACE_SUPPORT, which is only defined for x86,
and pci_resource_to_user() is a no-op on x86.

Link: http://lkml.kernel.org/r/20160511190657.5898.4248.stgit@bhelgaas-glaptop2.roam.corp.google.com

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_mmiotrace.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 68f376ca6d3f..cd7480d0a201 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -68,19 +68,15 @@ static void mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
 	trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
 			 dev->bus->number, dev->devfn,
 			 dev->vendor, dev->device, dev->irq);
-	/*
-	 * XXX: is pci_resource_to_user() appropriate, since we are
-	 * supposed to interpret the __ioremap() phys_addr argument based on
-	 * these printed values?
-	 */
 	for (i = 0; i < 7; i++) {
-		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+		start = dev->resource[i].start;
 		trace_seq_printf(s, " %llx",
 			(unsigned long long)(start |
 			(dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
 	}
 	for (i = 0; i < 7; i++) {
-		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+		start = dev->resource[i].start;
+		end = dev->resource[i].end;
 		trace_seq_printf(s, " %llx",
 			dev->resource[i].start < dev->resource[i].end ?
 			(unsigned long long)(end - start) + 1 : 0);
-- 
cgit v1.2.3


From be54f69c26193de31053190761e521903b89d098 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 23 Jun 2016 14:03:47 -0400
Subject: tracing: Skip more functions when doing stack tracing of events

 # echo 1 > options/stacktrace
 # echo 1 > events/sched/sched_switch/enable
 # cat trace
          <idle>-0     [002] d..2  1982.525169: <stack trace>
 => save_stack_trace
 => __ftrace_trace_stack
 => trace_buffer_unlock_commit_regs
 => event_trigger_unlock_commit
 => trace_event_buffer_commit
 => trace_event_raw_event_sched_switch
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => start_secondary

The above shows that we are seeing 6 functions before ever making it to the
caller of the sched_switch event.

 # echo stacktrace > events/sched/sched_switch/trigger
 # cat trace
          <idle>-0     [002] d..3  2146.335208: <stack trace>
 => trace_event_buffer_commit
 => trace_event_raw_event_sched_switch
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => start_secondary

The stacktrace trigger isn't as bad, because it adds its own skip to the
stacktracing, but still has two events extra.

One issue is that if the stacktrace passes its own "regs" then there should
be no addition to the skip, as the regs will not include the functions being
called. This was an issue that was fixed by commit 7717c6be6999 ("tracing:
Fix stacktrace skip depth in trace_buffer_unlock_commit_regs()" as adding
the skip number for kprobes made the probes not have any stack at all.

But since this is only an issue when regs is being used, a skip should be
added if regs is NULL. Now we have:

 # echo 1 > options/stacktrace
 # echo 1 > events/sched/sched_switch/enable
 # cat trace
          <idle>-0     [000] d..2  1297.676333: <stack trace>
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => rest_init
 => start_kernel
 => x86_64_start_reservations
 => x86_64_start_kernel

 # echo stacktrace > events/sched/sched_switch/trigger
 # cat trace
          <idle>-0     [002] d..3  1370.759745: <stack trace>
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => start_secondary

And kprobes are not touched.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 45e6747589c6..3d9f31b576f3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2118,7 +2118,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
 	__buffer_unlock_commit(buffer, event);
 
-	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
+	/*
+	 * If regs is not set, then skip the following callers:
+	 *   trace_buffer_unlock_commit_regs
+	 *   event_trigger_unlock_commit
+	 *   trace_event_buffer_commit
+	 *   trace_event_raw_event_sched_switch
+	 * Note, we can still get here via blktrace, wakeup tracer
+	 * and mmiotrace, but that's ok if they lose a function or
+	 * two. They are that meaningful.
+	 */
+	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
 
@@ -2168,6 +2178,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	trace.nr_entries	= 0;
 	trace.skip		= skip;
 
+	/*
+	 * Add two, for this function and the call to save_stack_trace()
+	 * If regs is set, then these functions will not be in the way.
+	 */
+	if (!regs)
+		trace.skip += 2;
+
 	/*
 	 * Since events can happen in NMIs there's no safe way to
 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
-- 
cgit v1.2.3


From 7fa8b7171a638ad896acabd9a17183b75b70aeb4 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <agnel.joel@gmail.com>
Date: Fri, 17 Jun 2016 22:44:54 -0700
Subject: tracing/function_graph: Fix filters for function_graph threshold

Function graph tracer currently ignores filters if tracing_thresh is set.
For example, even if set_ftrace_pid is set, then its ignored if tracing_thresh
set, resulting in all processes being traced.

To fix this, we reuse the same entry function as when tracing_thresh is not
set and do everything as in the regular case except for writing the function entry
to the ring buffer.

Link: http://lkml.kernel.org/r/1466228694-2677-1-git-send-email-agnel.joel@gmail.com

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Joel Fernandes <agnel.joel@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_functions_graph.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 67cce7896aeb..7363ccf79512 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -338,6 +338,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 	if (ftrace_graph_notrace_addr(trace->func))
 		return 1;
 
+	/*
+	 * Stop here if tracing_threshold is set. We only write function return
+	 * events to the ring buffer.
+	 */
+	if (tracing_thresh)
+		return 1;
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -355,14 +362,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 	return ret;
 }
 
-static int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
-{
-	if (tracing_thresh)
-		return 1;
-	else
-		return trace_graph_entry(trace);
-}
-
 static void
 __trace_graph_function(struct trace_array *tr,
 		unsigned long ip, unsigned long flags, int pc)
@@ -457,7 +456,7 @@ static int graph_trace_init(struct trace_array *tr)
 	set_graph_array(tr);
 	if (tracing_thresh)
 		ret = register_ftrace_graph(&trace_graph_thresh_return,
-					    &trace_graph_thresh_entry);
+					    &trace_graph_entry);
 	else
 		ret = register_ftrace_graph(&trace_graph_return,
 					    &trace_graph_entry);
-- 
cgit v1.2.3


From 501c2375253c0795048f48368e0b3e8b2f6646dc Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 5 Jul 2016 10:04:34 -0400
Subject: ftrace: Move toplevel init out of ftrace_init_tracefs()

Commit 345ddcc882d8 ("ftrace: Have set_ftrace_pid use the bitmap like events
do") placed ftrace_init_tracefs into the instance creation, and encapsulated
the top level updating with an if conditional, as the top level only gets
updated at boot up. Unfortunately, this triggers section mismatch errors as
the init functions are called from a function that can be called later, and
the section mismatch logic is unaware of the if conditional that would
prevent it from happening at run time.

To make everyone happy, create a separate ftrace_init_tracefs_toplevel()
routine that only gets called by init functions, and this will be what calls
other init functions for the toplevel directory.

Link: http://lkml.kernel.org/r/20160704102139.19cbc0d9@gandalf.local.home

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 345ddcc882d8 ("ftrace: Have set_ftrace_pid use the bitmap like events do")
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 16 ++++++++++------
 kernel/trace/trace.c  |  1 +
 kernel/trace/trace.h  |  3 +++
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8b488f4dd8e8..84752c8e28b5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5539,16 +5539,20 @@ static const struct file_operations ftrace_pid_fops = {
 
 void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 {
-	/* Only the top level directory has the dyn_tracefs and profile */
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
-		ftrace_init_dyn_tracefs(d_tracer);
-		ftrace_profile_tracefs(d_tracer);
-	}
-
 	trace_create_file("set_ftrace_pid", 0644, d_tracer,
 			    tr, &ftrace_pid_fops);
 }
 
+void __init ftrace_init_tracefs_toplevel(struct trace_array *tr,
+					 struct dentry *d_tracer)
+{
+	/* Only the top level directory has the dyn_tracefs and profile */
+	WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
+
+	ftrace_init_dyn_tracefs(d_tracer);
+	ftrace_profile_tracefs(d_tracer);
+}
+
 /**
  * ftrace_kill - kill ftrace
  *
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3d9f31b576f3..5fd53a7847bc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7369,6 +7369,7 @@ static __init int tracer_init_tracefs(void)
 		return 0;
 
 	init_tracer_tracefs(&global_trace, d_tracer);
+	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
 
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&global_trace, &tracing_thresh_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index eaee458755a4..c1de3f493cd3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -857,6 +857,8 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
 void ftrace_reset_array_ops(struct trace_array *tr);
 int using_ftrace_ops_list_func(void);
 void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
+void ftrace_init_tracefs_toplevel(struct trace_array *tr,
+				  struct dentry *d_tracer);
 #else
 static inline int ftrace_trace_task(struct trace_array *tr)
 {
@@ -874,6 +876,7 @@ static inline __init void
 ftrace_init_global_array_ops(struct trace_array *tr) { }
 static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
 static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { }
+static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { }
 /* ftace_func_t type is not defined, use macro instead of static inline */
 #define ftrace_init_array_ops(tr, func) do { } while (0)
 #endif /* CONFIG_FUNCTION_TRACER */
-- 
cgit v1.2.3


From 67f20b084574def586ecba68508acd5d054ccc88 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Mon, 4 Jul 2016 15:10:04 +0000
Subject: tracing: Using for_each_set_bit() to simplify trace_pid_write()

Using for_each_set_bit() to simplify the code.

Link: http://lkml.kernel.org/r/1467645004-11169-1-git-send-email-weiyj_lk@163.com

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5fd53a7847bc..dade4c9559cc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -517,13 +517,9 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
 
 	if (filtered_pids) {
 		/* copy the current bits to the new max */
-		pid = find_first_bit(filtered_pids->pids,
-				     filtered_pids->pid_max);
-		while (pid < filtered_pids->pid_max) {
+		for_each_set_bit(pid, filtered_pids->pids,
+				 filtered_pids->pid_max) {
 			set_bit(pid, pid_list->pids);
-			pid = find_next_bit(filtered_pids->pids,
-					    filtered_pids->pid_max,
-					    pid + 1);
 			nr_pids++;
 		}
 	}
-- 
cgit v1.2.3


From 7ad8fb61c4abf589596f0a4da34d987471481569 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Sun, 3 Jul 2016 08:51:34 -0500
Subject: tracing: Have HIST_TRIGGERS select TRACING

The kbuild test robot reported a compile error if HIST_TRIGGERS was
enabled but nothing else that selected TRACING was configured in.

HIST_TRIGGERS should directly select it and not rely on anything else
to do it.

Link: http://lkml.kernel.org/r/57791866.8080505@linux.intel.com

Reported-by: kbuild test robot <fennguang.wu@intel.com>
Fixes: 7ef224d1d0e3a ("tracing: Add 'hist' event trigger command")
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index fafeaf803bd0..f4b86e8ca1e7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -542,6 +542,7 @@ config HIST_TRIGGERS
 	bool "Histogram triggers"
 	depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select TRACING_MAP
+	select TRACING
 	default n
 	help
 	  Hist triggers allow one or more arbitrary trace event fields
-- 
cgit v1.2.3


From a4a551b8f1d4c4ebffd0f49dfef44df3128546f8 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 29 Jun 2016 19:56:48 +0900
Subject: ftrace: Reduce size of function graph entries

Currently ftrace_graph_ent{,_entry} and ftrace_graph_ret{,_entry} struct
can have padding bytes at the end due to alignment in 64-bit data type.
As these data are recorded so frequently, those paddings waste
non-negligible space.  As the ring buffer maintains alignment properly
for each architecture, just to remove the extra padding using 'packed'
attribute.

  ftrace_graph_ent_entry:  24 -> 20
  ftrace_graph_ret_entry:  48 -> 44

Also I moved the 'overrun' field in struct ftrace_graph_ret to minimize
the padding in the middle.

Tested on x86_64 only.

Link: http://lkml.kernel.org/r/1467197808-13578-1-git-send-email-namhyung@kernel.org

Cc: Ingo Molnar <mingo@kernel.org>
Cc: linux-arch@vger.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.h         | 11 +++++++++++
 kernel/trace/trace_entries.h |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c1de3f493cd3..f783df416726 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -80,6 +80,12 @@ enum trace_type {
 	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
 		     filter)
 
+#undef FTRACE_ENTRY_PACKED
+#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print,	\
+			    filter)					\
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter) __packed
+
 #include "trace_entries.h"
 
 /*
@@ -1625,6 +1631,11 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
 #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter)	\
 	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
 		     filter)
+#undef FTRACE_ENTRY_PACKED
+#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print, filter) \
+	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
+
 #include "trace_entries.h"
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER)
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ee7b94a4810a..5c30efcda5e6 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -72,7 +72,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
 );
 
 /* Function call entry */
-FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
+FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
 
 	TRACE_GRAPH_ENT,
 
@@ -88,7 +88,7 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
 );
 
 /* Function return entry */
-FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
+FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
 
 	TRACE_GRAPH_RET,
 
-- 
cgit v1.2.3