summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-11-28 09:46:22 -0800
committerTejun Heo <tj@kernel.org>2011-11-28 09:46:22 -0800
commitd4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree7eab5ee5481cd3dcf1162329fec827177640018a /kernel/trace
parenta150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
Merge branch 'master' into x86/memblock
Conflicts & resolutions: * arch/x86/xen/setup.c dc91c728fd "xen: allow extra memory to be in multiple regions" 24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..." conflicted on xen_add_extra_mem() updates. The resolution is trivial as the latter just want to replace memblock_x86_reserve_range() with memblock_reserve(). * drivers/pci/intel-iommu.c 166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/" 5dfe8660a3d "bootmem: Replace work_with_active_regions() with..." conflicted as the former moved the file under drivers/iommu/. Resolved by applying the chnages from the latter on the moved file. * mm/Kconfig 6661672053a "memblock: add NO_BOOTMEM config symbol" c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option" conflicted trivially. Both added config options. Just letting both add their own options resolves the conflict. * mm/memblock.c d1f0ece6cdc "mm/memblock.c: small function definition fixes" ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()" confliected. The former updates function removed by the latter. Resolution is trivial. Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/Makefile5
-rw-r--r--kernel/trace/blktrace.c22
-rw-r--r--kernel/trace/ftrace.c166
-rw-r--r--kernel/trace/ring_buffer.c188
-rw-r--r--kernel/trace/ring_buffer_benchmark.c2
-rw-r--r--kernel/trace/rpm-traces.c20
-rw-r--r--kernel/trace/trace.c507
-rw-r--r--kernel/trace/trace.h79
-rw-r--r--kernel/trace/trace_clock.c12
-rw-r--r--kernel/trace/trace_entries.h3
-rw-r--r--kernel/trace/trace_events.c139
-rw-r--r--kernel/trace/trace_events_filter.c801
-rw-r--r--kernel/trace/trace_events_filter_test.h50
-rw-r--r--kernel/trace/trace_functions.c3
-rw-r--r--kernel/trace/trace_functions_graph.c225
-rw-r--r--kernel/trace/trace_irqsoff.c14
-rw-r--r--kernel/trace/trace_kprobe.c378
-rw-r--r--kernel/trace/trace_mmiotrace.c2
-rw-r--r--kernel/trace/trace_output.c11
-rw-r--r--kernel/trace/trace_printk.c19
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--kernel/trace/trace_stack.c13
-rw-r--r--kernel/trace/trace_syscalls.c1
24 files changed, 1749 insertions, 917 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2ad39e556cb4..cd3134510f3d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED
power:power_frequency
This is for userspace compatibility
and will vanish after 5 kernel iterations,
- namely 2.6.41.
+ namely 3.1.
config CONTEXT_SWITCH_TRACER
bool
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 761c510a06c5..5f39a07fe5ea 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,6 +15,8 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
+CFLAGS_trace_events_filter.o := -I$(src)
+
#
# Make the trace clocks available generally: it's infrastructure
# relied on by ptrace for example:
@@ -53,6 +55,9 @@ endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
+ifeq ($(CONFIG_PM_RUNTIME),y)
+obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
+endif
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa298dfa..16fc34a0806f 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,6 +23,7 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
+#include <linux/export.h>
#include <linux/time.h>
#include <linux/uaccess.h>
@@ -206,6 +207,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
what |= MASK_TC_BIT(rw, DISCARD);
+ what |= MASK_TC_BIT(rw, FLUSH);
+ what |= MASK_TC_BIT(rw, FUA);
pid = tsk->pid;
if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1057,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
goto out;
}
+ if (tc & BLK_TC_FLUSH)
+ rwbs[i++] = 'F';
+
if (tc & BLK_TC_DISCARD)
rwbs[i++] = 'D';
else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1069,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
else
rwbs[i++] = 'N';
+ if (tc & BLK_TC_FUA)
+ rwbs[i++] = 'F';
if (tc & BLK_TC_AHEAD)
rwbs[i++] = 'A';
- if (tc & BLK_TC_BARRIER)
- rwbs[i++] = 'B';
if (tc & BLK_TC_SYNC)
rwbs[i++] = 'S';
if (tc & BLK_TC_META)
@@ -1132,7 +1138,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
unsigned long long ts = iter->ts;
unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
unsigned secs = (unsigned long)ts;
@@ -1148,7 +1154,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
static int blk_log_action(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
@@ -1561,7 +1567,7 @@ static const struct {
} mask_maps[] = {
{ BLK_TC_READ, "read" },
{ BLK_TC_WRITE, "write" },
- { BLK_TC_BARRIER, "barrier" },
+ { BLK_TC_FLUSH, "flush" },
{ BLK_TC_SYNC, "sync" },
{ BLK_TC_QUEUE, "queue" },
{ BLK_TC_REQUEUE, "requeue" },
@@ -1573,6 +1579,7 @@ static const struct {
{ BLK_TC_META, "meta" },
{ BLK_TC_DISCARD, "discard" },
{ BLK_TC_DRV_DATA, "drv_data" },
+ { BLK_TC_FUA, "fua" },
};
static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1795,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
{
int i = 0;
+ if (rw & REQ_FLUSH)
+ rwbs[i++] = 'F';
+
if (rw & WRITE)
rwbs[i++] = 'W';
else if (rw & REQ_DISCARD)
@@ -1797,6 +1807,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
else
rwbs[i++] = 'N';
+ if (rw & REQ_FUA)
+ rwbs[i++] = 'F';
if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
if (rw & REQ_SYNC)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 908038f57440..900b409543db 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,6 +22,7 @@
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
+#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
@@ -32,7 +33,6 @@
#include <trace/events/sched.h>
-#include <asm/ftrace.h>
#include <asm/setup.h>
#include "trace_output.h"
@@ -82,14 +82,14 @@ static int ftrace_disabled __read_mostly;
static DEFINE_MUTEX(ftrace_lock);
-static struct ftrace_ops ftrace_list_end __read_mostly =
-{
+static struct ftrace_ops ftrace_list_end __read_mostly = {
.func = ftrace_stub,
};
static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
@@ -148,9 +148,11 @@ void clear_ftrace_function(void)
{
ftrace_trace_function = ftrace_stub;
__ftrace_trace_function = ftrace_stub;
+ __ftrace_trace_function_delay = ftrace_stub;
ftrace_pid_function = ftrace_stub;
}
+#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
/*
* For those archs that do not test ftrace_trace_stop in their
@@ -210,7 +212,12 @@ static void update_ftrace_function(void)
#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
ftrace_trace_function = func;
#else
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* do not update till all functions have been modified */
+ __ftrace_trace_function_delay = func;
+#else
__ftrace_trace_function = func;
+#endif
ftrace_trace_function = ftrace_test_stop_func;
#endif
}
@@ -785,8 +792,7 @@ static void unregister_ftrace_profiler(void)
unregister_ftrace_graph();
}
#else
-static struct ftrace_ops ftrace_profile_ops __read_mostly =
-{
+static struct ftrace_ops ftrace_profile_ops __read_mostly = {
.func = function_profile_call,
};
@@ -806,19 +812,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
unsigned long val;
- char buf[64]; /* big enough to hold a number */
int ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
val = !!val;
@@ -1182,8 +1179,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
return NULL;
}
+static void
+ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
+static void
+ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
+
static int
-ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
+ftrace_hash_move(struct ftrace_ops *ops, int enable,
+ struct ftrace_hash **dst, struct ftrace_hash *src)
{
struct ftrace_func_entry *entry;
struct hlist_node *tp, *tn;
@@ -1193,9 +1196,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
unsigned long key;
int size = src->count;
int bits = 0;
+ int ret;
int i;
/*
+ * Remove the current set, update the hash and add
+ * them back.
+ */
+ ftrace_hash_rec_disable(ops, enable);
+
+ /*
* If the new source is empty, just free dst and assign it
* the empty_hash.
*/
@@ -1215,9 +1225,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
if (bits > FTRACE_HASH_MAX_BITS)
bits = FTRACE_HASH_MAX_BITS;
+ ret = -ENOMEM;
new_hash = alloc_ftrace_hash(bits);
if (!new_hash)
- return -ENOMEM;
+ goto out;
size = 1 << src->size_bits;
for (i = 0; i < size; i++) {
@@ -1236,7 +1247,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
rcu_assign_pointer(*dst, new_hash);
free_ftrace_hash_rcu(old_hash);
- return 0;
+ ret = 0;
+ out:
+ /*
+ * Enable regardless of ret:
+ * On success, we enable the new hash.
+ * On failure, we re-enable the original hash.
+ */
+ ftrace_hash_rec_enable(ops, enable);
+
+ return ret;
}
/*
@@ -1596,6 +1616,12 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
+ /*
+ * Do not call function tracer while we update the code.
+ * We are in stop machine, no worrying about races.
+ */
+ function_trace_stop++;
+
if (*command & FTRACE_ENABLE_CALLS)
ftrace_replace_code(1);
else if (*command & FTRACE_DISABLE_CALLS)
@@ -1609,6 +1635,18 @@ static int __ftrace_modify_code(void *data)
else if (*command & FTRACE_STOP_FUNC_RET)
ftrace_disable_ftrace_graph_caller();
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ /*
+ * For archs that call ftrace_test_stop_func(), we must
+ * wait till after we update all the function callers
+ * before we update the callback. This keeps different
+ * ops that record different functions from corrupting
+ * each other.
+ */
+ __ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+ function_trace_stop--;
+
return 0;
}
@@ -1744,10 +1782,36 @@ static cycle_t ftrace_update_time;
static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
+static int ops_traces_mod(struct ftrace_ops *ops)
+{
+ struct ftrace_hash *hash;
+
+ hash = ops->filter_hash;
+ return !!(!hash || !hash->count);
+}
+
static int ftrace_update_code(struct module *mod)
{
struct dyn_ftrace *p;
cycle_t start, stop;
+ unsigned long ref = 0;
+
+ /*
+ * When adding a module, we need to check if tracers are
+ * currently enabled and if they are set to trace all functions.
+ * If they are, we need to enable the module functions as well
+ * as update the reference counts for those function records.
+ */
+ if (mod) {
+ struct ftrace_ops *ops;
+
+ for (ops = ftrace_ops_list;
+ ops != &ftrace_list_end; ops = ops->next) {
+ if (ops->flags & FTRACE_OPS_FL_ENABLED &&
+ ops_traces_mod(ops))
+ ref++;
+ }
+ }
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
@@ -1760,7 +1824,7 @@ static int ftrace_update_code(struct module *mod)
p = ftrace_new_addrs;
ftrace_new_addrs = p->newlist;
- p->flags = 0L;
+ p->flags = ref;
/*
* Do the initial record conversion from mcount jump
@@ -1783,7 +1847,7 @@ static int ftrace_update_code(struct module *mod)
* conversion puts the module to the correct state, thus
* passing the ftrace_make_call check.
*/
- if (ftrace_start_up) {
+ if (ftrace_start_up && ref) {
int failed = __ftrace_replace_code(p, 1);
if (failed) {
ftrace_bug(failed, p->ip);
@@ -2407,10 +2471,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
*/
static int
-ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
+ftrace_mod_callback(struct ftrace_hash *hash,
+ char *func, char *cmd, char *param, int enable)
{
- struct ftrace_ops *ops = &global_ops;
- struct ftrace_hash *hash;
char *mod;
int ret = -EINVAL;
@@ -2430,11 +2493,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
if (!strlen(mod))
return ret;
- if (enable)
- hash = ops->filter_hash;
- else
- hash = ops->notrace_hash;
-
ret = ftrace_match_module_records(hash, func, mod);
if (!ret)
ret = -EINVAL;
@@ -2760,7 +2818,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash,
mutex_lock(&ftrace_cmd_mutex);
list_for_each_entry(p, &ftrace_commands, list) {
if (strcmp(p->name, command) == 0) {
- ret = p->func(func, command, next, enable);
+ ret = p->func(hash, func, command, next, enable);
goto out_unlock;
}
}
@@ -2857,7 +2915,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
ftrace_match_records(hash, buf, len);
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move(orig_hash, hash);
+ ret = ftrace_hash_move(ops, enable, orig_hash, hash);
+ if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
+ && ftrace_enabled)
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+
mutex_unlock(&ftrace_lock);
mutex_unlock(&ftrace_regex_lock);
@@ -3040,18 +3102,12 @@ ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash = &iter->ops->notrace_hash;
mutex_lock(&ftrace_lock);
- /*
- * Remove the current set, update the hash and add
- * them back.
- */
- ftrace_hash_rec_disable(iter->ops, filter_hash);
- ret = ftrace_hash_move(orig_hash, iter->hash);
- if (!ret) {
- ftrace_hash_rec_enable(iter->ops, filter_hash);
- if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
- && ftrace_enabled)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
- }
+ ret = ftrace_hash_move(iter->ops, filter_hash,
+ orig_hash, iter->hash);
+ if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
+ && ftrace_enabled)
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+
mutex_unlock(&ftrace_lock);
}
free_ftrace_hash(iter->hash);
@@ -3330,7 +3386,7 @@ static int ftrace_process_locs(struct module *mod,
{
unsigned long *p;
unsigned long addr;
- unsigned long flags;
+ unsigned long flags = 0; /* Shut up gcc */
mutex_lock(&ftrace_lock);
p = start;
@@ -3348,12 +3404,18 @@ static int ftrace_process_locs(struct module *mod,
}
/*
- * Disable interrupts to prevent interrupts from executing
- * code that is being modified.
+ * We only need to disable interrupts on start up
+ * because we are modifying code that an interrupt
+ * may execute, and the modification is not atomic.
+ * But for modules, nothing runs the code we modify
+ * until we are finished with it, and there's no
+ * reason to cause large interrupt latencies while we do it.
*/
- local_irq_save(flags);
+ if (!mod)
+ local_irq_save(flags);
ftrace_update_code(mod);
- local_irq_restore(flags);
+ if (!mod)
+ local_irq_restore(flags);
mutex_unlock(&ftrace_lock);
return 0;
@@ -3802,6 +3864,14 @@ void ftrace_kill(void)
}
/**
+ * Test if ftrace is dead or not.
+ */
+int ftrace_is_dead(void)
+{
+ return ftrace_disabled;
+}
+
+/**
* register_ftrace_function - register a function for profiling
* @ops - ops structure that holds the function for profiling.
*
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b0c7aa407943..f5b7b5c1195b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -478,7 +478,7 @@ struct ring_buffer_per_cpu {
int cpu;
atomic_t record_disabled;
struct ring_buffer *buffer;
- spinlock_t reader_lock; /* serialize readers */
+ raw_spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
struct lock_class_key lock_key;
struct list_head *pages;
@@ -488,12 +488,14 @@ struct ring_buffer_per_cpu {
struct buffer_page *reader_page;
unsigned long lost_events;
unsigned long last_overrun;
+ local_t entries_bytes;
local_t commit_overrun;
local_t overrun;
local_t entries;
local_t committing;
local_t commits;
unsigned long read;
+ unsigned long read_bytes;
u64 write_stamp;
u64 read_stamp;
};
@@ -997,15 +999,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
unsigned nr_pages)
{
struct buffer_page *bpage, *tmp;
- unsigned long addr;
LIST_HEAD(pages);
unsigned i;
WARN_ON(!nr_pages);
for (i = 0; i < nr_pages; i++) {
+ struct page *page;
+ /*
+ * __GFP_NORETRY flag makes sure that the allocation fails
+ * gracefully without invoking oom-killer and the system is
+ * not destabilized.
+ */
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
+ GFP_KERNEL | __GFP_NORETRY,
+ cpu_to_node(cpu_buffer->cpu));
if (!bpage)
goto free_pages;
@@ -1013,10 +1021,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
list_add(&bpage->list, &pages);
- addr = __get_free_page(GFP_KERNEL);
- if (!addr)
+ page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+ GFP_KERNEL | __GFP_NORETRY, 0);
+ if (!page)
goto free_pages;
- bpage->page = (void *)addr;
+ bpage->page = page_address(page);
rb_init_page(bpage->page);
}
@@ -1045,7 +1054,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct buffer_page *bpage;
- unsigned long addr;
+ struct page *page;
int ret;
cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -1055,7 +1064,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
cpu_buffer->cpu = cpu;
cpu_buffer->buffer = buffer;
- spin_lock_init(&cpu_buffer->reader_lock);
+ raw_spin_lock_init(&cpu_buffer->reader_lock);
lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
@@ -1067,10 +1076,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
rb_check_bpage(cpu_buffer, bpage);
cpu_buffer->reader_page = bpage;
- addr = __get_free_page(GFP_KERNEL);
- if (!addr)
+ page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
+ if (!page)
goto fail_free_reader;
- bpage->page = (void *)addr;
+ bpage->page = page_address(page);
rb_init_page(bpage->page);
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -1252,7 +1261,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
struct list_head *p;
unsigned i;
- spin_lock_irq(&cpu_buffer->reader_lock);
+ raw_spin_lock_irq(&cpu_buffer->reader_lock);
rb_head_page_deactivate(cpu_buffer);
for (i = 0; i < nr_pages; i++) {
@@ -1270,7 +1279,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
rb_check_pages(cpu_buffer);
out:
- spin_unlock_irq(&cpu_buffer->reader_lock);
+ raw_spin_unlock_irq(&cpu_buffer->reader_lock);
}
static void
@@ -1281,7 +1290,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
struct list_head *p;
unsigned i;
- spin_lock_irq(&cpu_buffer->reader_lock);
+ raw_spin_lock_irq(&cpu_buffer->reader_lock);
rb_head_page_deactivate(cpu_buffer);
for (i = 0; i < nr_pages; i++) {
@@ -1296,7 +1305,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
rb_check_pages(cpu_buffer);
out:
- spin_unlock_irq(&cpu_buffer->reader_lock);
+ raw_spin_unlock_irq(&cpu_buffer->reader_lock);
}
/**
@@ -1314,7 +1323,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
unsigned nr_pages, rm_pages, new_pages;
struct buffer_page *bpage, *tmp;
unsigned long buffer_size;
- unsigned long addr;
LIST_HEAD(pages);
int i, cpu;
@@ -1375,16 +1383,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
for_each_buffer_cpu(buffer, cpu) {
for (i = 0; i < new_pages; i++) {
+ struct page *page;
+ /*
+ * __GFP_NORETRY flag makes sure that the allocation
+ * fails gracefully without invoking oom-killer and
+ * the system is not destabilized.
+ */
bpage = kzalloc_node(ALIGN(sizeof(*bpage),
cache_line_size()),
- GFP_KERNEL, cpu_to_node(cpu));
+ GFP_KERNEL | __GFP_NORETRY,
+ cpu_to_node(cpu));
if (!bpage)
goto free_pages;
list_add(&bpage->list, &pages);
- addr = __get_free_page(GFP_KERNEL);
- if (!addr)
+ page = alloc_pages_node(cpu_to_node(cpu),
+ GFP_KERNEL | __GFP_NORETRY, 0);
+ if (!page)
goto free_pages;
- bpage->page = (void *)addr;
+ bpage->page = page_address(page);
rb_init_page(bpage->page);
}
}
@@ -1694,6 +1710,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
+ local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
/*
* The entries will be zeroed out when we move the
@@ -1849,6 +1866,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
+ /* account for padding bytes */
+ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
@@ -2040,6 +2060,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (!tail)
tail_page->page->time_stamp = ts;
+ /* account for these added bytes */
+ local_add(length, &cpu_buffer->entries_bytes);
+
return event;
}
@@ -2062,6 +2085,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
unsigned long write_mask =
local_read(&bpage->write) & ~RB_WRITE_MASK;
+ unsigned long event_length = rb_event_length(event);
/*
* This is on the tail page. It is possible that
* a write could come in and move the tail page
@@ -2071,8 +2095,11 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
old_index += write_mask;
new_index += write_mask;
index = local_cmpxchg(&bpage->write, old_index, new_index);
- if (index == old_index)
+ if (index == old_index) {
+ /* update counters */
+ local_sub(event_length, &cpu_buffer->entries_bytes);
return 1;
+ }
}
/* could not discard */
@@ -2647,6 +2674,58 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
}
/**
+ * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
+{
+ unsigned long flags;
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *bpage;
+ unsigned long ret;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ /*
+ * if the tail is on reader_page, oldest time stamp is on the reader
+ * page
+ */
+ if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+ bpage = cpu_buffer->reader_page;
+ else
+ bpage = rb_set_head_page(cpu_buffer);
+ ret = bpage->page->time_stamp;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
+
+/**
+ * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long ret;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
+
+/**
* ring_buffer_entries_cpu - get the number of entries in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to get the entries from.
@@ -2790,9 +2869,9 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
cpu_buffer = iter->cpu_buffer;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_iter_reset(iter);
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
@@ -3251,12 +3330,12 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
again:
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(cpu_buffer, ts, lost_events);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
rb_advance_reader(cpu_buffer);
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
@@ -3281,9 +3360,9 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
unsigned long flags;
again:
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
event = rb_iter_peek(iter, ts);
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
goto again;
@@ -3323,7 +3402,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(cpu_buffer, ts, lost_events);
if (event) {
@@ -3332,7 +3411,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
}
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
out:
@@ -3424,11 +3503,11 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
cpu_buffer = iter->cpu_buffer;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
arch_spin_lock(&cpu_buffer->lock);
rb_iter_reset(iter);
arch_spin_unlock(&cpu_buffer->lock);
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
EXPORT_SYMBOL_GPL(ring_buffer_read_start);
@@ -3463,7 +3542,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
unsigned long flags;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
again:
event = rb_iter_peek(iter, ts);
if (!event)
@@ -3474,7 +3553,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
rb_advance_iter(iter);
out:
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return event;
}
@@ -3513,11 +3592,13 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read = 0;
local_set(&cpu_buffer->commit_overrun, 0);
+ local_set(&cpu_buffer->entries_bytes, 0);
local_set(&cpu_buffer->overrun, 0);
local_set(&cpu_buffer->entries, 0);
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
cpu_buffer->read = 0;
+ cpu_buffer->read_bytes = 0;
cpu_buffer->write_stamp = 0;
cpu_buffer->read_stamp = 0;
@@ -3543,7 +3624,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
atomic_inc(&cpu_buffer->record_disabled);
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
goto out;
@@ -3555,7 +3636,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
arch_spin_unlock(&cpu_buffer->lock);
out:
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->record_disabled);
}
@@ -3593,10 +3674,10 @@ int ring_buffer_empty(struct ring_buffer *buffer)
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
ret = rb_per_cpu_empty(cpu_buffer);
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
if (!ret)
@@ -3627,10 +3708,10 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
ret = rb_per_cpu_empty(cpu_buffer);
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
return ret;
@@ -3730,16 +3811,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
* Returns:
* The page allocated, or NULL on error.
*/
-void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
{
struct buffer_data_page *bpage;
- unsigned long addr;
+ struct page *page;
- addr = __get_free_page(GFP_KERNEL);
- if (!addr)
+ page = alloc_pages_node(cpu_to_node(cpu),
+ GFP_KERNEL | __GFP_NORETRY, 0);
+ if (!page)
return NULL;
- bpage = (void *)addr;
+ bpage = page_address(page);
rb_init_page(bpage);
@@ -3826,7 +3908,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
if (!bpage)
goto out;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
@@ -3903,6 +3985,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
+ cpu_buffer->read_bytes += BUF_PAGE_SIZE;
/* swap the pages */
rb_init_page(bpage);
@@ -3949,7 +4032,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
out_unlock:
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
out:
return ret;
@@ -3978,20 +4061,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
unsigned long *p = filp->private_data;
- char buf[64];
unsigned long val;
int ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
if (val)
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 302f8a614635..a5457d577b98 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu)
int inc;
int i;
- bpage = ring_buffer_alloc_read_page(buffer);
+ bpage = ring_buffer_alloc_read_page(buffer, cpu);
if (!bpage)
return EVENT_DROPPED;
diff --git a/kernel/trace/rpm-traces.c b/kernel/trace/rpm-traces.c
new file mode 100644
index 000000000000..4b3b5eaf94d1
--- /dev/null
+++ b/kernel/trace/rpm-traces.c
@@ -0,0 +1,20 @@
+/*
+ * Power trace points
+ *
+ * Copyright (C) 2009 Ming Lei <ming.lei@canonical.com>
+ */
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpm.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_return_int);
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_idle);
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_suspend);
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_resume);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ee9c921d7f21..f2bd275bb60f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -341,28 +341,29 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
static int trace_stop_count;
-static DEFINE_SPINLOCK(tracing_start_lock);
+static DEFINE_RAW_SPINLOCK(tracing_start_lock);
+
+static void wakeup_work_handler(struct work_struct *work)
+{
+ wake_up(&trace_wait);
+}
+
+static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
/**
* trace_wake_up - wake up tasks waiting for trace input
*
- * Simply wakes up any task that is blocked on the trace_wait
- * queue. These is used with trace_poll for tasks polling the trace.
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * trace_wait queue. These is used with trace_poll for tasks polling the
+ * trace.
*/
void trace_wake_up(void)
{
- int cpu;
+ const unsigned long delay = msecs_to_jiffies(2);
if (trace_flags & TRACE_ITER_BLOCK)
return;
- /*
- * The runqueue_is_locked() can fail, but this is the best we
- * have for now:
- */
- cpu = get_cpu();
- if (!runqueue_is_locked(cpu))
- wake_up(&trace_wait);
- put_cpu();
+ schedule_delayed_work(&wakeup_work, delay);
}
static int __init set_buf_size(char *str)
@@ -424,6 +425,7 @@ static const char *trace_options[] = {
"graph-time",
"record-cmd",
"overwrite",
+ "disable_on_free",
NULL
};
@@ -433,6 +435,7 @@ static struct {
} trace_clocks[] = {
{ trace_clock_local, "local" },
{ trace_clock_global, "global" },
+ { trace_clock_counter, "counter" },
};
int trace_clock_id;
@@ -958,7 +961,7 @@ void tracing_start(void)
if (tracing_disabled)
return;
- spin_lock_irqsave(&tracing_start_lock, flags);
+ raw_spin_lock_irqsave(&tracing_start_lock, flags);
if (--trace_stop_count) {
if (trace_stop_count < 0) {
/* Someone screwed up their debugging */
@@ -983,7 +986,7 @@ void tracing_start(void)
ftrace_start();
out:
- spin_unlock_irqrestore(&tracing_start_lock, flags);
+ raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
}
/**
@@ -998,7 +1001,7 @@ void tracing_stop(void)
unsigned long flags;
ftrace_stop();
- spin_lock_irqsave(&tracing_start_lock, flags);
+ raw_spin_lock_irqsave(&tracing_start_lock, flags);
if (trace_stop_count++)
goto out;
@@ -1016,7 +1019,7 @@ void tracing_stop(void)
arch_spin_unlock(&ftrace_max_lock);
out:
- spin_unlock_irqrestore(&tracing_start_lock, flags);
+ raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
}
void trace_stop_cmdline_recording(void);
@@ -1191,6 +1194,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
}
EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
+void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc,
+ struct pt_regs *regs)
+{
+ ring_buffer_unlock_commit(buffer, event);
+
+ ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
+ ftrace_trace_userstack(buffer, flags, pc);
+}
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
+
void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
@@ -1234,30 +1249,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
}
#ifdef CONFIG_STACKTRACE
+
+#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+struct ftrace_stack {
+ unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(int, ftrace_stack_reserve);
+
static void __ftrace_trace_stack(struct ring_buffer *buffer,
unsigned long flags,
- int skip, int pc)
+ int skip, int pc, struct pt_regs *regs)
{
struct ftrace_event_call *call = &event_kernel_stack;
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
+ int use_stack;
+ int size = FTRACE_STACK_ENTRIES;
+
+ trace.nr_entries = 0;
+ trace.skip = skip;
+
+ /*
+ * Since events can happen in NMIs there's no safe way to
+ * use the per cpu ftrace_stacks. We reserve it and if an interrupt
+ * or NMI comes in, it will just have to use the default
+ * FTRACE_STACK_SIZE.
+ */
+ preempt_disable_notrace();
+
+ use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+ /*
+ * We don't need any atomic variables, just a barrier.
+ * If an interrupt comes in, we don't care, because it would
+ * have exited and put the counter back to what we want.
+ * We just need a barrier to keep gcc from moving things
+ * around.
+ */
+ barrier();
+ if (use_stack == 1) {
+ trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
+ trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
+
+ if (regs)
+ save_stack_trace_regs(regs, &trace);
+ else
+ save_stack_trace(&trace);
+
+ if (trace.nr_entries > size)
+ size = trace.nr_entries;
+ } else
+ /* From now on, use_stack is a boolean */
+ use_stack = 0;
+
+ size *= sizeof(unsigned long);
event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
- sizeof(*entry), flags, pc);
+ sizeof(*entry) + size, flags, pc);
if (!event)
- return;
- entry = ring_buffer_event_data(event);
- memset(&entry->caller, 0, sizeof(entry->caller));
+ goto out;
+ entry = ring_buffer_event_data(event);
- trace.nr_entries = 0;
- trace.max_entries = FTRACE_STACK_ENTRIES;
- trace.skip = skip;
- trace.entries = entry->caller;
+ memset(&entry->caller, 0, size);
+
+ if (use_stack)
+ memcpy(&entry->caller, trace.entries,
+ trace.nr_entries * sizeof(unsigned long));
+ else {
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.entries = entry->caller;
+ if (regs)
+ save_stack_trace_regs(regs, &trace);
+ else
+ save_stack_trace(&trace);
+ }
+
+ entry->size = trace.nr_entries;
- save_stack_trace(&trace);
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
+
+ out:
+ /* Again, don't let gcc optimize things here */
+ barrier();
+ __get_cpu_var(ftrace_stack_reserve)--;
+ preempt_enable_notrace();
+
+}
+
+void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
+ int skip, int pc, struct pt_regs *regs)
+{
+ if (!(trace_flags & TRACE_ITER_STACKTRACE))
+ return;
+
+ __ftrace_trace_stack(buffer, flags, skip, pc, regs);
}
void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
@@ -1266,13 +1354,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
if (!(trace_flags & TRACE_ITER_STACKTRACE))
return;
- __ftrace_trace_stack(buffer, flags, skip, pc);
+ __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
}
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc)
{
- __ftrace_trace_stack(tr->buffer, flags, skip, pc);
+ __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
}
/**
@@ -1288,7 +1376,7 @@ void trace_dump_stack(void)
local_save_flags(flags);
/* skipping 3 traces, seems to get us at the caller of this function */
- __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+ __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
}
static DEFINE_PER_CPU(int, user_stack_count);
@@ -1536,7 +1624,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
ftrace_enable_cpu();
- return event ? ring_buffer_event_data(event) : NULL;
+ if (event) {
+ iter->ent_size = ring_buffer_event_length(event);
+ return ring_buffer_event_data(event);
+ }
+ iter->ent_size = 0;
+ return NULL;
}
static struct trace_entry *
@@ -2051,6 +2144,9 @@ void trace_default_header(struct seq_file *m)
{
struct trace_iterator *iter = m->private;
+ if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
+ return;
+
if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
@@ -2064,6 +2160,14 @@ void trace_default_header(struct seq_file *m)
}
}
+static void test_ftrace_alive(struct seq_file *m)
+{
+ if (!ftrace_is_dead())
+ return;
+ seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+ seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n");
+}
+
static int s_show(struct seq_file *m, void *v)
{
struct trace_iterator *iter = v;
@@ -2073,6 +2177,7 @@ static int s_show(struct seq_file *m, void *v)
if (iter->tr) {
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
+ test_ftrace_alive(m);
}
if (iter->trace && iter->trace->print_header)
iter->trace->print_header(m);
@@ -2615,9 +2720,9 @@ static const char readme_msg[] =
"# cat /sys/kernel/debug/tracing/trace_options\n"
"noprint-parent nosym-offset nosym-addr noverbose\n"
"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
- "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
+ "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n"
"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
- "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
+ "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n"
;
static ssize_t
@@ -2701,20 +2806,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_array *tr = filp->private_data;
- char buf[64];
unsigned long val;
int ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
val = !!val;
@@ -2767,7 +2863,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
return t->init(tr);
}
-static int tracing_resize_ring_buffer(unsigned long size)
+static int __tracing_resize_ring_buffer(unsigned long size)
{
int ret;
@@ -2819,6 +2915,41 @@ static int tracing_resize_ring_buffer(unsigned long size)
return ret;
}
+static ssize_t tracing_resize_ring_buffer(unsigned long size)
+{
+ int cpu, ret = size;
+
+ mutex_lock(&trace_types_lock);
+
+ tracing_stop();
+
+ /* disable all cpu buffers */
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_inc(&max_tr.data[cpu]->disabled);
+ }
+
+ if (size != global_trace.entries)
+ ret = __tracing_resize_ring_buffer(size);
+
+ if (ret < 0)
+ ret = -ENOMEM;
+
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_dec(&max_tr.data[cpu]->disabled);
+ }
+
+ tracing_start();
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
/**
* tracing_update_buffers - used by tracing facility to expand ring buffers
@@ -2836,7 +2967,7 @@ int tracing_update_buffers(void)
mutex_lock(&trace_types_lock);
if (!ring_buffer_expanded)
- ret = tracing_resize_ring_buffer(trace_buf_size);
+ ret = __tracing_resize_ring_buffer(trace_buf_size);
mutex_unlock(&trace_types_lock);
return ret;
@@ -2860,7 +2991,7 @@ static int tracing_set_tracer(const char *buf)
mutex_lock(&trace_types_lock);
if (!ring_buffer_expanded) {
- ret = tracing_resize_ring_buffer(trace_buf_size);
+ ret = __tracing_resize_ring_buffer(trace_buf_size);
if (ret < 0)
goto out;
ret = 0;
@@ -2966,20 +3097,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
unsigned long *ptr = filp->private_data;
- char buf[64];
unsigned long val;
int ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
*ptr = val * 1000;
@@ -3434,83 +3556,96 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
unsigned long val;
- char buf[64];
- int ret, cpu;
-
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
+ int ret;
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
/* must have at least 1 entry */
if (!val)
return -EINVAL;
- mutex_lock(&trace_types_lock);
-
- tracing_stop();
-
- /* disable all cpu buffers */
- for_each_tracing_cpu(cpu) {
- if (global_trace.data[cpu])
- atomic_inc(&global_trace.data[cpu]->disabled);
- if (max_tr.data[cpu])
- atomic_inc(&max_tr.data[cpu]->disabled);
- }
-
/* value is in KB */
val <<= 10;
- if (val != global_trace.entries) {
- ret = tracing_resize_ring_buffer(val);
- if (ret < 0) {
- cnt = ret;
- goto out;
- }
- }
+ ret = tracing_resize_ring_buffer(val);
+ if (ret < 0)
+ return ret;
*ppos += cnt;
- /* If check pages failed, return ENOMEM */
- if (tracing_disabled)
- cnt = -ENOMEM;
- out:
+ return cnt;
+}
+
+static ssize_t
+tracing_total_entries_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r, cpu;
+ unsigned long size = 0, expanded_size = 0;
+
+ mutex_lock(&trace_types_lock);
for_each_tracing_cpu(cpu) {
- if (global_trace.data[cpu])
- atomic_dec(&global_trace.data[cpu]->disabled);
- if (max_tr.data[cpu])
- atomic_dec(&max_tr.data[cpu]->disabled);
+ size += tr->entries >> 10;
+ if (!ring_buffer_expanded)
+ expanded_size += trace_buf_size >> 10;
}
-
- tracing_start();
+ if (ring_buffer_expanded)
+ r = sprintf(buf, "%lu\n", size);
+ else
+ r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
mutex_unlock(&trace_types_lock);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ /*
+ * There is no need to read what the user has written, this function
+ * is just to make sure that there is no error when "echo" is used
+ */
+
+ *ppos += cnt;
+
return cnt;
}
-static int mark_printk(const char *fmt, ...)
+static int
+tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
- int ret;
- va_list args;
- va_start(args, fmt);
- ret = trace_vprintk(0, fmt, args);
- va_end(args);
- return ret;
+ /* disable tracing ? */
+ if (trace_flags & TRACE_ITER_STOP_ON_FREE)
+ tracing_off();
+ /* resize the ring buffer to 0 */
+ tracing_resize_ring_buffer(0);
+
+ return 0;
}
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
- char *buf;
- size_t written;
+ unsigned long addr = (unsigned long)ubuf;
+ struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
+ struct print_entry *entry;
+ unsigned long irq_flags;
+ struct page *pages[2];
+ int nr_pages = 1;
+ ssize_t written;
+ void *page1;
+ void *page2;
+ int offset;
+ int size;
+ int len;
+ int ret;
if (tracing_disabled)
return -EINVAL;
@@ -3518,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt > TRACE_BUF_SIZE)
cnt = TRACE_BUF_SIZE;
- buf = kmalloc(cnt + 2, GFP_KERNEL);
- if (buf == NULL)
- return -ENOMEM;
+ /*
+ * Userspace is injecting traces into the kernel trace buffer.
+ * We want to be as non intrusive as possible.
+ * To do so, we do not want to allocate any special buffers
+ * or take any locks, but instead write the userspace data
+ * straight into the ring buffer.
+ *
+ * First we need to pin the userspace buffer into memory,
+ * which, most likely it is, because it just referenced it.
+ * But there's no guarantee that it is. By using get_user_pages_fast()
+ * and kmap_atomic/kunmap_atomic() we can get access to the
+ * pages directly. We then write the data directly into the
+ * ring buffer.
+ */
+ BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
- if (copy_from_user(buf, ubuf, cnt)) {
- kfree(buf);
- return -EFAULT;
+ /* check if we cross pages */
+ if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
+ nr_pages = 2;
+
+ offset = addr & (PAGE_SIZE - 1);
+ addr &= PAGE_MASK;
+
+ ret = get_user_pages_fast(addr, nr_pages, 0, pages);
+ if (ret < nr_pages) {
+ while (--ret >= 0)
+ put_page(pages[ret]);
+ written = -EFAULT;
+ goto out;
+ }
+
+ page1 = kmap_atomic(pages[0]);
+ if (nr_pages == 2)
+ page2 = kmap_atomic(pages[1]);
+
+ local_save_flags(irq_flags);
+ size = sizeof(*entry) + cnt + 2; /* possible \n added */
+ buffer = global_trace.buffer;
+ event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+ irq_flags, preempt_count());
+ if (!event) {
+ /* Ring buffer disabled, return as if not open for write */
+ written = -EBADF;
+ goto out_unlock;
}
- if (buf[cnt-1] != '\n') {
- buf[cnt] = '\n';
- buf[cnt+1] = '\0';
+
+ entry = ring_buffer_event_data(event);
+ entry->ip = _THIS_IP_;
+
+ if (nr_pages == 2) {
+ len = PAGE_SIZE - offset;
+ memcpy(&entry->buf, page1 + offset, len);
+ memcpy(&entry->buf[len], page2, cnt - len);
} else
- buf[cnt] = '\0';
+ memcpy(&entry->buf, page1 + offset, cnt);
- written = mark_printk("%s", buf);
- kfree(buf);
- *fpos += written;
+ if (entry->buf[cnt - 1] != '\n') {
+ entry->buf[cnt] = '\n';
+ entry->buf[cnt + 1] = '\0';
+ } else
+ entry->buf[cnt] = '\0';
+
+ ring_buffer_unlock_commit(buffer, event);
+
+ written = cnt;
- /* don't tell userspace we wrote more - it might confuse them */
- if (written > cnt)
- written = cnt;
+ *fpos += written;
+ out_unlock:
+ if (nr_pages == 2)
+ kunmap_atomic(page2);
+ kunmap_atomic(page1);
+ while (nr_pages > 0)
+ put_page(pages[--nr_pages]);
+ out:
return written;
}
@@ -3640,6 +3828,17 @@ static const struct file_operations tracing_entries_fops = {
.llseek = generic_file_llseek,
};
+static const struct file_operations tracing_total_entries_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_total_entries_read,
+ .llseek = generic_file_llseek,
+};
+
+static const struct file_operations tracing_free_buffer_fops = {
+ .write = tracing_free_buffer_write,
+ .release = tracing_free_buffer_release,
+};
+
static const struct file_operations tracing_mark_fops = {
.open = tracing_open_generic,
.write = tracing_mark_write,
@@ -3696,7 +3895,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
return 0;
if (!info->spare)
- info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
+ info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
if (!info->spare)
return -ENOMEM;
@@ -3704,8 +3903,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (info->read < PAGE_SIZE)
goto read;
- info->read = 0;
-
trace_access_lock(info->cpu);
ret = ring_buffer_read_page(info->tr->buffer,
&info->spare,
@@ -3715,6 +3912,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (ret < 0)
return 0;
+ info->read = 0;
+
read:
size = PAGE_SIZE - info->read;
if (size > count)
@@ -3853,7 +4052,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
ref->ref = 1;
ref->buffer = info->tr->buffer;
- ref->page = ring_buffer_alloc_read_page(ref->buffer);
+ ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
if (!ref->page) {
kfree(ref);
break;
@@ -3862,8 +4061,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
r = ring_buffer_read_page(ref->buffer, &ref->page,
len, info->cpu, 1);
if (r < 0) {
- ring_buffer_free_read_page(ref->buffer,
- ref->page);
+ ring_buffer_free_read_page(ref->buffer, ref->page);
kfree(ref);
break;
}
@@ -3923,6 +4121,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
struct trace_array *tr = &global_trace;
struct trace_seq *s;
unsigned long cnt;
+ unsigned long long t;
+ unsigned long usec_rem;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
@@ -3939,6 +4139,17 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+ cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
+ trace_seq_printf(s, "bytes: %ld\n", cnt);
+
+ t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem);
+
+ t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+
count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
kfree(s);
@@ -4099,19 +4310,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
{
struct trace_option_dentry *topt = filp->private_data;
unsigned long val;
- char buf[64];
int ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
if (val != 0 && val != 1)
@@ -4159,20 +4361,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
long index = (long)filp->private_data;
- char buf[64];
unsigned long val;
int ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
if (val != 0 && val != 1)
@@ -4365,6 +4558,12 @@ static __init int tracer_init_debugfs(void)
trace_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
+ trace_create_file("buffer_total_size_kb", 0444, d_tracer,
+ &global_trace, &tracing_total_entries_fops);
+
+ trace_create_file("free_buffer", 0644, d_tracer,
+ &global_trace, &tracing_free_buffer_fops);
+
trace_create_file("trace_marker", 0220, d_tracer,
NULL, &tracing_mark_fops);
@@ -4478,6 +4677,12 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
tracing_off();
+ /* Did function tracer already get disabled? */
+ if (ftrace_is_dead()) {
+ printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+ printk("# MAY BE MISSING FUNCTION EVENTS\n");
+ }
+
if (disable_tracing)
ftrace_kill();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 229f8591f61d..092e1f8d18dc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -2,7 +2,7 @@
#define _LINUX_KERNEL_TRACE_H
#include <linux/fs.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/clocksource.h>
#include <linux/ring_buffer.h>
@@ -278,6 +278,29 @@ struct tracer {
};
+/* Only current can touch trace_recursion */
+#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
+#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
+
+/* Ring buffer has the 10 LSB bits to count */
+#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
+
+/* for function tracing recursion */
+#define TRACE_INTERNAL_BIT (1<<11)
+#define TRACE_GLOBAL_BIT (1<<12)
+/*
+ * Abuse of the trace_recursion.
+ * As we need a way to maintain state if we are tracing the function
+ * graph in irq because we want to trace a particular function that
+ * was called in irq context but we have irq tracing off. Since this
+ * can only be modified by current, we can reuse trace_recursion.
+ */
+#define TRACE_IRQ_BIT (1<<13)
+
+#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
+#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
+#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
+
#define TRACE_PIPE_ALL_CPU -1
int tracer_init(struct tracer *t, struct trace_array *tr);
@@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr,
void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
int skip, int pc);
+void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
+ int skip, int pc, struct pt_regs *regs);
+
void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
int pc);
@@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer,
{
}
+static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer,
+ unsigned long flags, int skip,
+ int pc, struct pt_regs *regs)
+{
+}
+
static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
unsigned long flags, int pc)
{
@@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr)
return 1;
for (i = 0; i < ftrace_graph_count; i++) {
- if (addr == ftrace_graph_funcs[i])
+ if (addr == ftrace_graph_funcs[i]) {
+ /*
+ * If no irqs are to be traced, but a set_graph_function
+ * is set, and called by an interrupt handler, we still
+ * want to trace it.
+ */
+ if (in_irq())
+ trace_recursion_set(TRACE_IRQ_BIT);
+ else
+ trace_recursion_clear(TRACE_IRQ_BIT);
return 1;
+ }
}
return 0;
@@ -537,11 +579,13 @@ static inline int ftrace_trace_task(struct task_struct *task)
return test_tsk_trace_trace(task);
}
+extern int ftrace_is_dead(void);
#else
static inline int ftrace_trace_task(struct task_struct *task)
{
return 1;
}
+static inline int ftrace_is_dead(void) { return 0; }
#endif
/*
@@ -609,6 +653,7 @@ enum trace_iterator_flags {
TRACE_ITER_GRAPH_TIME = 0x80000,
TRACE_ITER_RECORD_CMD = 0x100000,
TRACE_ITER_OVERWRITE = 0x200000,
+ TRACE_ITER_STOP_ON_FREE = 0x400000,
};
/*
@@ -677,6 +722,7 @@ struct event_subsystem {
struct dentry *entry;
struct event_filter *filter;
int nr_events;
+ int ref_count;
};
#define FILTER_PRED_INVALID ((unsigned short)-1)
@@ -717,16 +763,10 @@ struct filter_pred {
filter_pred_fn_t fn;
u64 val;
struct regex regex;
- /*
- * Leaf nodes use field_name, ops is used by AND and OR
- * nodes. The field_name is always freed when freeing a pred.
- * We can overload field_name for ops and have it freed
- * as well.
- */
- union {
- char *field_name;
- unsigned short *ops;
- };
+ unsigned short *ops;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ struct ftrace_event_field *field;
+#endif
int offset;
int not;
int op;
@@ -784,19 +824,4 @@ extern const char *__stop___trace_bprintk_fmt[];
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
#include "trace_entries.h"
-/* Only current can touch trace_recursion */
-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
-
-/* Ring buffer has the 10 LSB bits to count */
-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
-
-/* for function tracing recursion */
-#define TRACE_INTERNAL_BIT (1<<11)
-#define TRACE_GLOBAL_BIT (1<<12)
-
-#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
-#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
-#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
-
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 6302747a1398..394783531cbb 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -113,3 +113,15 @@ u64 notrace trace_clock_global(void)
return now;
}
+
+static atomic64_t trace_counter;
+
+/*
+ * trace_clock_counter(): simply an atomic counter.
+ * Use the trace_counter "counter" for cases where you do not care
+ * about timings, but are interested in strict ordering.
+ */
+u64 notrace trace_clock_counter(void)
+{
+ return atomic64_add_return(1, &trace_counter);
+}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e32744c84d94..93365907f219 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
TRACE_STACK,
F_STRUCT(
- __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
+ __field( int, size )
+ __dynamic_array(unsigned long, caller )
),
F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 686ec399f2a8..581876f9f387 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -244,6 +244,35 @@ static void ftrace_clear_events(void)
mutex_unlock(&event_mutex);
}
+static void __put_system(struct event_subsystem *system)
+{
+ struct event_filter *filter = system->filter;
+
+ WARN_ON_ONCE(system->ref_count == 0);
+ if (--system->ref_count)
+ return;
+
+ if (filter) {
+ kfree(filter->filter_string);
+ kfree(filter);
+ }
+ kfree(system->name);
+ kfree(system);
+}
+
+static void __get_system(struct event_subsystem *system)
+{
+ WARN_ON_ONCE(system->ref_count == 0);
+ system->ref_count++;
+}
+
+static void put_system(struct event_subsystem *system)
+{
+ mutex_lock(&event_mutex);
+ __put_system(system);
+ mutex_unlock(&event_mutex);
+}
+
/*
* __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
*/
@@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct ftrace_event_call *call = filp->private_data;
- char buf[64];
unsigned long val;
int ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
ret = tracing_update_buffers();
@@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
const char set_to_char[4] = { '?', '0', '1', 'X' };
- const char *system = filp->private_data;
+ struct event_subsystem *system = filp->private_data;
struct ftrace_event_call *call;
char buf[2];
int set = 0;
@@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
if (!call->name || !call->class || !call->class->reg)
continue;
- if (system && strcmp(call->class->system, system) != 0)
+ if (system && strcmp(call->class->system, system->name) != 0)
continue;
/*
@@ -569,21 +589,13 @@ static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- const char *system = filp->private_data;
+ struct event_subsystem *system = filp->private_data;
+ const char *name = NULL;
unsigned long val;
- char buf[64];
ssize_t ret;
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
return ret;
ret = tracing_update_buffers();
@@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (val != 0 && val != 1)
return -EINVAL;
- ret = __ftrace_set_clr_event(NULL, system, NULL, val);
+ /*
+ * Opening of "enable" adds a ref count to system,
+ * so the name is safe to use.
+ */
+ if (system)
+ name = system->name;
+
+ ret = __ftrace_set_clr_event(NULL, name, NULL, val);
if (ret)
goto out;
@@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}
+static LIST_HEAD(event_subsystems);
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+ struct event_subsystem *system = NULL;
+ int ret;
+
+ if (!inode->i_private)
+ goto skip_search;
+
+ /* Make sure the system still exists */
+ mutex_lock(&event_mutex);
+ list_for_each_entry(system, &event_subsystems, list) {
+ if (system == inode->i_private) {
+ /* Don't open systems with no events */
+ if (!system->nr_events) {
+ system = NULL;
+ break;
+ }
+ __get_system(system);
+ break;
+ }
+ }
+ mutex_unlock(&event_mutex);
+
+ if (system != inode->i_private)
+ return -ENODEV;
+
+ skip_search:
+ ret = tracing_open_generic(inode, filp);
+ if (ret < 0 && system)
+ put_system(system);
+
+ return ret;
+}
+
+static int subsystem_release(struct inode *inode, struct file *file)
+{
+ struct event_subsystem *system = inode->i_private;
+
+ if (system)
+ put_system(system);
+
+ return 0;
+}
+
static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
@@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = {
};
static const struct file_operations ftrace_subsystem_filter_fops = {
- .open = tracing_open_generic,
+ .open = subsystem_open,
.read = subsystem_filter_read,
.write = subsystem_filter_write,
.llseek = default_llseek,
+ .release = subsystem_release,
};
static const struct file_operations ftrace_system_enable_fops = {
- .open = tracing_open_generic,
+ .open = subsystem_open,
.read = system_enable_read,
.write = system_enable_write,
.llseek = default_llseek,
+ .release = subsystem_release,
};
static const struct file_operations ftrace_show_header_fops = {
@@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void)
return d_events;
}
-static LIST_HEAD(event_subsystems);
-
static struct dentry *
event_subsystem_dir(const char *name, struct dentry *d_events)
{
@@ -1013,6 +1078,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
/* First see if we did not already create this dir */
list_for_each_entry(system, &event_subsystems, list) {
if (strcmp(system->name, name) == 0) {
+ __get_system(system);
system->nr_events++;
return system->entry;
}
@@ -1035,6 +1101,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
}
system->nr_events = 1;
+ system->ref_count = 1;
system->name = kstrdup(name, GFP_KERNEL);
if (!system->name) {
debugfs_remove(system->entry);
@@ -1062,8 +1129,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
"'%s/filter' entry\n", name);
}
- trace_create_file("enable", 0644, system->entry,
- (void *)system->name,
+ trace_create_file("enable", 0644, system->entry, system,
&ftrace_system_enable_fops);
return system->entry;
@@ -1184,16 +1250,9 @@ static void remove_subsystem_dir(const char *name)
list_for_each_entry(system, &event_subsystems, list) {
if (strcmp(system->name, name) == 0) {
if (!--system->nr_events) {
- struct event_filter *filter = system->filter;
-
debugfs_remove_recursive(system->entry);
list_del(&system->list);
- if (filter) {
- kfree(filter->filter_string);
- kfree(filter);
- }
- kfree(system->name);
- kfree(system);
+ __put_system(system);
}
break;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 8008ddcfbf20..816d3d074979 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -381,6 +381,63 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
return pred;
}
+enum walk_return {
+ WALK_PRED_ABORT,
+ WALK_PRED_PARENT,
+ WALK_PRED_DEFAULT,
+};
+
+typedef int (*filter_pred_walkcb_t) (enum move_type move,
+ struct filter_pred *pred,
+ int *err, void *data);
+
+static int walk_pred_tree(struct filter_pred *preds,
+ struct filter_pred *root,
+ filter_pred_walkcb_t cb, void *data)
+{
+ struct filter_pred *pred = root;
+ enum move_type move = MOVE_DOWN;
+ int done = 0;
+
+ if (!preds)
+ return -EINVAL;
+
+ do {
+ int err = 0, ret;
+
+ ret = cb(move, pred, &err, data);
+ if (ret == WALK_PRED_ABORT)
+ return err;
+ if (ret == WALK_PRED_PARENT)
+ goto get_parent;
+
+ switch (move) {
+ case MOVE_DOWN:
+ if (pred->left != FILTER_PRED_INVALID) {
+ pred = &preds[pred->left];
+ continue;
+ }
+ goto get_parent;
+ case MOVE_UP_FROM_LEFT:
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ get_parent:
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent,
+ &move);
+ continue;
+ }
+ done = 1;
+ } while (!done);
+
+ /* We are fine. */
+ return 0;
+}
+
/*
* A series of AND or ORs where found together. Instead of
* climbing up and down the tree branches, an array of the
@@ -410,99 +467,91 @@ static int process_ops(struct filter_pred *preds,
for (i = 0; i < op->val; i++) {
pred = &preds[op->ops[i]];
- match = pred->fn(pred, rec);
+ if (!WARN_ON_ONCE(!pred->fn))
+ match = pred->fn(pred, rec);
if (!!match == type)
return match;
}
return match;
}
+struct filter_match_preds_data {
+ struct filter_pred *preds;
+ int match;
+ void *rec;
+};
+
+static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ struct filter_match_preds_data *d = data;
+
+ *err = 0;
+ switch (move) {
+ case MOVE_DOWN:
+ /* only AND and OR have children */
+ if (pred->left != FILTER_PRED_INVALID) {
+ /* If ops is set, then it was folded. */
+ if (!pred->ops)
+ return WALK_PRED_DEFAULT;
+ /* We can treat folded ops as a leaf node */
+ d->match = process_ops(d->preds, pred, d->rec);
+ } else {
+ if (!WARN_ON_ONCE(!pred->fn))
+ d->match = pred->fn(pred, d->rec);
+ }
+
+ return WALK_PRED_PARENT;
+ case MOVE_UP_FROM_LEFT:
+ /*
+ * Check for short circuits.
+ *
+ * Optimization: !!match == (pred->op == OP_OR)
+ * is the same as:
+ * if ((match && pred->op == OP_OR) ||
+ * (!match && pred->op == OP_AND))
+ */
+ if (!!d->match == (pred->op == OP_OR))
+ return WALK_PRED_PARENT;
+ break;
+ case MOVE_UP_FROM_RIGHT:
+ break;
+ }
+
+ return WALK_PRED_DEFAULT;
+}
+
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct event_filter *filter, void *rec)
{
- int match = -1;
- enum move_type move = MOVE_DOWN;
struct filter_pred *preds;
- struct filter_pred *pred;
struct filter_pred *root;
- int n_preds;
- int done = 0;
+ struct filter_match_preds_data data = {
+ /* match is currently meaningless */
+ .match = -1,
+ .rec = rec,
+ };
+ int n_preds, ret;
/* no filter is considered a match */
if (!filter)
return 1;
n_preds = filter->n_preds;
-
if (!n_preds)
return 1;
/*
* n_preds, root and filter->preds are protect with preemption disabled.
*/
- preds = rcu_dereference_sched(filter->preds);
root = rcu_dereference_sched(filter->root);
if (!root)
return 1;
- pred = root;
-
- /* match is currently meaningless */
- match = -1;
-
- do {
- switch (move) {
- case MOVE_DOWN:
- /* only AND and OR have children */
- if (pred->left != FILTER_PRED_INVALID) {
- /* If ops is set, then it was folded. */
- if (!pred->ops) {
- /* keep going to down the left side */
- pred = &preds[pred->left];
- continue;
- }
- /* We can treat folded ops as a leaf node */
- match = process_ops(preds, pred, rec);
- } else
- match = pred->fn(pred, rec);
- /* If this pred is the only pred */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- /*
- * Check for short circuits.
- *
- * Optimization: !!match == (pred->op == OP_OR)
- * is the same as:
- * if ((match && pred->op == OP_OR) ||
- * (!match && pred->op == OP_AND))
- */
- if (!!match == (pred->op == OP_OR)) {
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- /* now go down the right side of the tree. */
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- /* We finished this equation. */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- done = 1;
- } while (!done);
-
- return match;
+ data.preds = preds = rcu_dereference_sched(filter->preds);
+ ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data);
+ WARN_ON(ret);
+ return data.match;
}
EXPORT_SYMBOL_GPL(filter_match_preds);
@@ -628,22 +677,6 @@ find_event_field(struct ftrace_event_call *call, char *name)
return __find_event_field(head, name);
}
-static void filter_free_pred(struct filter_pred *pred)
-{
- if (!pred)
- return;
-
- kfree(pred->field_name);
- kfree(pred);
-}
-
-static void filter_clear_pred(struct filter_pred *pred)
-{
- kfree(pred->field_name);
- pred->field_name = NULL;
- pred->regex.len = 0;
-}
-
static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
{
stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
@@ -689,20 +722,13 @@ __pop_pred_stack(struct pred_stack *stack)
static int filter_set_pred(struct event_filter *filter,
int idx,
struct pred_stack *stack,
- struct filter_pred *src,
- filter_pred_fn_t fn)
+ struct filter_pred *src)
{
struct filter_pred *dest = &filter->preds[idx];
struct filter_pred *left;
struct filter_pred *right;
*dest = *src;
- if (src->field_name) {
- dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
- if (!dest->field_name)
- return -ENOMEM;
- }
- dest->fn = fn;
dest->index = idx;
if (dest->op == OP_OR || dest->op == OP_AND) {
@@ -743,11 +769,7 @@ static int filter_set_pred(struct event_filter *filter,
static void __free_preds(struct event_filter *filter)
{
- int i;
-
if (filter->preds) {
- for (i = 0; i < filter->a_preds; i++)
- kfree(filter->preds[i].field_name);
kfree(filter->preds);
filter->preds = NULL;
}
@@ -840,23 +862,19 @@ static void filter_free_subsystem_filters(struct event_subsystem *system)
}
}
-static int filter_add_pred_fn(struct filter_parse_state *ps,
- struct ftrace_event_call *call,
- struct event_filter *filter,
- struct filter_pred *pred,
- struct pred_stack *stack,
- filter_pred_fn_t fn)
+static int filter_add_pred(struct filter_parse_state *ps,
+ struct event_filter *filter,
+ struct filter_pred *pred,
+ struct pred_stack *stack)
{
- int idx, err;
+ int err;
if (WARN_ON(filter->n_preds == filter->a_preds)) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
return -ENOSPC;
}
- idx = filter->n_preds;
- filter_clear_pred(&filter->preds[idx]);
- err = filter_set_pred(filter, idx, stack, pred, fn);
+ err = filter_set_pred(filter, filter->n_preds, stack, pred);
if (err)
return err;
@@ -937,31 +955,15 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
return fn;
}
-static int filter_add_pred(struct filter_parse_state *ps,
- struct ftrace_event_call *call,
- struct event_filter *filter,
- struct filter_pred *pred,
- struct pred_stack *stack,
- bool dry_run)
+static int init_pred(struct filter_parse_state *ps,
+ struct ftrace_event_field *field,
+ struct filter_pred *pred)
+
{
- struct ftrace_event_field *field;
- filter_pred_fn_t fn;
+ filter_pred_fn_t fn = filter_pred_none;
unsigned long long val;
int ret;
- fn = pred->fn = filter_pred_none;
-
- if (pred->op == OP_AND)
- goto add_pred_fn;
- else if (pred->op == OP_OR)
- goto add_pred_fn;
-
- field = find_event_field(call, pred->field_name);
- if (!field) {
- parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
- return -EINVAL;
- }
-
pred->offset = field->offset;
if (!is_legal_op(field, pred->op)) {
@@ -1001,9 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
if (pred->op == OP_NE)
pred->not = 1;
-add_pred_fn:
- if (!dry_run)
- return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
+ pred->fn = fn;
return 0;
}
@@ -1302,39 +1302,37 @@ parse_operand:
return 0;
}
-static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+static struct filter_pred *create_pred(struct filter_parse_state *ps,
+ struct ftrace_event_call *call,
+ int op, char *operand1, char *operand2)
{
- struct filter_pred *pred;
+ struct ftrace_event_field *field;
+ static struct filter_pred pred;
- pred = kzalloc(sizeof(*pred), GFP_KERNEL);
- if (!pred)
- return NULL;
+ memset(&pred, 0, sizeof(pred));
+ pred.op = op;
+
+ if (op == OP_AND || op == OP_OR)
+ return &pred;
- pred->field_name = kstrdup(operand1, GFP_KERNEL);
- if (!pred->field_name) {
- kfree(pred);
+ if (!operand1 || !operand2) {
+ parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
return NULL;
}
- strcpy(pred->regex.pattern, operand2);
- pred->regex.len = strlen(pred->regex.pattern);
-
- pred->op = op;
-
- return pred;
-}
-
-static struct filter_pred *create_logical_pred(int op)
-{
- struct filter_pred *pred;
-
- pred = kzalloc(sizeof(*pred), GFP_KERNEL);
- if (!pred)
+ field = find_event_field(call, operand1);
+ if (!field) {
+ parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
return NULL;
+ }
- pred->op = op;
+ strcpy(pred.regex.pattern, operand2);
+ pred.regex.len = strlen(pred.regex.pattern);
- return pred;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ pred.field = field;
+#endif
+ return init_pred(ps, field, &pred) ? NULL : &pred;
}
static int check_preds(struct filter_parse_state *ps)
@@ -1375,6 +1373,23 @@ static int count_preds(struct filter_parse_state *ps)
return n_preds;
}
+struct check_pred_data {
+ int count;
+ int max;
+};
+
+static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ struct check_pred_data *d = data;
+
+ if (WARN_ON(d->count++ > d->max)) {
+ *err = -EINVAL;
+ return WALK_PRED_ABORT;
+ }
+ return WALK_PRED_DEFAULT;
+}
+
/*
* The tree is walked at filtering of an event. If the tree is not correctly
* built, it may cause an infinite loop. Check here that the tree does
@@ -1383,107 +1398,76 @@ static int count_preds(struct filter_parse_state *ps)
static int check_pred_tree(struct event_filter *filter,
struct filter_pred *root)
{
- struct filter_pred *preds;
- struct filter_pred *pred;
- enum move_type move = MOVE_DOWN;
- int count = 0;
- int done = 0;
- int max;
-
- /*
- * The max that we can hit a node is three times.
- * Once going down, once coming up from left, and
- * once coming up from right. This is more than enough
- * since leafs are only hit a single time.
- */
- max = 3 * filter->n_preds;
+ struct check_pred_data data = {
+ /*
+ * The max that we can hit a node is three times.
+ * Once going down, once coming up from left, and
+ * once coming up from right. This is more than enough
+ * since leafs are only hit a single time.
+ */
+ .max = 3 * filter->n_preds,
+ .count = 0,
+ };
- preds = filter->preds;
- if (!preds)
- return -EINVAL;
- pred = root;
+ return walk_pred_tree(filter->preds, root,
+ check_pred_tree_cb, &data);
+}
- do {
- if (WARN_ON(count++ > max))
- return -EINVAL;
+static int count_leafs_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ int *count = data;
- switch (move) {
- case MOVE_DOWN:
- if (pred->left != FILTER_PRED_INVALID) {
- pred = &preds[pred->left];
- continue;
- }
- /* A leaf at the root is just a leaf in the tree */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- done = 1;
- } while (!done);
+ if ((move == MOVE_DOWN) &&
+ (pred->left == FILTER_PRED_INVALID))
+ (*count)++;
- /* We are fine. */
- return 0;
+ return WALK_PRED_DEFAULT;
}
static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
{
- struct filter_pred *pred;
- enum move_type move = MOVE_DOWN;
- int count = 0;
- int done = 0;
+ int count = 0, ret;
- pred = root;
+ ret = walk_pred_tree(preds, root, count_leafs_cb, &count);
+ WARN_ON(ret);
+ return count;
+}
- do {
- switch (move) {
- case MOVE_DOWN:
- if (pred->left != FILTER_PRED_INVALID) {
- pred = &preds[pred->left];
- continue;
- }
- /* A leaf at the root is just a leaf in the tree */
- if (pred == root)
- return 1;
- count++;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- done = 1;
- } while (!done);
+struct fold_pred_data {
+ struct filter_pred *root;
+ int count;
+ int children;
+};
- return count;
+static int fold_pred_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ struct fold_pred_data *d = data;
+ struct filter_pred *root = d->root;
+
+ if (move != MOVE_DOWN)
+ return WALK_PRED_DEFAULT;
+ if (pred->left != FILTER_PRED_INVALID)
+ return WALK_PRED_DEFAULT;
+
+ if (WARN_ON(d->count == d->children)) {
+ *err = -EINVAL;
+ return WALK_PRED_ABORT;
+ }
+
+ pred->index &= ~FILTER_PRED_FOLD;
+ root->ops[d->count++] = pred->index;
+ return WALK_PRED_DEFAULT;
}
static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
{
- struct filter_pred *pred;
- enum move_type move = MOVE_DOWN;
- int count = 0;
+ struct fold_pred_data data = {
+ .root = root,
+ .count = 0,
+ };
int children;
- int done = 0;
/* No need to keep the fold flag */
root->index &= ~FILTER_PRED_FOLD;
@@ -1501,37 +1485,26 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
return -ENOMEM;
root->val = children;
+ data.children = children;
+ return walk_pred_tree(preds, root, fold_pred_cb, &data);
+}
- pred = root;
- do {
- switch (move) {
- case MOVE_DOWN:
- if (pred->left != FILTER_PRED_INVALID) {
- pred = &preds[pred->left];
- continue;
- }
- if (WARN_ON(count == children))
- return -EINVAL;
- pred->index &= ~FILTER_PRED_FOLD;
- root->ops[count++] = pred->index;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- done = 1;
- } while (!done);
+static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ struct filter_pred *preds = data;
- return 0;
+ if (move != MOVE_DOWN)
+ return WALK_PRED_DEFAULT;
+ if (!(pred->index & FILTER_PRED_FOLD))
+ return WALK_PRED_DEFAULT;
+
+ *err = fold_pred(preds, pred);
+ if (*err)
+ return WALK_PRED_ABORT;
+
+ /* eveyrhing below is folded, continue with parent */
+ return WALK_PRED_PARENT;
}
/*
@@ -1542,51 +1515,8 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
static int fold_pred_tree(struct event_filter *filter,
struct filter_pred *root)
{
- struct filter_pred *preds;
- struct filter_pred *pred;
- enum move_type move = MOVE_DOWN;
- int done = 0;
- int err;
-
- preds = filter->preds;
- if (!preds)
- return -EINVAL;
- pred = root;
-
- do {
- switch (move) {
- case MOVE_DOWN:
- if (pred->index & FILTER_PRED_FOLD) {
- err = fold_pred(preds, pred);
- if (err)
- return err;
- /* Folded nodes are like leafs */
- } else if (pred->left != FILTER_PRED_INVALID) {
- pred = &preds[pred->left];
- continue;
- }
-
- /* A leaf at the root is just a leaf in the tree */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- done = 1;
- } while (!done);
-
- return 0;
+ return walk_pred_tree(filter->preds, root, fold_pred_tree_cb,
+ filter->preds);
}
static int replace_preds(struct ftrace_event_call *call,
@@ -1643,27 +1573,17 @@ static int replace_preds(struct ftrace_event_call *call,
goto fail;
}
- if (elt->op == OP_AND || elt->op == OP_OR) {
- pred = create_logical_pred(elt->op);
- goto add_pred;
- }
-
- if (!operand1 || !operand2) {
- parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
+ pred = create_pred(ps, call, elt->op, operand1, operand2);
+ if (!pred) {
err = -EINVAL;
goto fail;
}
- pred = create_pred(elt->op, operand1, operand2);
-add_pred:
- if (!pred) {
- err = -ENOMEM;
- goto fail;
+ if (!dry_run) {
+ err = filter_add_pred(ps, filter, pred, &stack);
+ if (err)
+ goto fail;
}
- err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
- filter_free_pred(pred);
- if (err)
- goto fail;
operand1 = operand2 = NULL;
}
@@ -1886,6 +1806,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
mutex_lock(&event_mutex);
+ /* Make sure the system still has events */
+ if (!system->nr_events) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(system);
remove_filter_string(system->filter);
@@ -1952,17 +1878,14 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
int err;
struct event_filter *filter;
struct filter_parse_state *ps;
- struct ftrace_event_call *call = NULL;
+ struct ftrace_event_call *call;
mutex_lock(&event_mutex);
- list_for_each_entry(call, &ftrace_events, list) {
- if (call->event.type == event_id)
- break;
- }
+ call = event->tp_event;
err = -EINVAL;
- if (&call->list == &ftrace_events)
+ if (!call)
goto out_unlock;
err = -EEXIST;
@@ -2006,3 +1929,215 @@ out_unlock:
#endif /* CONFIG_PERF_EVENTS */
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace_events_filter_test.h"
+
+static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
+ struct event_filter **pfilter)
+{
+ struct event_filter *filter;
+ struct filter_parse_state *ps;
+ int err = -ENOMEM;
+
+ filter = __alloc_filter();
+ if (!filter)
+ goto out;
+
+ ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+ if (!ps)
+ goto free_filter;
+
+ parse_init(ps, filter_ops, filter_str);
+ err = filter_parse(ps);
+ if (err)
+ goto free_ps;
+
+ err = replace_preds(call, filter, ps, filter_str, false);
+ if (!err)
+ *pfilter = filter;
+
+ free_ps:
+ filter_opstack_clear(ps);
+ postfix_clear(ps);
+ kfree(ps);
+
+ free_filter:
+ if (err)
+ __free_filter(filter);
+
+ out:
+ return err;
+}
+
+#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
+{ \
+ .filter = FILTER, \
+ .rec = { .a = va, .b = vb, .c = vc, .d = vd, \
+ .e = ve, .f = vf, .g = vg, .h = vh }, \
+ .match = m, \
+ .not_visited = nvisit, \
+}
+#define YES 1
+#define NO 0
+
+static struct test_filter_data_t {
+ char *filter;
+ struct ftrace_raw_ftrace_test_filter rec;
+ int match;
+ char *not_visited;
+} test_filter_data[] = {
+#define FILTER "a == 1 && b == 1 && c == 1 && d == 1 && " \
+ "e == 1 && f == 1 && g == 1 && h == 1"
+ DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, ""),
+ DATA_REC(NO, 0, 1, 1, 1, 1, 1, 1, 1, "bcdefgh"),
+ DATA_REC(NO, 1, 1, 1, 1, 1, 1, 1, 0, ""),
+#undef FILTER
+#define FILTER "a == 1 || b == 1 || c == 1 || d == 1 || " \
+ "e == 1 || f == 1 || g == 1 || h == 1"
+ DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
+ DATA_REC(YES, 0, 0, 0, 0, 0, 0, 0, 1, ""),
+ DATA_REC(YES, 1, 0, 0, 0, 0, 0, 0, 0, "bcdefgh"),
+#undef FILTER
+#define FILTER "(a == 1 || b == 1) && (c == 1 || d == 1) && " \
+ "(e == 1 || f == 1) && (g == 1 || h == 1)"
+ DATA_REC(NO, 0, 0, 1, 1, 1, 1, 1, 1, "dfh"),
+ DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
+ DATA_REC(YES, 1, 0, 1, 0, 0, 1, 0, 1, "bd"),
+ DATA_REC(NO, 1, 0, 1, 0, 0, 1, 0, 0, "bd"),
+#undef FILTER
+#define FILTER "(a == 1 && b == 1) || (c == 1 && d == 1) || " \
+ "(e == 1 && f == 1) || (g == 1 && h == 1)"
+ DATA_REC(YES, 1, 0, 1, 1, 1, 1, 1, 1, "efgh"),
+ DATA_REC(YES, 0, 0, 0, 0, 0, 0, 1, 1, ""),
+ DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
+#undef FILTER
+#define FILTER "(a == 1 && b == 1) && (c == 1 && d == 1) && " \
+ "(e == 1 && f == 1) || (g == 1 && h == 1)"
+ DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 0, "gh"),
+ DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
+ DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, ""),
+#undef FILTER
+#define FILTER "((a == 1 || b == 1) || (c == 1 || d == 1) || " \
+ "(e == 1 || f == 1)) && (g == 1 || h == 1)"
+ DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 1, "bcdef"),
+ DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
+ DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, "h"),
+#undef FILTER
+#define FILTER "((((((((a == 1) && (b == 1)) || (c == 1)) && (d == 1)) || " \
+ "(e == 1)) && (f == 1)) || (g == 1)) && (h == 1))"
+ DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "ceg"),
+ DATA_REC(NO, 0, 1, 0, 1, 0, 1, 0, 1, ""),
+ DATA_REC(NO, 1, 0, 1, 0, 1, 0, 1, 0, ""),
+#undef FILTER
+#define FILTER "((((((((a == 1) || (b == 1)) && (c == 1)) || (d == 1)) && " \
+ "(e == 1)) || (f == 1)) && (g == 1)) || (h == 1))"
+ DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "bdfh"),
+ DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
+ DATA_REC(YES, 1, 0, 1, 0, 1, 0, 1, 0, "bdfh"),
+};
+
+#undef DATA_REC
+#undef FILTER
+#undef YES
+#undef NO
+
+#define DATA_CNT (sizeof(test_filter_data)/sizeof(struct test_filter_data_t))
+
+static int test_pred_visited;
+
+static int test_pred_visited_fn(struct filter_pred *pred, void *event)
+{
+ struct ftrace_event_field *field = pred->field;
+
+ test_pred_visited = 1;
+ printk(KERN_INFO "\npred visited %s\n", field->name);
+ return 1;
+}
+
+static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ char *fields = data;
+
+ if ((move == MOVE_DOWN) &&
+ (pred->left == FILTER_PRED_INVALID)) {
+ struct ftrace_event_field *field = pred->field;
+
+ if (!field) {
+ WARN(1, "all leafs should have field defined");
+ return WALK_PRED_DEFAULT;
+ }
+ if (!strchr(fields, *field->name))
+ return WALK_PRED_DEFAULT;
+
+ WARN_ON(!pred->fn);
+ pred->fn = test_pred_visited_fn;
+ }
+ return WALK_PRED_DEFAULT;
+}
+
+static __init int ftrace_test_event_filter(void)
+{
+ int i;
+
+ printk(KERN_INFO "Testing ftrace filter: ");
+
+ for (i = 0; i < DATA_CNT; i++) {
+ struct event_filter *filter = NULL;
+ struct test_filter_data_t *d = &test_filter_data[i];
+ int err;
+
+ err = test_get_filter(d->filter, &event_ftrace_test_filter,
+ &filter);
+ if (err) {
+ printk(KERN_INFO
+ "Failed to get filter for '%s', err %d\n",
+ d->filter, err);
+ break;
+ }
+
+ /*
+ * The preemption disabling is not really needed for self
+ * tests, but the rcu dereference will complain without it.
+ */
+ preempt_disable();
+ if (*d->not_visited)
+ walk_pred_tree(filter->preds, filter->root,
+ test_walk_pred_cb,
+ d->not_visited);
+
+ test_pred_visited = 0;
+ err = filter_match_preds(filter, &d->rec);
+ preempt_enable();
+
+ __free_filter(filter);
+
+ if (test_pred_visited) {
+ printk(KERN_INFO
+ "Failed, unwanted pred visited for filter %s\n",
+ d->filter);
+ break;
+ }
+
+ if (err != d->match) {
+ printk(KERN_INFO
+ "Failed to match filter '%s', expected %d\n",
+ d->filter, d->match);
+ break;
+ }
+ }
+
+ if (i == DATA_CNT)
+ printk(KERN_CONT "OK\n");
+
+ return 0;
+}
+
+late_initcall(ftrace_test_event_filter);
+
+#endif /* CONFIG_FTRACE_STARTUP_TEST */
diff --git a/kernel/trace/trace_events_filter_test.h b/kernel/trace/trace_events_filter_test.h
new file mode 100644
index 000000000000..bfd4dba0d603
--- /dev/null
+++ b/kernel/trace/trace_events_filter_test.h
@@ -0,0 +1,50 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM test
+
+#if !defined(_TRACE_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TEST_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(ftrace_test_filter,
+
+ TP_PROTO(int a, int b, int c, int d, int e, int f, int g, int h),
+
+ TP_ARGS(a, b, c, d, e, f, g, h),
+
+ TP_STRUCT__entry(
+ __field(int, a)
+ __field(int, b)
+ __field(int, c)
+ __field(int, d)
+ __field(int, e)
+ __field(int, f)
+ __field(int, g)
+ __field(int, h)
+ ),
+
+ TP_fast_assign(
+ __entry->a = a;
+ __entry->b = b;
+ __entry->c = c;
+ __entry->d = d;
+ __entry->e = e;
+ __entry->f = f;
+ __entry->g = g;
+ __entry->h = h;
+ ),
+
+ TP_printk("a %d, b %d, c %d, d %d, e %d, f %d, g %d, h %d",
+ __entry->a, __entry->b, __entry->c, __entry->d,
+ __entry->e, __entry->f, __entry->g, __entry->h)
+);
+
+#endif /* _TRACE_TEST_H || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_events_filter_test
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8d0e1cc4e974..c7b0c6a7db09 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
}
static int
-ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
+ftrace_trace_onoff_callback(struct ftrace_hash *hash,
+ char *glob, char *cmd, char *param, int enable)
{
struct ftrace_probe_ops *ops;
void *count = (void *)-1;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 962cdb24ed81..a7d2a4c653d8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = {
static struct trace_array *graph_array;
+/*
+ * DURATION column is being also used to display IRQ signs,
+ * following values are used by print_graph_irq and others
+ * to fill in space into DURATION column.
+ */
+enum {
+ DURATION_FILL_FULL = -1,
+ DURATION_FILL_START = -2,
+ DURATION_FILL_END = -3,
+};
+
+static enum print_line_t
+print_graph_duration(unsigned long long duration, struct trace_seq *s,
+ u32 flags);
/* Add a function return address to the trace stack on thread info.*/
int
@@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr,
static inline int ftrace_graph_ignore_irqs(void)
{
- if (!ftrace_graph_skip_irqs)
+ if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
return 0;
return in_irq();
@@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter,
return next;
}
-/* Signal a overhead of time execution to the output */
-static int
-print_graph_overhead(unsigned long long duration, struct trace_seq *s,
- u32 flags)
-{
- /* If duration disappear, we don't need anything */
- if (!(flags & TRACE_GRAPH_PRINT_DURATION))
- return 1;
-
- /* Non nested entry or return */
- if (duration == -1)
- return trace_seq_printf(s, " ");
-
- if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
- /* Duration exceeded 100 msecs */
- if (duration > 100000ULL)
- return trace_seq_printf(s, "! ");
-
- /* Duration exceeded 10 msecs */
- if (duration > 10000ULL)
- return trace_seq_printf(s, "+ ");
- }
-
- return trace_seq_printf(s, " ");
-}
-
static int print_graph_abs_time(u64 t, struct trace_seq *s)
{
unsigned long usecs_rem;
@@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
addr >= (unsigned long)__irqentry_text_end)
return TRACE_TYPE_UNHANDLED;
- /* Absolute time */
- if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
- ret = print_graph_abs_time(iter->ts, s);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ /* Absolute time */
+ if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
+ ret = print_graph_abs_time(iter->ts, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
- /* Cpu */
- if (flags & TRACE_GRAPH_PRINT_CPU) {
- ret = print_graph_cpu(s, cpu);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ /* Cpu */
+ if (flags & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
- /* Proc */
- if (flags & TRACE_GRAPH_PRINT_PROC) {
- ret = print_graph_proc(s, pid);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ /* Proc */
+ if (flags & TRACE_GRAPH_PRINT_PROC) {
+ ret = print_graph_proc(s, pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
}
/* No overhead */
- ret = print_graph_overhead(-1, s, flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ ret = print_graph_duration(DURATION_FILL_START, s, flags);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
if (type == TRACE_GRAPH_ENT)
ret = trace_seq_printf(s, "==========>");
@@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- /* Don't close the duration column if haven't one */
- if (flags & TRACE_GRAPH_PRINT_DURATION)
- trace_seq_printf(s, " |");
+ ret = print_graph_duration(DURATION_FILL_END, s, flags);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
+
ret = trace_seq_printf(s, "\n");
if (!ret)
@@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
}
static enum print_line_t
-print_graph_duration(unsigned long long duration, struct trace_seq *s)
+print_graph_duration(unsigned long long duration, struct trace_seq *s,
+ u32 flags)
{
- int ret;
+ int ret = -1;
+
+ if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
+ !(trace_flags & TRACE_ITER_CONTEXT_INFO))
+ return TRACE_TYPE_HANDLED;
+
+ /* No real adata, just filling the column with spaces */
+ switch (duration) {
+ case DURATION_FILL_FULL:
+ ret = trace_seq_printf(s, " | ");
+ return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+ case DURATION_FILL_START:
+ ret = trace_seq_printf(s, " ");
+ return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+ case DURATION_FILL_END:
+ ret = trace_seq_printf(s, " |");
+ return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Signal a overhead of time execution to the output */
+ if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
+ /* Duration exceeded 100 msecs */
+ if (duration > 100000ULL)
+ ret = trace_seq_printf(s, "! ");
+ /* Duration exceeded 10 msecs */
+ else if (duration > 10000ULL)
+ ret = trace_seq_printf(s, "+ ");
+ }
+
+ /*
+ * The -1 means we either did not exceed the duration tresholds
+ * or we dont want to print out the overhead. Either way we need
+ * to fill out the space.
+ */
+ if (ret == -1)
+ ret = trace_seq_printf(s, " ");
+
+ /* Catching here any failure happenned above */
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
ret = trace_print_graph_duration(duration, s);
if (ret != TRACE_TYPE_HANDLED)
@@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter,
cpu_data->enter_funcs[call->depth] = 0;
}
- /* Overhead */
- ret = print_graph_overhead(duration, s, flags);
- if (!ret)
+ /* Overhead and duration */
+ ret = print_graph_duration(duration, s, flags);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
- /* Duration */
- if (flags & TRACE_GRAPH_PRINT_DURATION) {
- ret = print_graph_duration(duration, s);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- }
-
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
ret = trace_seq_printf(s, " ");
@@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
cpu_data->enter_funcs[call->depth] = call->func;
}
- /* No overhead */
- ret = print_graph_overhead(-1, s, flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
/* No time */
- if (flags & TRACE_GRAPH_PRINT_DURATION) {
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}
+ if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
+ return 0;
+
/* Absolute time */
if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
ret = print_graph_abs_time(iter->ts, s);
@@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
if (print_graph_prologue(iter, s, 0, 0, flags))
return TRACE_TYPE_PARTIAL_LINE;
- /* Overhead */
- ret = print_graph_overhead(duration, s, flags);
- if (!ret)
+ /* Overhead and duration */
+ ret = print_graph_duration(duration, s, flags);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
- /* Duration */
- if (flags & TRACE_GRAPH_PRINT_DURATION) {
- ret = print_graph_duration(duration, s);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- }
-
/* Closing brace */
for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
ret = trace_seq_printf(s, " ");
@@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
if (print_graph_prologue(iter, s, 0, 0, flags))
return TRACE_TYPE_PARTIAL_LINE;
- /* No overhead */
- ret = print_graph_overhead(-1, s, flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
/* No time */
- if (flags & TRACE_GRAPH_PRINT_DURATION) {
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
/* Indentation */
if (depth > 0)
@@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
enum print_line_t
-__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
+print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
struct ftrace_graph_ent_entry *field;
struct fgraph_data *data = iter->private;
@@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags)
static enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
- return __print_graph_function_flags(iter, tracer_flags.val);
-}
-
-enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
- u32 flags)
-{
- if (trace_flags & TRACE_ITER_LATENCY_FMT)
- flags |= TRACE_GRAPH_PRINT_DURATION;
- else
- flags |= TRACE_GRAPH_PRINT_ABS_TIME;
-
- return __print_graph_function_flags(iter, flags);
+ return print_graph_function_flags(iter, tracer_flags.val);
}
static enum print_line_t
@@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces);
seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces);
- seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces);
- seq_printf(s, "#%.*s|||| / \n", size, spaces);
+ seq_printf(s, "#%.*s||| / \n", size, spaces);
}
static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
@@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
if (flags & TRACE_GRAPH_PRINT_PROC)
seq_printf(s, " TASK/PID ");
if (lat)
- seq_printf(s, "|||||");
+ seq_printf(s, "||||");
if (flags & TRACE_GRAPH_PRINT_DURATION)
seq_printf(s, " DURATION ");
seq_printf(s, " FUNCTION CALLS\n");
@@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
if (flags & TRACE_GRAPH_PRINT_PROC)
seq_printf(s, " | | ");
if (lat)
- seq_printf(s, "|||||");
+ seq_printf(s, "||||");
if (flags & TRACE_GRAPH_PRINT_DURATION)
seq_printf(s, " | | ");
seq_printf(s, " | | | |\n");
@@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
{
struct trace_iterator *iter = s->private;
+ if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
+ return;
+
if (trace_flags & TRACE_ITER_LATENCY_FMT) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return;
print_trace_header(s, iter);
- flags |= TRACE_GRAPH_PRINT_DURATION;
- } else
- flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+ }
__print_graph_headers_flags(s, flags);
}
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index c77424be284d..20dad0d7a163 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -23,7 +23,7 @@ static int tracer_enabled __read_mostly;
static DEFINE_PER_CPU(int, tracing_cpu);
-static DEFINE_SPINLOCK(max_trace_lock);
+static DEFINE_RAW_SPINLOCK(max_trace_lock);
enum {
TRACER_IRQS_OFF = (1 << 1),
@@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
}
#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
- TRACE_GRAPH_PRINT_PROC)
+ TRACE_GRAPH_PRINT_PROC | \
+ TRACE_GRAPH_PRINT_ABS_TIME | \
+ TRACE_GRAPH_PRINT_DURATION)
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
@@ -319,7 +321,7 @@ check_critical_timing(struct trace_array *tr,
if (!report_latency(delta))
goto out;
- spin_lock_irqsave(&max_trace_lock, flags);
+ raw_spin_lock_irqsave(&max_trace_lock, flags);
/* check if we are still the max latency */
if (!report_latency(delta))
@@ -342,7 +344,7 @@ check_critical_timing(struct trace_array *tr,
max_sequence++;
out_unlock:
- spin_unlock_irqrestore(&max_trace_lock, flags);
+ raw_spin_unlock_irqrestore(&max_trace_lock, flags);
out:
data->critical_sequence = max_sequence;
@@ -503,13 +505,13 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
#ifdef CONFIG_PREEMPT_TRACER
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
- if (preempt_trace())
+ if (preempt_trace() && !irq_trace())
stop_critical_timing(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
- if (preempt_trace())
+ if (preempt_trace() && !irq_trace())
start_critical_timing(a0, a1);
}
#endif /* CONFIG_PREEMPT_TRACER */
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 27d13b36b8be..00d527c945a4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref)
DEFINE_FETCH_deref(string)
DEFINE_FETCH_deref(string_size)
+static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
+{
+ if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+ update_deref_fetch_param(data->orig.data);
+ else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+ update_symbol_cache(data->orig.data);
+}
+
static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
{
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
@@ -377,6 +385,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield)
#define fetch_bitfield_string_size NULL
static __kprobes void
+update_bitfield_fetch_param(struct bitfield_fetch_param *data)
+{
+ /*
+ * Don't check the bitfield itself, because this must be the
+ * last fetch function.
+ */
+ if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+ update_deref_fetch_param(data->orig.data);
+ else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+ update_symbol_cache(data->orig.data);
+}
+
+static __kprobes void
free_bitfield_fetch_param(struct bitfield_fetch_param *data)
{
/*
@@ -389,6 +410,7 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)
free_symbol_cache(data->orig.data);
kfree(data);
}
+
/* Default (unsigned long) fetch type */
#define __DEFAULT_FETCH_TYPE(t) u##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -536,6 +558,7 @@ struct probe_arg {
/* Flags for trace_probe */
#define TP_FLAG_TRACE 1
#define TP_FLAG_PROFILE 2
+#define TP_FLAG_REGISTERED 4
struct trace_probe {
struct list_head list;
@@ -555,16 +578,49 @@ struct trace_probe {
(sizeof(struct probe_arg) * (n)))
-static __kprobes int probe_is_return(struct trace_probe *tp)
+static __kprobes int trace_probe_is_return(struct trace_probe *tp)
{
return tp->rp.handler != NULL;
}
-static __kprobes const char *probe_symbol(struct trace_probe *tp)
+static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
{
return tp->symbol ? tp->symbol : "unknown";
}
+static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
+{
+ return tp->rp.kp.offset;
+}
+
+static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
+{
+ return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
+}
+
+static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
+{
+ return !!(tp->flags & TP_FLAG_REGISTERED);
+}
+
+static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
+{
+ return !!(kprobe_gone(&tp->rp.kp));
+}
+
+static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
+ struct module *mod)
+{
+ int len = strlen(mod->name);
+ const char *name = trace_probe_symbol(tp);
+ return strncmp(mod->name, name, len) == 0 && name[len] == ':';
+}
+
+static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
+{
+ return !!strchr(trace_probe_symbol(tp), ':');
+}
+
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);
@@ -646,6 +702,16 @@ error:
return ERR_PTR(ret);
}
+static void update_probe_arg(struct probe_arg *arg)
+{
+ if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
+ update_bitfield_fetch_param(arg->fetch.data);
+ else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
+ update_deref_fetch_param(arg->fetch.data);
+ else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
+ update_symbol_cache(arg->fetch.data);
+}
+
static void free_probe_arg(struct probe_arg *arg)
{
if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
@@ -671,7 +737,7 @@ static void free_trace_probe(struct trace_probe *tp)
kfree(tp);
}
-static struct trace_probe *find_probe_event(const char *event,
+static struct trace_probe *find_trace_probe(const char *event,
const char *group)
{
struct trace_probe *tp;
@@ -683,15 +749,104 @@ static struct trace_probe *find_probe_event(const char *event,
return NULL;
}
-/* Unregister a trace_probe and probe_event: call with locking probe_lock */
-static void unregister_trace_probe(struct trace_probe *tp)
+/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
+static int enable_trace_probe(struct trace_probe *tp, int flag)
{
- if (probe_is_return(tp))
- unregister_kretprobe(&tp->rp);
+ int ret = 0;
+
+ tp->flags |= flag;
+ if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
+ !trace_probe_has_gone(tp)) {
+ if (trace_probe_is_return(tp))
+ ret = enable_kretprobe(&tp->rp);
+ else
+ ret = enable_kprobe(&tp->rp.kp);
+ }
+
+ return ret;
+}
+
+/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
+static void disable_trace_probe(struct trace_probe *tp, int flag)
+{
+ tp->flags &= ~flag;
+ if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
+ if (trace_probe_is_return(tp))
+ disable_kretprobe(&tp->rp);
+ else
+ disable_kprobe(&tp->rp.kp);
+ }
+}
+
+/* Internal register function - just handle k*probes and flags */
+static int __register_trace_probe(struct trace_probe *tp)
+{
+ int i, ret;
+
+ if (trace_probe_is_registered(tp))
+ return -EINVAL;
+
+ for (i = 0; i < tp->nr_args; i++)
+ update_probe_arg(&tp->args[i]);
+
+ /* Set/clear disabled flag according to tp->flag */
+ if (trace_probe_is_enabled(tp))
+ tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
+ else
+ tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
+
+ if (trace_probe_is_return(tp))
+ ret = register_kretprobe(&tp->rp);
else
- unregister_kprobe(&tp->rp.kp);
+ ret = register_kprobe(&tp->rp.kp);
+
+ if (ret == 0)
+ tp->flags |= TP_FLAG_REGISTERED;
+ else {
+ pr_warning("Could not insert probe at %s+%lu: %d\n",
+ trace_probe_symbol(tp), trace_probe_offset(tp), ret);
+ if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
+ pr_warning("This probe might be able to register after"
+ "target module is loaded. Continue.\n");
+ ret = 0;
+ } else if (ret == -EILSEQ) {
+ pr_warning("Probing address(0x%p) is not an "
+ "instruction boundary.\n",
+ tp->rp.kp.addr);
+ ret = -EINVAL;
+ }
+ }
+
+ return ret;
+}
+
+/* Internal unregister function - just handle k*probes and flags */
+static void __unregister_trace_probe(struct trace_probe *tp)
+{
+ if (trace_probe_is_registered(tp)) {
+ if (trace_probe_is_return(tp))
+ unregister_kretprobe(&tp->rp);
+ else
+ unregister_kprobe(&tp->rp.kp);
+ tp->flags &= ~TP_FLAG_REGISTERED;
+ /* Cleanup kprobe for reuse */
+ if (tp->rp.kp.symbol_name)
+ tp->rp.kp.addr = NULL;
+ }
+}
+
+/* Unregister a trace_probe and probe_event: call with locking probe_lock */
+static int unregister_trace_probe(struct trace_probe *tp)
+{
+ /* Enabled event can not be unregistered */
+ if (trace_probe_is_enabled(tp))
+ return -EBUSY;
+
+ __unregister_trace_probe(tp);
list_del(&tp->list);
unregister_probe_event(tp);
+
+ return 0;
}
/* Register a trace_probe and probe_event */
@@ -702,41 +857,68 @@ static int register_trace_probe(struct trace_probe *tp)
mutex_lock(&probe_lock);
- /* register as an event */
- old_tp = find_probe_event(tp->call.name, tp->call.class->system);
+ /* Delete old (same name) event if exist */
+ old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
if (old_tp) {
- /* delete old event */
- unregister_trace_probe(old_tp);
+ ret = unregister_trace_probe(old_tp);
+ if (ret < 0)
+ goto end;
free_trace_probe(old_tp);
}
+
+ /* Register new event */
ret = register_probe_event(tp);
if (ret) {
pr_warning("Failed to register probe event(%d)\n", ret);
goto end;
}
- tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
- if (probe_is_return(tp))
- ret = register_kretprobe(&tp->rp);
- else
- ret = register_kprobe(&tp->rp.kp);
-
- if (ret) {
- pr_warning("Could not insert probe(%d)\n", ret);
- if (ret == -EILSEQ) {
- pr_warning("Probing address(0x%p) is not an "
- "instruction boundary.\n",
- tp->rp.kp.addr);
- ret = -EINVAL;
- }
+ /* Register k*probe */
+ ret = __register_trace_probe(tp);
+ if (ret < 0)
unregister_probe_event(tp);
- } else
+ else
list_add_tail(&tp->list, &probe_list);
+
end:
mutex_unlock(&probe_lock);
return ret;
}
+/* Module notifier call back, checking event on the module */
+static int trace_probe_module_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+ struct trace_probe *tp;
+ int ret;
+
+ if (val != MODULE_STATE_COMING)
+ return NOTIFY_DONE;
+
+ /* Update probes on coming module */
+ mutex_lock(&probe_lock);
+ list_for_each_entry(tp, &probe_list, list) {
+ if (trace_probe_within_module(tp, mod)) {
+ /* Don't need to check busy - this should have gone. */
+ __unregister_trace_probe(tp);
+ ret = __register_trace_probe(tp);
+ if (ret)
+ pr_warning("Failed to re-register probe %s on"
+ "%s: %d\n",
+ tp->call.name, mod->name, ret);
+ }
+ }
+ mutex_unlock(&probe_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block trace_probe_module_nb = {
+ .notifier_call = trace_probe_module_callback,
+ .priority = 1 /* Invoked after kprobe module callback */
+};
+
/* Split symbol and offset. */
static int split_symbol_offset(char *symbol, unsigned long *offset)
{
@@ -962,8 +1144,8 @@ static int create_trace_probe(int argc, char **argv)
{
/*
* Argument syntax:
- * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
- * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
+ * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
+ * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
* Fetch args:
* $retval : fetch return value
* $stack : fetch stack address
@@ -1025,17 +1207,18 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
mutex_lock(&probe_lock);
- tp = find_probe_event(event, group);
+ tp = find_trace_probe(event, group);
if (!tp) {
mutex_unlock(&probe_lock);
pr_info("Event %s/%s doesn't exist.\n", group, event);
return -ENOENT;
}
/* delete an event */
- unregister_trace_probe(tp);
- free_trace_probe(tp);
+ ret = unregister_trace_probe(tp);
+ if (ret == 0)
+ free_trace_probe(tp);
mutex_unlock(&probe_lock);
- return 0;
+ return ret;
}
if (argc < 2) {
@@ -1144,20 +1327,30 @@ error:
return ret;
}
-static void cleanup_all_probes(void)
+static int release_all_trace_probes(void)
{
struct trace_probe *tp;
+ int ret = 0;
mutex_lock(&probe_lock);
+ /* Ensure no probe is in use. */
+ list_for_each_entry(tp, &probe_list, list)
+ if (trace_probe_is_enabled(tp)) {
+ ret = -EBUSY;
+ goto end;
+ }
/* TODO: Use batch unregistration */
while (!list_empty(&probe_list)) {
tp = list_entry(probe_list.next, struct trace_probe, list);
unregister_trace_probe(tp);
free_trace_probe(tp);
}
+
+end:
mutex_unlock(&probe_lock);
-}
+ return ret;
+}
/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
@@ -1181,15 +1374,16 @@ static int probes_seq_show(struct seq_file *m, void *v)
struct trace_probe *tp = v;
int i;
- seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
+ seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
if (!tp->symbol)
seq_printf(m, " 0x%p", tp->rp.kp.addr);
else if (tp->rp.kp.offset)
- seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
+ seq_printf(m, " %s+%u", trace_probe_symbol(tp),
+ tp->rp.kp.offset);
else
- seq_printf(m, " %s", probe_symbol(tp));
+ seq_printf(m, " %s", trace_probe_symbol(tp));
for (i = 0; i < tp->nr_args; i++)
seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
@@ -1207,9 +1401,13 @@ static const struct seq_operations probes_seq_op = {
static int probes_open(struct inode *inode, struct file *file)
{
- if ((file->f_mode & FMODE_WRITE) &&
- (file->f_flags & O_TRUNC))
- cleanup_all_probes();
+ int ret;
+
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ ret = release_all_trace_probes();
+ if (ret < 0)
+ return ret;
+ }
return seq_open(file, &probes_seq_op);
}
@@ -1397,7 +1595,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
if (!filter_current_check_discard(buffer, call, entry, event))
- trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+ trace_nowake_buffer_unlock_commit_regs(buffer, event,
+ irq_flags, pc, regs);
}
/* Kretprobe handler */
@@ -1429,7 +1628,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
if (!filter_current_check_discard(buffer, call, entry, event))
- trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+ trace_nowake_buffer_unlock_commit_regs(buffer, event,
+ irq_flags, pc, regs);
}
/* Event entry printers */
@@ -1511,30 +1711,6 @@ partial:
return TRACE_TYPE_PARTIAL_LINE;
}
-static int probe_event_enable(struct ftrace_event_call *call)
-{
- struct trace_probe *tp = (struct trace_probe *)call->data;
-
- tp->flags |= TP_FLAG_TRACE;
- if (probe_is_return(tp))
- return enable_kretprobe(&tp->rp);
- else
- return enable_kprobe(&tp->rp.kp);
-}
-
-static void probe_event_disable(struct ftrace_event_call *call)
-{
- struct trace_probe *tp = (struct trace_probe *)call->data;
-
- tp->flags &= ~TP_FLAG_TRACE;
- if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
- if (probe_is_return(tp))
- disable_kretprobe(&tp->rp);
- else
- disable_kprobe(&tp->rp.kp);
- }
-}
-
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed) \
do { \
@@ -1596,7 +1772,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
const char *fmt, *arg;
- if (!probe_is_return(tp)) {
+ if (!trace_probe_is_return(tp)) {
fmt = "(%lx)";
arg = "REC->" FIELD_STRING_IP;
} else {
@@ -1713,49 +1889,25 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
-
-static int probe_perf_enable(struct ftrace_event_call *call)
-{
- struct trace_probe *tp = (struct trace_probe *)call->data;
-
- tp->flags |= TP_FLAG_PROFILE;
-
- if (probe_is_return(tp))
- return enable_kretprobe(&tp->rp);
- else
- return enable_kprobe(&tp->rp.kp);
-}
-
-static void probe_perf_disable(struct ftrace_event_call *call)
-{
- struct trace_probe *tp = (struct trace_probe *)call->data;
-
- tp->flags &= ~TP_FLAG_PROFILE;
-
- if (!(tp->flags & TP_FLAG_TRACE)) {
- if (probe_is_return(tp))
- disable_kretprobe(&tp->rp);
- else
- disable_kprobe(&tp->rp.kp);
- }
-}
#endif /* CONFIG_PERF_EVENTS */
static __kprobes
int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
{
+ struct trace_probe *tp = (struct trace_probe *)event->data;
+
switch (type) {
case TRACE_REG_REGISTER:
- return probe_event_enable(event);
+ return enable_trace_probe(tp, TP_FLAG_TRACE);
case TRACE_REG_UNREGISTER:
- probe_event_disable(event);
+ disable_trace_probe(tp, TP_FLAG_TRACE);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return probe_perf_enable(event);
+ return enable_trace_probe(tp, TP_FLAG_PROFILE);
case TRACE_REG_PERF_UNREGISTER:
- probe_perf_disable(event);
+ disable_trace_probe(tp, TP_FLAG_PROFILE);
return 0;
#endif
}
@@ -1805,7 +1957,7 @@ static int register_probe_event(struct trace_probe *tp)
/* Initialize ftrace_event_call */
INIT_LIST_HEAD(&call->class->fields);
- if (probe_is_return(tp)) {
+ if (trace_probe_is_return(tp)) {
call->event.funcs = &kretprobe_funcs;
call->class->define_fields = kretprobe_event_define_fields;
} else {
@@ -1844,6 +1996,9 @@ static __init int init_kprobe_trace(void)
struct dentry *d_tracer;
struct dentry *entry;
+ if (register_module_notifier(&trace_probe_module_nb))
+ return -EINVAL;
+
d_tracer = tracing_init_dentry();
if (!d_tracer)
return 0;
@@ -1897,12 +2052,12 @@ static __init int kprobe_trace_self_tests_init(void)
warn++;
} else {
/* Enable trace point */
- tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
+ tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
if (WARN_ON_ONCE(tp == NULL)) {
pr_warning("error on getting new probe.\n");
warn++;
} else
- probe_event_enable(&tp->call);
+ enable_trace_probe(tp, TP_FLAG_TRACE);
}
ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
@@ -1912,12 +2067,12 @@ static __init int kprobe_trace_self_tests_init(void)
warn++;
} else {
/* Enable trace point */
- tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
+ tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
if (WARN_ON_ONCE(tp == NULL)) {
pr_warning("error on getting new probe.\n");
warn++;
} else
- probe_event_enable(&tp->call);
+ enable_trace_probe(tp, TP_FLAG_TRACE);
}
if (warn)
@@ -1925,6 +2080,21 @@ static __init int kprobe_trace_self_tests_init(void)
ret = target(1, 2, 3, 4, 5, 6);
+ /* Disable trace points before removing it */
+ tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting test probe.\n");
+ warn++;
+ } else
+ disable_trace_probe(tp, TP_FLAG_TRACE);
+
+ tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting 2nd test probe.\n");
+ warn++;
+ } else
+ disable_trace_probe(tp, TP_FLAG_TRACE);
+
ret = command_trace_probe("-:testprobe");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on deleting a probe.\n");
@@ -1938,7 +2108,7 @@ static __init int kprobe_trace_self_tests_init(void)
}
end:
- cleanup_all_probes();
+ release_all_trace_probes();
if (warn)
pr_cont("NG: Some tests are failed. Please check them.\n");
else
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 017fa376505d..fd3c8aae55e5 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,7 +12,7 @@
#include <linux/slab.h>
#include <linux/time.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "trace.h"
#include "trace_output.h"
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e37de492a9e1..51999309a6cf 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
{
struct stack_entry *field;
struct trace_seq *s = &iter->seq;
- int i;
+ unsigned long *p;
+ unsigned long *end;
trace_assign_type(field, iter->ent);
+ end = (unsigned long *)((long)iter->ent + iter->ent_size);
if (!trace_seq_puts(s, "<stack trace>\n"))
goto partial;
- for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
- if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
- break;
+
+ for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
if (!trace_seq_puts(s, " => "))
goto partial;
- if (!seq_print_ip_sym(s, field->caller[i], flags))
+ if (!seq_print_ip_sym(s, *p, flags))
goto partial;
if (!trace_seq_puts(s, "\n"))
goto partial;
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 1f06468a10d7..6fd4ffd042f9 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -59,18 +59,19 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
continue;
}
+ fmt = NULL;
tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL);
- if (tb_fmt)
+ if (tb_fmt) {
fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
- if (tb_fmt && fmt) {
- list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
- strcpy(fmt, *iter);
- tb_fmt->fmt = fmt;
- *iter = tb_fmt->fmt;
- } else {
- kfree(tb_fmt);
- *iter = NULL;
+ if (fmt) {
+ list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
+ strcpy(fmt, *iter);
+ tb_fmt->fmt = fmt;
+ } else
+ kfree(tb_fmt);
}
+ *iter = fmt;
+
}
mutex_unlock(&btrace_mutex);
}
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index f029dd4fd2ca..e4a70c0c71b6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -227,7 +227,9 @@ static void wakeup_trace_close(struct trace_iterator *iter)
graph_trace_close(iter);
}
-#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
+#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
+ TRACE_GRAPH_PRINT_ABS_TIME | \
+ TRACE_GRAPH_PRINT_DURATION)
static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b0b53b8e4c25..77575b386d97 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -156,20 +156,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
{
long *ptr = filp->private_data;
unsigned long val, flags;
- char buf[64];
int ret;
int cpu;
- if (count >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, count))
- return -EFAULT;
-
- buf[count] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
+ ret = kstrtoul_from_user(ubuf, count, 10, &val);
+ if (ret)
return ret;
local_irq_save(flags);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ee7b5a0bb9f8..cb654542c1a1 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -2,6 +2,7 @@
#include <trace/events/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
+#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>