summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig90
-rw-r--r--kernel/trace/Makefile7
-rw-r--r--kernel/trace/blktrace.c100
-rw-r--r--kernel/trace/ftrace.c149
-rw-r--r--kernel/trace/kmemtrace.c529
-rw-r--r--kernel/trace/ring_buffer.c402
-rw-r--r--kernel/trace/trace.c199
-rw-r--r--kernel/trace/trace.h111
-rw-r--r--kernel/trace/trace_boot.c185
-rw-r--r--kernel/trace/trace_clock.c7
-rw-r--r--kernel/trace/trace_entries.h94
-rw-r--r--kernel/trace/trace_event_perf.c58
-rw-r--r--kernel/trace/trace_events.c431
-rw-r--r--kernel/trace/trace_events_filter.c27
-rw-r--r--kernel/trace/trace_export.c8
-rw-r--r--kernel/trace/trace_functions.c6
-rw-r--r--kernel/trace/trace_functions_graph.c218
-rw-r--r--kernel/trace/trace_irqsoff.c155
-rw-r--r--kernel/trace/trace_kdb.c135
-rw-r--r--kernel/trace/trace_kprobe.c427
-rw-r--r--kernel/trace/trace_ksym.c508
-rw-r--r--kernel/trace/trace_output.c69
-rw-r--r--kernel/trace/trace_sched_wakeup.c263
-rw-r--r--kernel/trace/trace_selftest.c87
-rw-r--r--kernel/trace/trace_stack.c9
-rw-r--r--kernel/trace/trace_syscalls.c7
-rw-r--r--kernel/trace/trace_sysprof.c329
-rw-r--r--kernel/trace/trace_workqueue.c10
28 files changed, 1780 insertions, 2840 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8b1797c4545b..e04b8bcdef88 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
help
See Documentation/trace/ftrace-design.txt
+config HAVE_C_RECORDMCOUNT
+ bool
+ help
+ C version of recordmcount available?
+
config TRACER_MAX_TRACE
bool
@@ -121,7 +126,7 @@ if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
- select FRAME_POINTER
+ select FRAME_POINTER if (!ARM_UNWIND)
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
@@ -153,7 +158,7 @@ config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
depends on TRACE_IRQFLAGS_SUPPORT
- depends on GENERIC_TIME
+ depends on !ARCH_USES_GETTIMEOFFSET
select TRACE_IRQFLAGS
select GENERIC_TRACER
select TRACER_MAX_TRACE
@@ -175,7 +180,7 @@ config IRQSOFF_TRACER
config PREEMPT_TRACER
bool "Preemption-off Latency Tracer"
default n
- depends on GENERIC_TIME
+ depends on !ARCH_USES_GETTIMEOFFSET
depends on PREEMPT
select GENERIC_TRACER
select TRACER_MAX_TRACE
@@ -194,15 +199,6 @@ config PREEMPT_TRACER
enabled. This option and the irqs-off timing option can be
used together or separately.)
-config SYSPROF_TRACER
- bool "Sysprof Tracer"
- depends on X86
- select GENERIC_TRACER
- select CONTEXT_SWITCH_TRACER
- help
- This tracer provides the trace needed by the 'Sysprof' userspace
- tool.
-
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select GENERIC_TRACER
@@ -229,23 +225,6 @@ config FTRACE_SYSCALLS
help
Basic tracer to catch the syscall entry and exit events.
-config BOOT_TRACER
- bool "Trace boot initcalls"
- select GENERIC_TRACER
- select CONTEXT_SWITCH_TRACER
- help
- This tracer helps developers to optimize boot times: it records
- the timings of the initcalls and traces key events and the identity
- of tasks that can cause boot delays, such as context-switches.
-
- Its aim is to be parsed by the scripts/bootgraph.pl tool to
- produce pretty graphics about boot inefficiencies, giving a visual
- representation of the delays during initcalls - but the raw
- /debug/tracing/trace text output is readable too.
-
- You must pass in initcall_debug and ftrace=initcall to the kernel
- command line to enable this on bootup.
-
config TRACE_BRANCH_PROFILING
bool
select GENERIC_TRACER
@@ -325,28 +304,6 @@ config BRANCH_TRACER
Say N if unsure.
-config KSYM_TRACER
- bool "Trace read and write access on kernel memory locations"
- depends on HAVE_HW_BREAKPOINT
- select TRACING
- help
- This tracer helps find read and write operations on any given kernel
- symbol i.e. /proc/kallsyms.
-
-config PROFILE_KSYM_TRACER
- bool "Profile all kernel memory accesses on 'watched' variables"
- depends on KSYM_TRACER
- help
- This tracer profiles kernel accesses on variables watched through the
- ksym tracer ftrace plugin. Depending upon the hardware, all read
- and write operations on kernel variables can be monitored for
- accesses.
-
- The results will be displayed in:
- /debugfs/tracing/profile_ksym
-
- Say N if unsure.
-
config STACK_TRACER
bool "Trace max stack"
depends on HAVE_FUNCTION_TRACER
@@ -371,37 +328,6 @@ config STACK_TRACER
Say N if unsure.
-config KMEMTRACE
- bool "Trace SLAB allocations"
- select GENERIC_TRACER
- help
- kmemtrace provides tracing for slab allocator functions, such as
- kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
- data is then fed to the userspace application in order to analyse
- allocation hotspots, internal fragmentation and so on, making it
- possible to see how well an allocator performs, as well as debug
- and profile kernel code.
-
- This requires an userspace application to use. See
- Documentation/trace/kmemtrace.txt for more information.
-
- Saying Y will make the kernel somewhat larger and slower. However,
- if you disable kmemtrace at run-time or boot-time, the performance
- impact is minimal (depending on the arch the kernel is built for).
-
- If unsure, say N.
-
-config WORKQUEUE_TRACER
- bool "Trace workqueues"
- select GENERIC_TRACER
- help
- The workqueue tracer provides some statistical information
- about each cpu workqueue thread such as the number of the
- works inserted and executed since their creation. It can help
- to evaluate the amount of work each of them has to perform.
- For example it can help a developer to decide whether he should
- choose a per-cpu workqueue instead of a singlethreaded one.
-
config BLK_DEV_IO_TRACE
bool "Support for tracing block IO actions"
depends on SYSFS
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index ffb1a5b0550e..53f338190b26 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
-obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
-obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
-obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +52,9 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
+ifeq ($(CONFIG_TRACING),y)
+obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
+endif
libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 638711c17504..bc251ed66724 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,7 +23,6 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
-#include <linux/smp_lock.h>
#include <linux/time.h>
#include <linux/uaccess.h>
@@ -169,9 +168,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
BLK_TC_ACT(BLK_TC_WRITE) };
+#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
+#define BLK_TC_RAHEAD BLK_TC_AHEAD
+
/* The ilog2() calls fall out because they're constant */
-#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
- (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
+#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
+ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
/*
* The worker for the various blk_add_trace*() types. Fills out a
@@ -194,9 +196,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
what |= ddir_act[rw & WRITE];
- what |= MASK_TC_BIT(rw, BARRIER);
- what |= MASK_TC_BIT(rw, SYNCIO);
- what |= MASK_TC_BIT(rw, AHEAD);
+ what |= MASK_TC_BIT(rw, HARDBARRIER);
+ what |= MASK_TC_BIT(rw, SYNC);
+ what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
what |= MASK_TC_BIT(rw, DISCARD);
@@ -323,6 +325,7 @@ static const struct file_operations blk_dropped_fops = {
.owner = THIS_MODULE,
.open = blk_dropped_open,
.read = blk_dropped_read,
+ .llseek = default_llseek,
};
static int blk_msg_open(struct inode *inode, struct file *filp)
@@ -362,6 +365,7 @@ static const struct file_operations blk_msg_fops = {
.owner = THIS_MODULE,
.open = blk_msg_open,
.write = blk_msg_write,
+ .llseek = noop_llseek,
};
/*
@@ -549,6 +553,41 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
EXPORT_SYMBOL_GPL(blk_trace_setup);
+#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+static int compat_blk_trace_setup(struct request_queue *q, char *name,
+ dev_t dev, struct block_device *bdev,
+ char __user *arg)
+{
+ struct blk_user_trace_setup buts;
+ struct compat_blk_user_trace_setup cbuts;
+ int ret;
+
+ if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
+ return -EFAULT;
+
+ buts = (struct blk_user_trace_setup) {
+ .act_mask = cbuts.act_mask,
+ .buf_size = cbuts.buf_size,
+ .buf_nr = cbuts.buf_nr,
+ .start_lba = cbuts.start_lba,
+ .end_lba = cbuts.end_lba,
+ .pid = cbuts.pid,
+ };
+ memcpy(&buts.name, &cbuts.name, 32);
+
+ ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(arg, &buts.name, 32)) {
+ blk_trace_remove(q);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+#endif
+
int blk_trace_startstop(struct request_queue *q, int start)
{
int ret;
@@ -608,6 +647,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
bdevname(bdev, b);
ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
break;
+#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+ case BLKTRACESETUP32:
+ bdevname(bdev, b);
+ ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
+ break;
+#endif
case BLKTRACESTART:
start = 1;
case BLKTRACESTOP:
@@ -661,10 +706,13 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
if (likely(!bt))
return;
- if (blk_discard_rq(rq))
- rw |= (1 << BIO_RW_DISCARD);
+ if (rq->cmd_flags & REQ_DISCARD)
+ rw |= REQ_DISCARD;
+
+ if (rq->cmd_flags & REQ_SECURE)
+ rw |= REQ_SECURE;
- if (blk_pc_request(rq)) {
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
what |= BLK_TC_ACT(BLK_TC_PC);
__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
what, rq->errors, rq->cmd_len, rq->cmd);
@@ -925,7 +973,7 @@ void blk_add_driver_data(struct request_queue *q,
if (likely(!bt))
return;
- if (blk_pc_request(rq))
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
BLK_TA_DRV_DATA, rq->errors, len, data);
else
@@ -1603,10 +1651,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct block_device *bdev;
ssize_t ret = -ENXIO;
- lock_kernel();
bdev = bdget(part_devt(p));
if (bdev == NULL)
- goto out_unlock_kernel;
+ goto out;
q = blk_trace_get_queue(bdev);
if (q == NULL)
@@ -1634,8 +1681,7 @@ out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
+out:
return ret;
}
@@ -1665,11 +1711,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
ret = -ENXIO;
- lock_kernel();
p = dev_to_part(dev);
bdev = bdget(part_devt(p));
if (bdev == NULL)
- goto out_unlock_kernel;
+ goto out;
q = blk_trace_get_queue(bdev);
if (q == NULL)
@@ -1704,8 +1749,6 @@ out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
out:
return ret ? ret : count;
}
@@ -1730,7 +1773,7 @@ void blk_dump_cmd(char *buf, struct request *rq)
int len = rq->cmd_len;
unsigned char *cmd = rq->cmd;
- if (!blk_pc_request(rq)) {
+ if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
buf[0] = '\0';
return;
}
@@ -1755,21 +1798,23 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
if (rw & WRITE)
rwbs[i++] = 'W';
- else if (rw & 1 << BIO_RW_DISCARD)
+ else if (rw & REQ_DISCARD)
rwbs[i++] = 'D';
else if (bytes)
rwbs[i++] = 'R';
else
rwbs[i++] = 'N';
- if (rw & 1 << BIO_RW_AHEAD)
+ if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
- if (rw & 1 << BIO_RW_BARRIER)
+ if (rw & REQ_HARDBARRIER)
rwbs[i++] = 'B';
- if (rw & 1 << BIO_RW_SYNCIO)
+ if (rw & REQ_SYNC)
rwbs[i++] = 'S';
- if (rw & 1 << BIO_RW_META)
+ if (rw & REQ_META)
rwbs[i++] = 'M';
+ if (rw & REQ_SECURE)
+ rwbs[i++] = 'E';
rwbs[i] = '\0';
}
@@ -1779,8 +1824,11 @@ void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
int rw = rq->cmd_flags & 0x03;
int bytes;
- if (blk_discard_rq(rq))
- rw |= (1 << BIO_RW_DISCARD);
+ if (rq->cmd_flags & REQ_DISCARD)
+ rw |= REQ_DISCARD;
+
+ if (rq->cmd_flags & REQ_SECURE)
+ rw |= REQ_SECURE;
bytes = blk_rq_bytes(rq);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6d2cb14f9449..f3dadae83883 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -381,12 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v)
{
struct ftrace_profile *rec = v;
char str[KSYM_SYMBOL_LEN];
+ int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- static DEFINE_MUTEX(mutex);
static struct trace_seq s;
unsigned long long avg;
unsigned long long stddev;
#endif
+ mutex_lock(&ftrace_profile_lock);
+
+ /* we raced with function_profile_reset() */
+ if (unlikely(rec->counter == 0)) {
+ ret = -EBUSY;
+ goto out;
+ }
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
@@ -408,7 +415,6 @@ static int function_stat_show(struct seq_file *m, void *v)
do_div(stddev, (rec->counter - 1) * 1000);
}
- mutex_lock(&mutex);
trace_seq_init(&s);
trace_print_graph_duration(rec->time, &s);
trace_seq_puts(&s, " ");
@@ -416,11 +422,12 @@ static int function_stat_show(struct seq_file *m, void *v)
trace_seq_puts(&s, " ");
trace_print_graph_duration(stddev, &s);
trace_print_seq(m, &s);
- mutex_unlock(&mutex);
#endif
seq_putc(m, '\n');
+out:
+ mutex_unlock(&ftrace_profile_lock);
- return 0;
+ return ret;
}
static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
@@ -793,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = {
.open = tracing_open_generic,
.read = ftrace_profile_read,
.write = ftrace_profile_write,
+ .llseek = default_llseek,
};
/* used to initialize the real stat files */
@@ -877,10 +885,8 @@ enum {
FTRACE_ENABLE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
- FTRACE_ENABLE_MCOUNT = (1 << 3),
- FTRACE_DISABLE_MCOUNT = (1 << 4),
- FTRACE_START_FUNC_RET = (1 << 5),
- FTRACE_STOP_FUNC_RET = (1 << 6),
+ FTRACE_START_FUNC_RET = (1 << 3),
+ FTRACE_STOP_FUNC_RET = (1 << 4),
};
static int ftrace_filtered;
@@ -1219,8 +1225,6 @@ static void ftrace_shutdown(int command)
static void ftrace_startup_sysctl(void)
{
- int command = FTRACE_ENABLE_MCOUNT;
-
if (unlikely(ftrace_disabled))
return;
@@ -1228,23 +1232,17 @@ static void ftrace_startup_sysctl(void)
saved_ftrace_func = NULL;
/* ftrace_start_up is true if we want ftrace running */
if (ftrace_start_up)
- command |= FTRACE_ENABLE_CALLS;
-
- ftrace_run_update_code(command);
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
}
static void ftrace_shutdown_sysctl(void)
{
- int command = FTRACE_DISABLE_MCOUNT;
-
if (unlikely(ftrace_disabled))
return;
/* ftrace_start_up is true if ftrace is running */
if (ftrace_start_up)
- command |= FTRACE_DISABLE_CALLS;
-
- ftrace_run_update_code(command);
+ ftrace_run_update_code(FTRACE_DISABLE_CALLS);
}
static cycle_t ftrace_update_time;
@@ -1361,24 +1359,29 @@ enum {
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
struct ftrace_iterator {
- struct ftrace_page *pg;
- int hidx;
- int idx;
- unsigned flags;
- struct trace_parser parser;
+ loff_t pos;
+ loff_t func_pos;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *func;
+ struct ftrace_func_probe *probe;
+ struct trace_parser parser;
+ int hidx;
+ int idx;
+ unsigned flags;
};
static void *
-t_hash_next(struct seq_file *m, void *v, loff_t *pos)
+t_hash_next(struct seq_file *m, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
- struct hlist_node *hnd = v;
+ struct hlist_node *hnd = NULL;
struct hlist_head *hhd;
- WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
-
(*pos)++;
+ iter->pos = *pos;
+ if (iter->probe)
+ hnd = &iter->probe->node;
retry:
if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
return NULL;
@@ -1401,7 +1404,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
}
}
- return hnd;
+ if (WARN_ON_ONCE(!hnd))
+ return NULL;
+
+ iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
+
+ return iter;
}
static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1410,26 +1418,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l;
- if (!(iter->flags & FTRACE_ITER_HASH))
- *pos = 0;
-
- iter->flags |= FTRACE_ITER_HASH;
+ if (iter->func_pos > *pos)
+ return NULL;
iter->hidx = 0;
- for (l = 0; l <= *pos; ) {
- p = t_hash_next(m, p, &l);
+ for (l = 0; l <= (*pos - iter->func_pos); ) {
+ p = t_hash_next(m, &l);
if (!p)
break;
}
- return p;
+ if (!p)
+ return NULL;
+
+ /* Only set this if we have an item */
+ iter->flags |= FTRACE_ITER_HASH;
+
+ return iter;
}
-static int t_hash_show(struct seq_file *m, void *v)
+static int
+t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
{
struct ftrace_func_probe *rec;
- struct hlist_node *hnd = v;
- rec = hlist_entry(hnd, struct ftrace_func_probe, node);
+ rec = iter->probe;
+ if (WARN_ON_ONCE(!rec))
+ return -EIO;
if (rec->ops->print)
return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1450,12 +1464,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
struct dyn_ftrace *rec = NULL;
if (iter->flags & FTRACE_ITER_HASH)
- return t_hash_next(m, v, pos);
+ return t_hash_next(m, pos);
(*pos)++;
+ iter->pos = *pos;
if (iter->flags & FTRACE_ITER_PRINTALL)
- return NULL;
+ return t_hash_start(m, pos);
retry:
if (iter->idx >= iter->pg->index) {
@@ -1484,7 +1499,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
}
}
- return rec;
+ if (!rec)
+ return t_hash_start(m, pos);
+
+ iter->func_pos = *pos;
+ iter->func = rec;
+
+ return iter;
+}
+
+static void reset_iter_read(struct ftrace_iterator *iter)
+{
+ iter->pos = 0;
+ iter->func_pos = 0;
+ iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
}
static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1495,6 +1523,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
mutex_lock(&ftrace_lock);
/*
+ * If an lseek was done, then reset and start from beginning.
+ */
+ if (*pos < iter->pos)
+ reset_iter_read(iter);
+
+ /*
* For set_ftrace_filter reading, if we have the filter
* off, we can short cut and just print out that all
* functions are enabled.
@@ -1503,12 +1537,19 @@ static void *t_start(struct seq_file *m, loff_t *pos)
if (*pos > 0)
return t_hash_start(m, pos);
iter->flags |= FTRACE_ITER_PRINTALL;
+ /* reset in case of seek/pread */
+ iter->flags &= ~FTRACE_ITER_HASH;
return iter;
}
if (iter->flags & FTRACE_ITER_HASH)
return t_hash_start(m, pos);
+ /*
+ * Unfortunately, we need to restart at ftrace_pages_start
+ * every time we let go of the ftrace_mutex. This is because
+ * those pointers can change without the lock.
+ */
iter->pg = ftrace_pages_start;
iter->idx = 0;
for (l = 0; l <= *pos; ) {
@@ -1517,10 +1558,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
break;
}
- if (!p && iter->flags & FTRACE_ITER_FILTER)
- return t_hash_start(m, pos);
+ if (!p) {
+ if (iter->flags & FTRACE_ITER_FILTER)
+ return t_hash_start(m, pos);
- return p;
+ return NULL;
+ }
+
+ return iter;
}
static void t_stop(struct seq_file *m, void *p)
@@ -1531,16 +1576,18 @@ static void t_stop(struct seq_file *m, void *p)
static int t_show(struct seq_file *m, void *v)
{
struct ftrace_iterator *iter = m->private;
- struct dyn_ftrace *rec = v;
+ struct dyn_ftrace *rec;
if (iter->flags & FTRACE_ITER_HASH)
- return t_hash_show(m, v);
+ return t_hash_show(m, iter);
if (iter->flags & FTRACE_ITER_PRINTALL) {
seq_printf(m, "#### all functions enabled ####\n");
return 0;
}
+ rec = iter->func;
+
if (!rec)
return 0;
@@ -1592,8 +1639,8 @@ ftrace_failures_open(struct inode *inode, struct file *file)
ret = ftrace_avail_open(inode, file);
if (!ret) {
- m = (struct seq_file *)file->private_data;
- iter = (struct ftrace_iterator *)m->private;
+ m = file->private_data;
+ iter = m->private;
iter->flags = FTRACE_ITER_FAILURES;
}
@@ -1883,7 +1930,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
struct hlist_head *hhd;
struct hlist_node *n;
unsigned long key;
- int resched;
key = hash_long(ip, FTRACE_HASH_BITS);
@@ -1897,12 +1943,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
* period. This syncs the hash iteration and freeing of items
* on the hash. rcu_read_lock is too dangerous here.
*/
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
hlist_for_each_entry_rcu(entry, n, hhd, node) {
if (entry->ip == ip)
entry->ops->func(ip, parent_ip, &entry->data);
}
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static struct ftrace_ops trace_probe_ops __read_mostly =
@@ -2624,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = {
.read = seq_read,
.write = ftrace_graph_write,
.release = ftrace_graph_release,
+ .llseek = seq_lseek,
};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index bbfc1bb1660b..000000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Memory allocator tracing
- *
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- */
-
-#include <linux/tracepoint.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/dcache.h>
-#include <linux/fs.h>
-
-#include <linux/kmemtrace.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-/* Select an alternative, minimalistic output than the original one */
-#define TRACE_KMEM_OPT_MINIMAL 0x1
-
-static struct tracer_opt kmem_opts[] = {
- /* Default disable the minimalistic output */
- { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
- { }
-};
-
-static struct tracer_flags kmem_tracer_flags = {
- .val = 0,
- .opts = kmem_opts
-};
-
-static struct trace_array *kmemtrace_array;
-
-/* Trace allocations */
-static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags,
- int node)
-{
- struct ftrace_event_call *call = &event_kmem_alloc;
- struct trace_array *tr = kmemtrace_array;
- struct kmemtrace_alloc_entry *entry;
- struct ring_buffer_event *event;
-
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
- if (!event)
- return;
-
- entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
-
- entry->ent.type = TRACE_KMEM_ALLOC;
- entry->type_id = type_id;
- entry->call_site = call_site;
- entry->ptr = ptr;
- entry->bytes_req = bytes_req;
- entry->bytes_alloc = bytes_alloc;
- entry->gfp_flags = gfp_flags;
- entry->node = node;
-
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
-
- trace_wake_up();
-}
-
-static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
- unsigned long call_site,
- const void *ptr)
-{
- struct ftrace_event_call *call = &event_kmem_free;
- struct trace_array *tr = kmemtrace_array;
- struct kmemtrace_free_entry *entry;
- struct ring_buffer_event *event;
-
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
- if (!event)
- return;
- entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
-
- entry->ent.type = TRACE_KMEM_FREE;
- entry->type_id = type_id;
- entry->call_site = call_site;
- entry->ptr = ptr;
-
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
-
- trace_wake_up();
-}
-
-static void kmemtrace_kmalloc(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmem_cache_alloc(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmalloc_node(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags,
- int node)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void kmemtrace_kmem_cache_alloc_node(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags,
- int node)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void
-kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
-{
- kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
-}
-
-static void kmemtrace_kmem_cache_free(void *ignore,
- unsigned long call_site, const void *ptr)
-{
- kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
-}
-
-static int kmemtrace_start_probes(void)
-{
- int err;
-
- err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
- if (err)
- return err;
- err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
- if (err)
- return err;
- err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
- if (err)
- return err;
- err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
- if (err)
- return err;
- err = register_trace_kfree(kmemtrace_kfree, NULL);
- if (err)
- return err;
- err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-
- return err;
-}
-
-static void kmemtrace_stop_probes(void)
-{
- unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
- unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
- unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
- unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
- unregister_trace_kfree(kmemtrace_kfree, NULL);
- unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-}
-
-static int kmem_trace_init(struct trace_array *tr)
-{
- kmemtrace_array = tr;
-
- tracing_reset_online_cpus(tr);
-
- kmemtrace_start_probes();
-
- return 0;
-}
-
-static void kmem_trace_reset(struct trace_array *tr)
-{
- kmemtrace_stop_probes();
-}
-
-static void kmemtrace_headers(struct seq_file *s)
-{
- /* Don't need headers for the original kmemtrace output */
- if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
- return;
-
- seq_printf(s, "#\n");
- seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
- " POINTER NODE CALLER\n");
- seq_printf(s, "# FREE | | | | "
- " | | | |\n");
- seq_printf(s, "# |\n\n");
-}
-
-/*
- * The following functions give the original output from kmemtrace,
- * plus the origin CPU, since reordering occurs in-kernel now.
- */
-
-#define KMEMTRACE_USER_ALLOC 0
-#define KMEMTRACE_USER_FREE 1
-
-struct kmemtrace_user_event {
- u8 event_id;
- u8 type_id;
- u16 event_size;
- u32 cpu;
- u64 timestamp;
- unsigned long call_site;
- unsigned long ptr;
-};
-
-struct kmemtrace_user_event_alloc {
- size_t bytes_req;
- size_t bytes_alloc;
- unsigned gfp_flags;
- int node;
-};
-
-static enum print_line_t
-kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_alloc_entry *entry;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
- "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
- entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
- (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
- (unsigned long)entry->gfp_flags, entry->node);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_free_entry *entry;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
- entry->type_id, (void *)entry->call_site,
- (unsigned long)entry->ptr);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_alloc_entry *entry;
- struct kmemtrace_user_event *ev;
- struct kmemtrace_user_event_alloc *ev_alloc;
-
- trace_assign_type(entry, iter->ent);
-
- ev = trace_seq_reserve(s, sizeof(*ev));
- if (!ev)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ev->event_id = KMEMTRACE_USER_ALLOC;
- ev->type_id = entry->type_id;
- ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
- ev->cpu = iter->cpu;
- ev->timestamp = iter->ts;
- ev->call_site = entry->call_site;
- ev->ptr = (unsigned long)entry->ptr;
-
- ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
- if (!ev_alloc)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ev_alloc->bytes_req = entry->bytes_req;
- ev_alloc->bytes_alloc = entry->bytes_alloc;
- ev_alloc->gfp_flags = entry->gfp_flags;
- ev_alloc->node = entry->node;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_free_entry *entry;
- struct kmemtrace_user_event *ev;
-
- trace_assign_type(entry, iter->ent);
-
- ev = trace_seq_reserve(s, sizeof(*ev));
- if (!ev)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ev->event_id = KMEMTRACE_USER_FREE;
- ev->type_id = entry->type_id;
- ev->event_size = sizeof(*ev);
- ev->cpu = iter->cpu;
- ev->timestamp = iter->ts;
- ev->call_site = entry->call_site;
- ev->ptr = (unsigned long)entry->ptr;
-
- return TRACE_TYPE_HANDLED;
-}
-
-/* The two other following provide a more minimalistic output */
-static enum print_line_t
-kmemtrace_print_alloc_compress(struct trace_iterator *iter)
-{
- struct kmemtrace_alloc_entry *entry;
- struct trace_seq *s = &iter->seq;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- /* Alloc entry */
- ret = trace_seq_printf(s, " + ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Type */
- switch (entry->type_id) {
- case KMEMTRACE_TYPE_KMALLOC:
- ret = trace_seq_printf(s, "K ");
- break;
- case KMEMTRACE_TYPE_CACHE:
- ret = trace_seq_printf(s, "C ");
- break;
- case KMEMTRACE_TYPE_PAGES:
- ret = trace_seq_printf(s, "P ");
- break;
- default:
- ret = trace_seq_printf(s, "? ");
- }
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Requested */
- ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Allocated */
- ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Flags
- * TODO: would be better to see the name of the GFP flag names
- */
- ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Pointer to allocated */
- ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Node and call site*/
- ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
- (void *)entry->call_site);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_compress(struct trace_iterator *iter)
-{
- struct kmemtrace_free_entry *entry;
- struct trace_seq *s = &iter->seq;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- /* Free entry */
- ret = trace_seq_printf(s, " - ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Type */
- switch (entry->type_id) {
- case KMEMTRACE_TYPE_KMALLOC:
- ret = trace_seq_printf(s, "K ");
- break;
- case KMEMTRACE_TYPE_CACHE:
- ret = trace_seq_printf(s, "C ");
- break;
- case KMEMTRACE_TYPE_PAGES:
- ret = trace_seq_printf(s, "P ");
- break;
- default:
- ret = trace_seq_printf(s, "? ");
- }
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Skip requested/allocated/flags */
- ret = trace_seq_printf(s, " ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Pointer to allocated */
- ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Skip node and print call site*/
- ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
-
- if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
- return TRACE_TYPE_UNHANDLED;
-
- switch (entry->type) {
- case TRACE_KMEM_ALLOC:
- return kmemtrace_print_alloc_compress(iter);
- case TRACE_KMEM_FREE:
- return kmemtrace_print_free_compress(iter);
- default:
- return TRACE_TYPE_UNHANDLED;
- }
-}
-
-static struct trace_event_functions kmem_trace_alloc_funcs = {
- .trace = kmemtrace_print_alloc,
- .binary = kmemtrace_print_alloc_user,
-};
-
-static struct trace_event kmem_trace_alloc = {
- .type = TRACE_KMEM_ALLOC,
- .funcs = &kmem_trace_alloc_funcs,
-};
-
-static struct trace_event_functions kmem_trace_free_funcs = {
- .trace = kmemtrace_print_free,
- .binary = kmemtrace_print_free_user,
-};
-
-static struct trace_event kmem_trace_free = {
- .type = TRACE_KMEM_FREE,
- .funcs = &kmem_trace_free_funcs,
-};
-
-static struct tracer kmem_tracer __read_mostly = {
- .name = "kmemtrace",
- .init = kmem_trace_init,
- .reset = kmem_trace_reset,
- .print_line = kmemtrace_print_line,
- .print_header = kmemtrace_headers,
- .flags = &kmem_tracer_flags
-};
-
-void kmemtrace_init(void)
-{
- /* earliest opportunity to start kmem tracing */
-}
-
-static int __init init_kmem_tracer(void)
-{
- if (!register_ftrace_event(&kmem_trace_alloc)) {
- pr_warning("Warning: could not register kmem events\n");
- return 1;
- }
-
- if (!register_ftrace_event(&kmem_trace_free)) {
- pr_warning("Warning: could not register kmem events\n");
- return 1;
- }
-
- if (register_tracer(&kmem_tracer) != 0) {
- pr_warning("Warning: could not register the kmem tracer\n");
- return 1;
- }
-
- return 0;
-}
-device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1da7b6ea8b85..9ed509a015d8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
RB_LEN_TIME_STAMP = 16,
};
+#define skip_time_extend(event) \
+ ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
return length + RB_EVNT_HDR_SIZE;
}
-/* inline for ring buffer fast paths */
-static unsigned
+/*
+ * Return the length of the given event. Will return
+ * the length of the time extend if the event is a
+ * time extend.
+ */
+static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
return 0;
}
+/*
+ * Return total length of time extend and data,
+ * or just the event length for all other events.
+ */
+static inline unsigned
+rb_event_ts_length(struct ring_buffer_event *event)
+{
+ unsigned len = 0;
+
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ /* time extends include the data event after it */
+ len = RB_LEN_TIME_EXTEND;
+ event = skip_time_extend(event);
+ }
+ return len + rb_event_length(event);
+}
+
/**
* ring_buffer_event_length - return the length of the event
* @event: the event to get the length of
+ *
+ * Returns the size of the data load of a data event.
+ * If the event is something other than a data event, it
+ * returns the size of the event itself. With the exception
+ * of a TIME EXTEND, where it still returns the size of the
+ * data load of the data event after it.
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
- unsigned length = rb_event_length(event);
+ unsigned length;
+
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
+
+ length = rb_event_length(event);
if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
return length;
length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
static void *
rb_event_data(struct ring_buffer_event *event)
{
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
-/* Max number of timestamps that can fit on a page */
-#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
-
int ring_buffer_print_page_header(struct trace_seq *s)
{
struct buffer_data_page field;
@@ -443,6 +477,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
*/
struct ring_buffer_per_cpu {
int cpu;
+ atomic_t record_disabled;
struct ring_buffer *buffer;
spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
@@ -462,7 +497,6 @@ struct ring_buffer_per_cpu {
unsigned long read;
u64 write_stamp;
u64 read_stamp;
- atomic_t record_disabled;
};
struct ring_buffer {
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0;
}
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+{
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+
+ /* Not the first event on the page? */
+ if (rb_event_index(event)) {
+ event->time_delta = delta & TS_MASK;
+ event->array[0] = delta >> TS_SHIFT;
+ } else {
+ /* nope, just zero it */
+ event->time_delta = 0;
+ event->array[0] = 0;
+ }
+
+ return skip_time_extend(event);
+}
+
/**
* ring_buffer_update_event - update event type and data
* @event: the even to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
* data field.
*/
static void
-rb_update_event(struct ring_buffer_event *event,
- unsigned type, unsigned length)
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event, unsigned length,
+ int add_timestamp, u64 delta)
{
- event->type_len = type;
-
- switch (type) {
-
- case RINGBUF_TYPE_PADDING:
- case RINGBUF_TYPE_TIME_EXTEND:
- case RINGBUF_TYPE_TIME_STAMP:
- break;
+ /* Only a commit updates the timestamp */
+ if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+ delta = 0;
- case 0:
- length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
- event->array[0] = length;
- else
- event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
- break;
- default:
- BUG();
+ /*
+ * If we need to add a timestamp, then we
+ * add it to the start of the resevered space.
+ */
+ if (unlikely(add_timestamp)) {
+ event = rb_add_time_stamp(event, delta);
+ length -= RB_LEN_TIME_EXTEND;
+ delta = 0;
}
+
+ event->time_delta = delta;
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ event->type_len = 0;
+ event->array[0] = length;
+ } else
+ event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
}
/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
local_sub(length, &tail_page->write);
}
-static struct ring_buffer_event *
+/*
+ * This is the slow path, force gcc not to inline it.
+ */
+static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length, unsigned long tail,
- struct buffer_page *tail_page, u64 *ts)
+ struct buffer_page *tail_page, u64 ts)
{
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
* Nested commits always have zero deltas, so
* just reread the time stamp
*/
- *ts = rb_time_stamp(buffer);
- next_page->page->time_stamp = *ts;
+ ts = rb_time_stamp(buffer);
+ next_page->page->time_stamp = ts;
}
out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned type, unsigned long length, u64 *ts)
+ unsigned long length, u64 ts,
+ u64 delta, int add_timestamp)
{
struct buffer_page *tail_page;
struct ring_buffer_event *event;
unsigned long tail, write;
+ /*
+ * If the time delta since the last event is too big to
+ * hold in the time field of the event, then we append a
+ * TIME EXTEND event ahead of the data event.
+ */
+ if (unlikely(add_timestamp))
+ length += RB_LEN_TIME_EXTEND;
+
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
tail = write - length;
/* See if we shot pass the end of this buffer page */
- if (write > BUF_PAGE_SIZE)
+ if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
tail_page, ts);
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
- rb_update_event(event, type, length);
+ rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
- /* The passed in type is zero for DATA */
- if (likely(!type))
- local_inc(&tail_page->entries);
+ local_inc(&tail_page->entries);
/*
* If this is the first commit on the page, then update
* its timestamp.
*/
if (!tail)
- tail_page->page->time_stamp = *ts;
+ tail_page->page->time_stamp = ts;
return event;
}
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long addr;
new_index = rb_event_index(event);
- old_index = new_index + rb_event_length(event);
+ old_index = new_index + rb_event_ts_length(event);
addr = (unsigned long)event;
addr &= PAGE_MASK;
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
}
-static int
-rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
- u64 *ts, u64 *delta)
-{
- struct ring_buffer_event *event;
- int ret;
-
- WARN_ONCE(*delta > (1ULL << 59),
- KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
- (unsigned long long)*delta,
- (unsigned long long)*ts,
- (unsigned long long)cpu_buffer->write_stamp);
-
- /*
- * The delta is too big, we to add a
- * new timestamp.
- */
- event = __rb_reserve_next(cpu_buffer,
- RINGBUF_TYPE_TIME_EXTEND,
- RB_LEN_TIME_EXTEND,
- ts);
- if (!event)
- return -EBUSY;
-
- if (PTR_ERR(event) == -EAGAIN)
- return -EAGAIN;
-
- /* Only a commited time event can update the write stamp */
- if (rb_event_is_commit(cpu_buffer, event)) {
- /*
- * If this is the first on the page, then it was
- * updated with the page itself. Try to discard it
- * and if we can't just make it zero.
- */
- if (rb_event_index(event)) {
- event->time_delta = *delta & TS_MASK;
- event->array[0] = *delta >> TS_SHIFT;
- } else {
- /* try to discard, since we do not need this */
- if (!rb_try_to_discard(cpu_buffer, event)) {
- /* nope, just zero it */
- event->time_delta = 0;
- event->array[0] = 0;
- }
- }
- cpu_buffer->write_stamp = *ts;
- /* let the caller know this was the commit */
- ret = 1;
- } else {
- /* Try to discard the event */
- if (!rb_try_to_discard(cpu_buffer, event)) {
- /* Darn, this is just wasted space */
- event->time_delta = 0;
- event->array[0] = 0;
- }
- ret = 0;
- }
-
- *delta = 0;
-
- return ret;
-}
-
static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
local_inc(&cpu_buffer->committing);
local_inc(&cpu_buffer->commits);
}
-static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
unsigned long length)
{
struct ring_buffer_event *event;
- u64 ts, delta = 0;
- int commit = 0;
+ u64 ts, delta;
int nr_loops = 0;
+ int add_timestamp;
+ u64 diff;
rb_start_commit(cpu_buffer);
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
length = rb_calculate_event_length(length);
again:
+ add_timestamp = 0;
+ delta = 0;
+
/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
goto out_fail;
ts = rb_time_stamp(cpu_buffer->buffer);
+ diff = ts - cpu_buffer->write_stamp;
- /*
- * Only the first commit can update the timestamp.
- * Yes there is a race here. If an interrupt comes in
- * just after the conditional and it traces too, then it
- * will also check the deltas. More than one timestamp may
- * also be made. But only the entry that did the actual
- * commit will be something other than zero.
- */
- if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
- rb_page_write(cpu_buffer->tail_page) ==
- rb_commit_index(cpu_buffer))) {
- u64 diff;
-
- diff = ts - cpu_buffer->write_stamp;
-
- /* make sure this diff is calculated here */
- barrier();
-
- /* Did the write stamp get updated already? */
- if (unlikely(ts < cpu_buffer->write_stamp))
- goto get_event;
+ /* make sure this diff is calculated here */
+ barrier();
+ /* Did the write stamp get updated already? */
+ if (likely(ts >= cpu_buffer->write_stamp)) {
delta = diff;
if (unlikely(test_time_stamp(delta))) {
-
- commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
- if (commit == -EBUSY)
- goto out_fail;
-
- if (commit == -EAGAIN)
- goto again;
-
- RB_WARN_ON(cpu_buffer, commit < 0);
+ WARN_ONCE(delta > (1ULL << 59),
+ KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+ (unsigned long long)delta,
+ (unsigned long long)ts,
+ (unsigned long long)cpu_buffer->write_stamp);
+ add_timestamp = 1;
}
}
- get_event:
- event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+ event = __rb_reserve_next(cpu_buffer, length, ts,
+ delta, add_timestamp);
if (unlikely(PTR_ERR(event) == -EAGAIN))
goto again;
if (!event)
goto out_fail;
- if (!rb_event_is_commit(cpu_buffer, event))
- delta = 0;
-
- event->time_delta = delta;
-
return event;
out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
#define TRACE_RECURSIVE_DEPTH 16
-static int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
{
- current->trace_recursion++;
-
- if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
- return 0;
-
/* Disable all tracing before we do anything else */
tracing_off_permanent();
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
in_nmi());
WARN_ON_ONCE(1);
+}
+
+static inline int trace_recursive_lock(void)
+{
+ current->trace_recursion++;
+
+ if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+ return 0;
+
+ trace_recursive_fail();
+
return -1;
}
-static void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
{
WARN_ON_ONCE(!current->trace_recursion);
@@ -2242,8 +2232,6 @@ static void trace_recursive_unlock(void)
#endif
-static DEFINE_PER_CPU(int, rb_need_resched);
-
/**
* ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from
@@ -2264,13 +2252,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
- int cpu, resched;
+ int cpu;
if (ring_buffer_flags != RB_BUFFERS_ON)
return NULL;
/* If we are tracing schedule, we don't want to recurse */
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
if (atomic_read(&buffer->record_disabled))
goto out_nocheck;
@@ -2295,21 +2283,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
if (!event)
goto out;
- /*
- * Need to store resched state on this cpu.
- * Only the first needs to.
- */
-
- if (preempt_count() == 1)
- per_cpu(rb_need_resched, cpu) = resched;
-
return event;
out:
trace_recursive_unlock();
out_nocheck:
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
return NULL;
}
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2318,12 +2298,28 @@ static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
+ u64 delta;
+
/*
* The event first in the commit queue updates the
* time stamp.
*/
- if (rb_event_is_commit(cpu_buffer, event))
- cpu_buffer->write_stamp += event->time_delta;
+ if (rb_event_is_commit(cpu_buffer, event)) {
+ /*
+ * A commit event that is first on a page
+ * updates the write timestamp with the page stamp
+ */
+ if (!rb_event_index(event))
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ cpu_buffer->write_stamp += delta;
+ } else
+ cpu_buffer->write_stamp += event->time_delta;
+ }
}
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2355,13 +2351,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
trace_recursive_unlock();
- /*
- * Only the last preempt count needs to restore preemption.
- */
- if (preempt_count() == 1)
- ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
- else
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
return 0;
}
@@ -2369,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static inline void rb_event_discard(struct ring_buffer_event *event)
{
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
+
/* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;
@@ -2469,13 +2462,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
trace_recursive_unlock();
- /*
- * Only the last preempt count needs to restore preemption.
- */
- if (preempt_count() == 1)
- ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
- else
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
}
EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2488,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
struct ring_buffer_event *event;
void *body;
int ret = -EBUSY;
- int cpu, resched;
+ int cpu;
if (ring_buffer_flags != RB_BUFFERS_ON)
return -EBUSY;
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
if (atomic_read(&buffer->record_disabled))
goto out;
@@ -2536,7 +2523,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
ret = 0;
out:
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
return ret;
}
@@ -2628,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
+/*
+ * The total entries in the ring buffer is the running counter
+ * of entries entered into the ring buffer, minus the sum of
+ * the entries read from the ring buffer and the number of
+ * entries that were overwritten.
+ */
+static inline unsigned long
+rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return local_read(&cpu_buffer->entries) -
+ (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
+}
+
/**
* ring_buffer_entries_cpu - get the number of entries in a cpu buffer
* @buffer: The ring buffer
@@ -2636,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
- unsigned long ret;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return 0;
cpu_buffer = buffer->buffers[cpu];
- ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
- - cpu_buffer->read;
- return ret;
+ return rb_num_of_entries(cpu_buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
@@ -2706,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
/* if you care about this being correct, lock the buffer */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
- entries += (local_read(&cpu_buffer->entries) -
- local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
+ entries += rb_num_of_entries(cpu_buffer);
}
return entries;
@@ -3007,13 +3003,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
static void rb_advance_iter(struct ring_buffer_iter *iter)
{
- struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
unsigned length;
cpu_buffer = iter->cpu_buffer;
- buffer = cpu_buffer->buffer;
/*
* Check if we are at the end of the buffer.
@@ -3064,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
again:
/*
- * We repeat when a timestamp is encountered. It is possible
- * to get multiple timestamps from an interrupt entering just
- * as one timestamp is about to be written, or from discarded
- * commits. The most that we can have is the number on a single page.
+ * We repeat when a time extend is encountered.
+ * Since the time extend is always attached to a data event,
+ * we should never loop more than once.
+ * (We never hit the following condition more than twice).
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
reader = rb_get_reader_page(cpu_buffer);
@@ -3145,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return NULL;
/*
- * We repeat when a timestamp is encountered.
- * We can get multiple timestamps by nested interrupts or also
- * if filtering is on (discarding commits). Since discarding
- * commits can be frequent we can get a lot of timestamps.
- * But we limit them by not adding timestamps if they begin
- * at the start of a page.
+ * We repeat when a time extend is encountered.
+ * Since the time extend is always attached to a data event,
+ * we should never loop more than once.
+ * (We never hit the following condition more than twice).
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
if (rb_per_cpu_empty(cpu_buffer))
@@ -3850,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
if (len > (commit - read))
len = (commit - read);
- size = rb_event_length(event);
+ /* Always keep the time extend and data together */
+ size = rb_event_ts_length(event);
if (len < size)
goto out_unlock;
@@ -3868,8 +3861,12 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
rpos = reader->read;
pos += size;
+ if (rpos >= commit)
+ break;
+
event = rb_reader_event(cpu_buffer);
- size = rb_event_length(event);
+ /* Always keep the time extend and data together */
+ size = rb_event_ts_length(event);
} while (len > size);
/* update bpage */
@@ -3986,6 +3983,7 @@ static const struct file_operations rb_simple_fops = {
.open = tracing_open_generic,
.read = rb_simple_read,
.write = rb_simple_write,
+ .llseek = default_llseek,
};
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 086d36316805..82d9b8106cd0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -101,10 +101,7 @@ static inline void ftrace_enable_cpu(void)
preempt_enable();
}
-static cpumask_var_t __read_mostly tracing_buffer_mask;
-
-#define for_each_tracing_cpu(cpu) \
- for_each_cpu(cpu, tracing_buffer_mask)
+cpumask_var_t __read_mostly tracing_buffer_mask;
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -344,7 +341,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
- TRACE_ITER_GRAPH_TIME;
+ TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);
@@ -428,6 +425,7 @@ static const char *trace_options[] = {
"latency-format",
"sleep-time",
"graph-time",
+ "record-cmd",
NULL
};
@@ -659,6 +657,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
+ if (!current_trace->use_max_tr) {
+ WARN_ON_ONCE(1);
+ return;
+ }
arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
@@ -685,6 +687,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
+ if (!current_trace->use_max_tr) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
arch_spin_lock(&ftrace_max_lock);
ftrace_disable_cpu();
@@ -729,18 +736,11 @@ __acquires(kernel_lock)
return -1;
}
- if (strlen(type->name) > MAX_TRACER_SIZE) {
+ if (strlen(type->name) >= MAX_TRACER_SIZE) {
pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
return -1;
}
- /*
- * When this gets called we hold the BKL which means that
- * preemption is disabled. Various trace selftests however
- * need to disable and enable preemption for successful tests.
- * So we drop the BKL here and grab it after the tests again.
- */
- unlock_kernel();
mutex_lock(&trace_types_lock);
tracing_selftest_running = true;
@@ -822,7 +822,6 @@ __acquires(kernel_lock)
#endif
out_unlock:
- lock_kernel();
return ret;
}
@@ -1331,61 +1330,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
#endif /* CONFIG_STACKTRACE */
-static void
-ftrace_trace_special(void *__tr,
- unsigned long arg1, unsigned long arg2, unsigned long arg3,
- int pc)
-{
- struct ftrace_event_call *call = &event_special;
- struct ring_buffer_event *event;
- struct trace_array *tr = __tr;
- struct ring_buffer *buffer = tr->buffer;
- struct special_entry *entry;
-
- event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
- sizeof(*entry), 0, pc);
- if (!event)
- return;
- entry = ring_buffer_event_data(event);
- entry->arg1 = arg1;
- entry->arg2 = arg2;
- entry->arg3 = arg3;
-
- if (!filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(buffer, event, 0, pc);
-}
-
-void
-__trace_special(void *__tr, void *__data,
- unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
- ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
-}
-
-void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
- struct trace_array *tr = &global_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
- int cpu;
- int pc;
-
- if (tracing_disabled)
- return;
-
- pc = preempt_count();
- local_irq_save(flags);
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
-
- if (likely(atomic_inc_return(&data->disabled) == 1))
- ftrace_trace_special(tr, arg1, arg2, arg3, pc);
-
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
-}
-
/**
* trace_vbprintk - write binary msg to tracing buffer
*
@@ -1404,7 +1348,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
struct bprint_entry *entry;
unsigned long flags;
int disable;
- int resched;
int cpu, len = 0, size, pc;
if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1414,7 +1357,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
pause_graph_tracing();
pc = preempt_count();
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -1452,7 +1395,7 @@ out_unlock:
out:
atomic_dec_return(&data->disabled);
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
unpause_graph_tracing();
return len;
@@ -1539,11 +1482,6 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
}
EXPORT_SYMBOL_GPL(trace_vprintk);
-enum trace_file_type {
- TRACE_FILE_LAT_FMT = 1,
- TRACE_FILE_ANNOTATE = 2,
-};
-
static void trace_iterator_increment(struct trace_iterator *iter)
{
/* Don't allow ftrace to trace into the ring buffers */
@@ -1641,7 +1579,7 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
}
/* Find the next real entry, and increment the iterator to the next entry */
-static void *find_next_entry_inc(struct trace_iterator *iter)
+void *trace_find_next_entry_inc(struct trace_iterator *iter)
{
iter->ent = __find_next_entry(iter, &iter->cpu,
&iter->lost_events, &iter->ts);
@@ -1676,19 +1614,19 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
return NULL;
if (iter->idx < 0)
- ent = find_next_entry_inc(iter);
+ ent = trace_find_next_entry_inc(iter);
else
ent = iter;
while (ent && iter->idx < i)
- ent = find_next_entry_inc(iter);
+ ent = trace_find_next_entry_inc(iter);
iter->pos = *pos;
return ent;
}
-static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
+void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
struct trace_array *tr = iter->tr;
struct ring_buffer_event *event;
@@ -2049,7 +1987,7 @@ int trace_empty(struct trace_iterator *iter)
}
/* Called with trace_event_read_lock() held. */
-static enum print_line_t print_trace_line(struct trace_iterator *iter)
+enum print_line_t print_trace_line(struct trace_iterator *iter)
{
enum print_line_t ret;
@@ -2258,7 +2196,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
static int tracing_release(struct inode *inode, struct file *file)
{
- struct seq_file *m = (struct seq_file *)file->private_data;
+ struct seq_file *m = file->private_data;
struct trace_iterator *iter;
int cpu;
@@ -2394,6 +2332,7 @@ static const struct file_operations show_traces_fops = {
.open = show_traces_open,
.read = seq_read,
.release = seq_release,
+ .llseek = seq_lseek,
};
/*
@@ -2487,6 +2426,7 @@ static const struct file_operations tracing_cpumask_fops = {
.open = tracing_open_generic,
.read = tracing_cpumask_read,
.write = tracing_cpumask_write,
+ .llseek = generic_file_llseek,
};
static int tracing_trace_options_show(struct seq_file *m, void *v)
@@ -2562,6 +2502,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
trace_flags |= mask;
else
trace_flags &= ~mask;
+
+ if (mask == TRACE_ITER_RECORD_CMD)
+ trace_event_enable_cmd_record(enabled);
}
static ssize_t
@@ -2653,6 +2596,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_readme_fops = {
.open = tracing_open_generic,
.read = tracing_readme_read,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -2703,6 +2647,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
static const struct file_operations tracing_saved_cmdlines_fops = {
.open = tracing_open_generic,
.read = tracing_saved_cmdlines_read,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -2798,6 +2743,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
if (ret < 0)
return ret;
+ if (!current_trace->use_max_tr)
+ goto out;
+
ret = ring_buffer_resize(max_tr.buffer, size);
if (ret < 0) {
int r;
@@ -2825,11 +2773,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
return ret;
}
+ max_tr.entries = size;
+ out:
global_trace.entries = size;
return ret;
}
+
/**
* tracing_update_buffers - used by tracing facility to expand ring buffers
*
@@ -2890,12 +2841,26 @@ static int tracing_set_tracer(const char *buf)
trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
-
+ if (current_trace && current_trace->use_max_tr) {
+ /*
+ * We don't free the ring buffer. instead, resize it because
+ * The max_tr ring buffer has some state (e.g. ring->clock) and
+ * we want preserve it.
+ */
+ ring_buffer_resize(max_tr.buffer, 1);
+ max_tr.entries = 1;
+ }
destroy_trace_option_files(topts);
current_trace = t;
topts = create_trace_option_files(current_trace);
+ if (current_trace->use_max_tr) {
+ ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
+ if (ret < 0)
+ goto out;
+ max_tr.entries = global_trace.entries;
+ }
if (t->init) {
ret = tracer_init(t, tr);
@@ -3032,6 +2997,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
if (iter->trace->pipe_open)
iter->trace->pipe_open(iter);
+ nonseekable_open(inode, filp);
out:
mutex_unlock(&trace_types_lock);
return ret;
@@ -3211,7 +3177,7 @@ waitagain:
trace_event_read_lock();
trace_access_lock(iter->cpu_file);
- while (find_next_entry_inc(iter) != NULL) {
+ while (trace_find_next_entry_inc(iter) != NULL) {
enum print_line_t ret;
int len = iter->seq.len;
@@ -3294,7 +3260,7 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
if (ret != TRACE_TYPE_NO_CONSUME)
trace_consume(iter);
rem -= count;
- if (!find_next_entry_inc(iter)) {
+ if (!trace_find_next_entry_inc(iter)) {
rem = 0;
iter->ent = NULL;
break;
@@ -3350,7 +3316,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
if (ret <= 0)
goto out_err;
- if (!iter->ent && !find_next_entry_inc(iter)) {
+ if (!iter->ent && !trace_find_next_entry_inc(iter)) {
ret = -EFAULT;
goto out_err;
}
@@ -3477,7 +3443,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
}
tracing_start();
- max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
return cnt;
@@ -3498,6 +3463,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
char *buf;
+ size_t written;
if (tracing_disabled)
return -EINVAL;
@@ -3519,11 +3485,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
} else
buf[cnt] = '\0';
- cnt = mark_printk("%s", buf);
+ written = mark_printk("%s", buf);
kfree(buf);
- *fpos += cnt;
+ *fpos += written;
- return cnt;
+ /* don't tell userspace we wrote more - it might confuse them */
+ if (written > cnt)
+ written = cnt;
+
+ return written;
}
static int tracing_clock_show(struct seq_file *m, void *v)
@@ -3590,18 +3560,21 @@ static const struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
.write = tracing_max_lat_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations tracing_ctrl_fops = {
.open = tracing_open_generic,
.read = tracing_ctrl_read,
.write = tracing_ctrl_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations set_tracer_fops = {
.open = tracing_open_generic,
.read = tracing_set_trace_read,
.write = tracing_set_trace_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations tracing_pipe_fops = {
@@ -3610,17 +3583,20 @@ static const struct file_operations tracing_pipe_fops = {
.read = tracing_read_pipe,
.splice_read = tracing_splice_read_pipe,
.release = tracing_release_pipe,
+ .llseek = no_llseek,
};
static const struct file_operations tracing_entries_fops = {
.open = tracing_open_generic,
.read = tracing_entries_read,
.write = tracing_entries_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations tracing_mark_fops = {
.open = tracing_open_generic,
.write = tracing_mark_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations trace_clock_fops = {
@@ -3926,6 +3902,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_stats_fops = {
.open = tracing_open_generic,
.read = tracing_stats_read,
+ .llseek = generic_file_llseek,
};
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -3962,6 +3939,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
.read = tracing_read_dyn_info,
+ .llseek = generic_file_llseek,
};
#endif
@@ -4018,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
{
struct dentry *d_percpu = tracing_dentry_percpu();
struct dentry *d_cpu;
- /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
- char cpu_dir[7];
+ char cpu_dir[30]; /* 30 characters should be more than enough */
- if (cpu > 999 || cpu < 0)
- return;
-
- sprintf(cpu_dir, "cpu%ld", cpu);
+ snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {
pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
@@ -4115,6 +4089,7 @@ static const struct file_operations trace_options_fops = {
.open = tracing_open_generic,
.read = trace_options_read,
.write = trace_options_write,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -4166,6 +4141,7 @@ static const struct file_operations trace_options_core_fops = {
.open = tracing_open_generic,
.read = trace_options_core_read,
.write = trace_options_core_write,
+ .llseek = generic_file_llseek,
};
struct dentry *trace_create_file(const char *name,
@@ -4355,9 +4331,6 @@ static __init int tracer_init_debugfs(void)
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif
-#ifdef CONFIG_SYSPROF_TRACER
- init_tracer_sysprof_debugfs(d_tracer);
-#endif
create_trace_options_dir();
@@ -4414,7 +4387,7 @@ static struct notifier_block trace_die_notifier = {
*/
#define KERN_TRACE KERN_EMERG
-static void
+void
trace_printk_seq(struct trace_seq *s)
{
/* Probably should print a warning here. */
@@ -4429,6 +4402,13 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_init(s);
}
+void trace_init_global_iter(struct trace_iterator *iter)
+{
+ iter->tr = &global_trace;
+ iter->trace = current_trace;
+ iter->cpu_file = TRACE_PIPE_ALL_CPU;
+}
+
static void
__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
{
@@ -4454,8 +4434,10 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
if (disable_tracing)
ftrace_kill();
+ trace_init_global_iter(&iter);
+
for_each_tracing_cpu(cpu) {
- atomic_inc(&global_trace.data[cpu]->disabled);
+ atomic_inc(&iter.tr->data[cpu]->disabled);
}
old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
@@ -4504,7 +4486,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
iter.iter_flags |= TRACE_FILE_LAT_FMT;
iter.pos = -1;
- if (find_next_entry_inc(&iter) != NULL) {
+ if (trace_find_next_entry_inc(&iter) != NULL) {
int ret;
ret = print_trace_line(&iter);
@@ -4526,7 +4508,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
trace_flags |= old_userobj;
for_each_tracing_cpu(cpu) {
- atomic_dec(&global_trace.data[cpu]->disabled);
+ atomic_dec(&iter.tr->data[cpu]->disabled);
}
tracing_on();
}
@@ -4575,16 +4557,14 @@ __init static int tracer_alloc_buffers(void)
#ifdef CONFIG_TRACER_MAX_TRACE
- max_tr.buffer = ring_buffer_alloc(ring_buf_size,
- TRACE_BUFFER_FLAGS);
+ max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
if (!max_tr.buffer) {
printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
WARN_ON(1);
ring_buffer_free(global_trace.buffer);
goto out_free_cpumask;
}
- max_tr.entries = ring_buffer_size(max_tr.buffer);
- WARN_ON(max_tr.entries != global_trace.entries);
+ max_tr.entries = 1;
#endif
/* Allocate the first page for all buffers */
@@ -4597,9 +4577,6 @@ __init static int tracer_alloc_buffers(void)
register_tracer(&nop_trace);
current_trace = &nop_trace;
-#ifdef CONFIG_BOOT_TRACER
- register_tracer(&boot_tracer);
-#endif
/* All seems OK, enable tracing */
tracing_disabled = 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cd96399463f..9021f8c0c0c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,10 +9,7 @@
#include <linux/mmiotrace.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
-#include <trace/boot.h>
-#include <linux/kmemtrace.h>
#include <linux/hw_breakpoint.h>
-
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
@@ -25,30 +22,17 @@ enum trace_type {
TRACE_STACK,
TRACE_PRINT,
TRACE_BPRINT,
- TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
TRACE_BRANCH,
- TRACE_BOOT_CALL,
- TRACE_BOOT_RET,
TRACE_GRAPH_RET,
TRACE_GRAPH_ENT,
TRACE_USER_STACK,
- TRACE_KMEM_ALLOC,
- TRACE_KMEM_FREE,
TRACE_BLK,
- TRACE_KSYM,
__TRACE_LAST_TYPE,
};
-enum kmemtrace_type_id {
- KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
- KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
- KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
-};
-
-extern struct tracer boot_tracer;
#undef __field
#define __field(type, item) type item;
@@ -204,23 +188,15 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
- IF_ASSIGN(var, ent, struct special_entry, 0); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
TRACE_MMIO_RW); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
TRACE_MMIO_MAP); \
- IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
- IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
TRACE_GRAPH_ENT); \
IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
TRACE_GRAPH_RET); \
- IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
- TRACE_KMEM_ALLOC); \
- IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
- TRACE_KMEM_FREE); \
- IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
@@ -298,6 +274,7 @@ struct tracer {
struct tracer *next;
int print_max;
struct tracer_flags *flags;
+ int use_max_tr;
};
@@ -318,7 +295,6 @@ struct dentry *trace_create_file(const char *name,
const struct file_operations *fops);
struct dentry *tracing_init_dentry(void);
-void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
struct ring_buffer_event;
@@ -338,6 +314,14 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts);
+int trace_empty(struct trace_iterator *iter);
+
+void *trace_find_next_entry_inc(struct trace_iterator *iter);
+
+void trace_init_global_iter(struct trace_iterator *iter);
+
+void tracing_iter_reset(struct trace_iterator *iter, int cpu);
+
void default_wait_pipe(struct trace_iterator *iter);
void poll_wait_pipe(struct trace_iterator *iter);
@@ -355,15 +339,14 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *wakee,
struct task_struct *cur,
unsigned long flags, int pc);
-void trace_special(struct trace_array *tr,
- struct trace_array_cpu *data,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3, int pc);
void trace_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags, int pc);
+void trace_graph_function(struct trace_array *tr,
+ unsigned long ip,
+ unsigned long parent_ip,
+ unsigned long flags, int pc);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -380,8 +363,15 @@ void tracing_start_sched_switch_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
int is_tracing_stopped(void);
+enum trace_file_type {
+ TRACE_FILE_LAT_FMT = 1,
+ TRACE_FILE_ANNOTATE = 2,
+};
+
+extern cpumask_var_t __read_mostly tracing_buffer_mask;
-extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+#define for_each_tracing_cpu(cpu) \
+ for_each_cpu(cpu, tracing_buffer_mask)
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
@@ -452,12 +442,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
struct trace_array *tr);
-extern int trace_selftest_startup_sysprof(struct tracer *trace,
- struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
-extern int trace_selftest_startup_ksym(struct tracer *trace,
- struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
@@ -471,6 +457,8 @@ trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args);
int trace_array_printk(struct trace_array *tr,
unsigned long ip, const char *fmt, ...);
+void trace_printk_seq(struct trace_seq *s);
+enum print_line_t print_trace_line(struct trace_iterator *iter);
extern unsigned long trace_flags;
@@ -617,6 +605,7 @@ enum trace_iterator_flags {
TRACE_ITER_LATENCY_FMT = 0x20000,
TRACE_ITER_SLEEP_TIME = 0x40000,
TRACE_ITER_GRAPH_TIME = 0x80000,
+ TRACE_ITER_RECORD_CMD = 0x100000,
};
/*
@@ -628,54 +617,6 @@ enum trace_iterator_flags {
extern struct tracer nop_trace;
-/**
- * ftrace_preempt_disable - disable preemption scheduler safe
- *
- * When tracing can happen inside the scheduler, there exists
- * cases that the tracing might happen before the need_resched
- * flag is checked. If this happens and the tracer calls
- * preempt_enable (after a disable), a schedule might take place
- * causing an infinite recursion.
- *
- * To prevent this, we read the need_resched flag before
- * disabling preemption. When we want to enable preemption we
- * check the flag, if it is set, then we call preempt_enable_no_resched.
- * Otherwise, we call preempt_enable.
- *
- * The rational for doing the above is that if need_resched is set
- * and we have yet to reschedule, we are either in an atomic location
- * (where we do not need to check for scheduling) or we are inside
- * the scheduler and do not want to resched.
- */
-static inline int ftrace_preempt_disable(void)
-{
- int resched;
-
- resched = need_resched();
- preempt_disable_notrace();
-
- return resched;
-}
-
-/**
- * ftrace_preempt_enable - enable preemption scheduler safe
- * @resched: the return value from ftrace_preempt_disable
- *
- * This is a scheduler safe way to enable preemption and not miss
- * any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we are either inside an atomic or
- * are inside the scheduler (we would have already scheduled
- * otherwise). In this case, we do not want to call normal
- * preempt_enable, but preempt_enable_no_resched instead.
- */
-static inline void ftrace_preempt_enable(int resched)
-{
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
-}
-
#ifdef CONFIG_BRANCH_TRACER
extern int enable_branch_tracing(struct trace_array *tr);
extern void disable_branch_tracing(void);
@@ -766,6 +707,8 @@ struct filter_pred {
int pop_n;
};
+extern struct list_head ftrace_common_fields;
+
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
extern void print_event_filter(struct ftrace_event_call *call,
@@ -795,6 +738,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
return 0;
}
+extern void trace_event_enable_cmd_record(bool enable);
+
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index c21d5f3956ad..000000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * ring buffer based initcalls tracer
- *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/ftrace.h>
-#include <linux/kallsyms.h>
-#include <linux/time.h>
-
-#include "trace.h"
-#include "trace_output.h"
-
-static struct trace_array *boot_trace;
-static bool pre_initcalls_finished;
-
-/* Tells the boot tracer that the pre_smp_initcalls are finished.
- * So we are ready .
- * It doesn't enable sched events tracing however.
- * You have to call enable_boot_trace to do so.
- */
-void start_boot_trace(void)
-{
- pre_initcalls_finished = true;
-}
-
-void enable_boot_trace(void)
-{
- if (boot_trace && pre_initcalls_finished)
- tracing_start_sched_switch_record();
-}
-
-void disable_boot_trace(void)
-{
- if (boot_trace && pre_initcalls_finished)
- tracing_stop_sched_switch_record();
-}
-
-static int boot_trace_init(struct trace_array *tr)
-{
- boot_trace = tr;
-
- if (!tr)
- return 0;
-
- tracing_reset_online_cpus(tr);
-
- tracing_sched_switch_assign_trace(tr);
- return 0;
-}
-
-static enum print_line_t
-initcall_call_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
- struct trace_seq *s = &iter->seq;
- struct trace_boot_call *field;
- struct boot_trace_call *call;
- u64 ts;
- unsigned long nsec_rem;
- int ret;
-
- trace_assign_type(field, entry);
- call = &field->boot_call;
- ts = iter->ts;
- nsec_rem = do_div(ts, NSEC_PER_SEC);
-
- ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
- (unsigned long)ts, nsec_rem, call->func, call->caller);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- else
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-initcall_ret_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
- struct trace_seq *s = &iter->seq;
- struct trace_boot_ret *field;
- struct boot_trace_ret *init_ret;
- u64 ts;
- unsigned long nsec_rem;
- int ret;
-
- trace_assign_type(field, entry);
- init_ret = &field->boot_ret;
- ts = iter->ts;
- nsec_rem = do_div(ts, NSEC_PER_SEC);
-
- ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
- "returned %d after %llu msecs\n",
- (unsigned long) ts,
- nsec_rem,
- init_ret->func, init_ret->result, init_ret->duration);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- else
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t initcall_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
-
- switch (entry->type) {
- case TRACE_BOOT_CALL:
- return initcall_call_print_line(iter);
- case TRACE_BOOT_RET:
- return initcall_ret_print_line(iter);
- default:
- return TRACE_TYPE_UNHANDLED;
- }
-}
-
-struct tracer boot_tracer __read_mostly =
-{
- .name = "initcall",
- .init = boot_trace_init,
- .reset = tracing_reset_online_cpus,
- .print_line = initcall_print_line,
-};
-
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
-{
- struct ftrace_event_call *call = &event_boot_call;
- struct ring_buffer_event *event;
- struct ring_buffer *buffer;
- struct trace_boot_call *entry;
- struct trace_array *tr = boot_trace;
-
- if (!tr || !pre_initcalls_finished)
- return;
-
- /* Get its name now since this function could
- * disappear because it is in the .init section.
- */
- sprint_symbol(bt->func, (unsigned long)fn);
- preempt_disable();
-
- buffer = tr->buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
- sizeof(*entry), 0, 0);
- if (!event)
- goto out;
- entry = ring_buffer_event_data(event);
- entry->boot_call = *bt;
- if (!filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
- preempt_enable();
-}
-
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
-{
- struct ftrace_event_call *call = &event_boot_ret;
- struct ring_buffer_event *event;
- struct ring_buffer *buffer;
- struct trace_boot_ret *entry;
- struct trace_array *tr = boot_trace;
-
- if (!tr || !pre_initcalls_finished)
- return;
-
- sprint_symbol(bt->func, (unsigned long)fn);
- preempt_disable();
-
- buffer = tr->buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
- sizeof(*entry), 0, 0);
- if (!event)
- goto out;
- entry = ring_buffer_event_data(event);
- entry->boot_ret = *bt;
- if (!filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
- preempt_enable();
-}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1a..685a67d55db0 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -32,16 +32,15 @@
u64 notrace trace_clock_local(void)
{
u64 clock;
- int resched;
/*
* sched_clock() is an architecture implemented, fast, scalable,
* lockless clock. It is not guaranteed to be coherent across
* CPUs, nor across CPU idle events.
*/
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
clock = sched_clock();
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
return clock;
}
@@ -56,7 +55,7 @@ u64 notrace trace_clock_local(void)
*/
u64 notrace trace_clock(void)
{
- return cpu_clock(raw_smp_processor_id());
+ return local_clock();
}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index dc008c1240da..e3dfecaf13e6 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -151,23 +151,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
);
/*
- * Special (free-form) trace entry:
- */
-FTRACE_ENTRY(special, special_entry,
-
- TRACE_SPECIAL,
-
- F_STRUCT(
- __field( unsigned long, arg1 )
- __field( unsigned long, arg2 )
- __field( unsigned long, arg3 )
- ),
-
- F_printk("(%08lx) (%08lx) (%08lx)",
- __entry->arg1, __entry->arg2, __entry->arg3)
-);
-
-/*
* Stack-trace entry:
*/
@@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
__entry->map_id, __entry->opcode)
);
-FTRACE_ENTRY(boot_call, trace_boot_call,
-
- TRACE_BOOT_CALL,
-
- F_STRUCT(
- __field_struct( struct boot_trace_call, boot_call )
- __field_desc( pid_t, boot_call, caller )
- __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
- ),
-
- F_printk("%d %s", __entry->caller, __entry->func)
-);
-
-FTRACE_ENTRY(boot_ret, trace_boot_ret,
-
- TRACE_BOOT_RET,
-
- F_STRUCT(
- __field_struct( struct boot_trace_ret, boot_ret )
- __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
- __field_desc( int, boot_ret, result )
- __field_desc( unsigned long, boot_ret, duration )
- ),
-
- F_printk("%s %d %lx",
- __entry->func, __entry->result, __entry->duration)
-);
#define TRACE_FUNC_SIZE 30
#define TRACE_FILE_SIZE 20
@@ -318,53 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,
__entry->func, __entry->file, __entry->correct)
);
-FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
-
- TRACE_KMEM_ALLOC,
-
- F_STRUCT(
- __field( enum kmemtrace_type_id, type_id )
- __field( unsigned long, call_site )
- __field( const void *, ptr )
- __field( size_t, bytes_req )
- __field( size_t, bytes_alloc )
- __field( gfp_t, gfp_flags )
- __field( int, node )
- ),
-
- F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
- " flags:%x node:%d",
- __entry->type_id, __entry->call_site, __entry->ptr,
- __entry->bytes_req, __entry->bytes_alloc,
- __entry->gfp_flags, __entry->node)
-);
-
-FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
-
- TRACE_KMEM_FREE,
-
- F_STRUCT(
- __field( enum kmemtrace_type_id, type_id )
- __field( unsigned long, call_site )
- __field( const void *, ptr )
- ),
-
- F_printk("type:%u call_site:%lx ptr:%p",
- __entry->type_id, __entry->call_site, __entry->ptr)
-);
-
-FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
-
- TRACE_KSYM,
-
- F_STRUCT(
- __field( unsigned long, ip )
- __field( unsigned char, type )
- __array( char , cmd, TASK_COMM_LEN )
- __field( unsigned long, addr )
- ),
-
- F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
- (void *)__entry->ip, (unsigned int)__entry->type,
- (void *)__entry->addr, __entry->cmd)
-);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 8a2b73f7c068..39c059ca670e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,9 +9,7 @@
#include <linux/kprobes.h>
#include "trace.h"
-EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
-
-static char *perf_trace_buf[4];
+static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
/*
* Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -26,7 +24,7 @@ static int total_ref_count;
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
- struct hlist_head *list;
+ struct hlist_head __percpu *list;
int ret = -ENOMEM;
int cpu;
@@ -44,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
tp_event->perf_events = list;
if (!total_ref_count) {
- char *buf;
+ char __percpu *buf;
int i;
- for (i = 0; i < 4; i++) {
- buf = (char *)alloc_percpu(perf_trace_t);
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
+ buf = (char __percpu *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail;
@@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
}
}
- if (tp_event->class->reg)
- ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
- else
- ret = tracepoint_probe_register(tp_event->name,
- tp_event->class->perf_probe,
- tp_event);
-
+ ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
if (ret)
goto fail;
@@ -73,7 +65,7 @@ fail:
if (!total_ref_count) {
int i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
@@ -96,11 +88,11 @@ int perf_trace_init(struct perf_event *p_event)
mutex_lock(&event_mutex);
list_for_each_entry(tp_event, &ftrace_events, list) {
if (tp_event->event.type == event_id &&
- tp_event->class &&
- (tp_event->class->perf_probe ||
- tp_event->class->reg) &&
+ tp_event->class && tp_event->class->reg &&
try_module_get(tp_event->mod)) {
ret = perf_trace_event_init(tp_event, p_event);
+ if (ret)
+ module_put(tp_event->mod);
break;
}
}
@@ -109,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event)
return ret;
}
-int perf_trace_enable(struct perf_event *p_event)
+int perf_trace_add(struct perf_event *p_event, int flags)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct hlist_head __percpu *pcpu_list;
struct hlist_head *list;
- list = tp_event->perf_events;
- if (WARN_ON_ONCE(!list))
+ pcpu_list = tp_event->perf_events;
+ if (WARN_ON_ONCE(!pcpu_list))
return -EINVAL;
- list = this_cpu_ptr(list);
+ if (!(flags & PERF_EF_START))
+ p_event->hw.state = PERF_HES_STOPPED;
+
+ list = this_cpu_ptr(pcpu_list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return 0;
}
-void perf_trace_disable(struct perf_event *p_event)
+void perf_trace_del(struct perf_event *p_event, int flags)
{
hlist_del_rcu(&p_event->hlist_entry);
}
@@ -138,29 +134,25 @@ void perf_trace_destroy(struct perf_event *p_event)
if (--tp_event->perf_refcount > 0)
goto out;
- if (tp_event->class->reg)
- tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
- else
- tracepoint_probe_unregister(tp_event->name,
- tp_event->class->perf_probe,
- tp_event);
+ tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
/*
- * Ensure our callback won't be called anymore. See
- * tracepoint_probe_unregister() and __DO_TRACE().
+ * Ensure our callback won't be called anymore. The buffers
+ * will be freed after that.
*/
- synchronize_sched();
+ tracepoint_synchronize_unregister();
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
if (!--total_ref_count) {
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
}
out:
+ module_put(tp_event->mod);
mutex_unlock(&event_mutex);
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53cffc0b0801..0725eeab1937 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -28,6 +28,7 @@
DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
+LIST_HEAD(ftrace_common_fields);
struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
@@ -37,15 +38,11 @@ trace_get_fields(struct ftrace_event_call *event_call)
return event_call->class->get_fields(event_call);
}
-int trace_define_field(struct ftrace_event_call *call, const char *type,
- const char *name, int offset, int size, int is_signed,
- int filter_type)
+static int __trace_define_field(struct list_head *head, const char *type,
+ const char *name, int offset, int size,
+ int is_signed, int filter_type)
{
struct ftrace_event_field *field;
- struct list_head *head;
-
- if (WARN_ON(!call->class))
- return 0;
field = kzalloc(sizeof(*field), GFP_KERNEL);
if (!field)
@@ -68,7 +65,6 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
field->size = size;
field->is_signed = is_signed;
- head = trace_get_fields(call);
list_add(&field->link, head);
return 0;
@@ -80,17 +76,32 @@ err:
return -ENOMEM;
}
+
+int trace_define_field(struct ftrace_event_call *call, const char *type,
+ const char *name, int offset, int size, int is_signed,
+ int filter_type)
+{
+ struct list_head *head;
+
+ if (WARN_ON(!call->class))
+ return 0;
+
+ head = trace_get_fields(call);
+ return __trace_define_field(head, type, name, offset, size,
+ is_signed, filter_type);
+}
EXPORT_SYMBOL_GPL(trace_define_field);
#define __common_field(type, item) \
- ret = trace_define_field(call, #type, "common_" #item, \
- offsetof(typeof(ent), item), \
- sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER); \
+ ret = __trace_define_field(&ftrace_common_fields, #type, \
+ "common_" #item, \
+ offsetof(typeof(ent), item), \
+ sizeof(ent.item), \
+ is_signed_type(type), FILTER_OTHER); \
if (ret) \
return ret;
-static int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(void)
{
int ret;
struct trace_entry ent;
@@ -130,6 +141,55 @@ int trace_event_raw_init(struct ftrace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);
+int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
+{
+ switch (type) {
+ case TRACE_REG_REGISTER:
+ return tracepoint_probe_register(call->name,
+ call->class->probe,
+ call);
+ case TRACE_REG_UNREGISTER:
+ tracepoint_probe_unregister(call->name,
+ call->class->probe,
+ call);
+ return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_REGISTER:
+ return tracepoint_probe_register(call->name,
+ call->class->perf_probe,
+ call);
+ case TRACE_REG_PERF_UNREGISTER:
+ tracepoint_probe_unregister(call->name,
+ call->class->perf_probe,
+ call);
+ return 0;
+#endif
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ftrace_event_reg);
+
+void trace_event_enable_cmd_record(bool enable)
+{
+ struct ftrace_event_call *call;
+
+ mutex_lock(&event_mutex);
+ list_for_each_entry(call, &ftrace_events, list) {
+ if (!(call->flags & TRACE_EVENT_FL_ENABLED))
+ continue;
+
+ if (enable) {
+ tracing_start_cmdline_record();
+ call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+ } else {
+ tracing_stop_cmdline_record();
+ call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+ }
+ }
+ mutex_unlock(&event_mutex);
+}
+
static int ftrace_event_enable_disable(struct ftrace_event_call *call,
int enable)
{
@@ -139,24 +199,20 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
case 0:
if (call->flags & TRACE_EVENT_FL_ENABLED) {
call->flags &= ~TRACE_EVENT_FL_ENABLED;
- tracing_stop_cmdline_record();
- if (call->class->reg)
- call->class->reg(call, TRACE_REG_UNREGISTER);
- else
- tracepoint_probe_unregister(call->name,
- call->class->probe,
- call);
+ if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
+ tracing_stop_cmdline_record();
+ call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+ }
+ call->class->reg(call, TRACE_REG_UNREGISTER);
}
break;
case 1:
if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
- tracing_start_cmdline_record();
- if (call->class->reg)
- ret = call->class->reg(call, TRACE_REG_REGISTER);
- else
- ret = tracepoint_probe_register(call->name,
- call->class->probe,
- call);
+ if (trace_flags & TRACE_ITER_RECORD_CMD) {
+ tracing_start_cmdline_record();
+ call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+ }
+ ret = call->class->reg(call, TRACE_REG_REGISTER);
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
@@ -194,8 +250,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->name || !call->class ||
- (!call->class->probe && !call->class->reg))
+ if (!call->name || !call->class || !call->class->reg)
continue;
if (match &&
@@ -321,7 +376,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
- if (call->class && (call->class->probe || call->class->reg))
+ if (call->class && call->class->reg)
return call;
}
@@ -474,8 +529,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->name || !call->class ||
- (!call->class->probe && !call->class->reg))
+ if (!call->name || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system) != 0)
@@ -544,85 +598,146 @@ out:
return ret;
}
-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+enum {
+ FORMAT_HEADER = 1,
+ FORMAT_FIELD_SEPERATOR = 2,
+ FORMAT_PRINTFMT = 3,
+};
+
+static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
- struct list_head *head;
- struct trace_seq *s;
- int common_field_count = 5;
- char *buf;
- int r = 0;
+ struct list_head *common_head = &ftrace_common_fields;
+ struct list_head *head = trace_get_fields(call);
- if (*ppos)
- return 0;
+ (*pos)++;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ if (unlikely(list_empty(common_head)))
+ return NULL;
- trace_seq_init(s);
+ field = list_entry(common_head->prev,
+ struct ftrace_event_field, link);
+ return field;
- trace_seq_printf(s, "name: %s\n", call->name);
- trace_seq_printf(s, "ID: %d\n", call->event.type);
- trace_seq_printf(s, "format:\n");
+ case FORMAT_FIELD_SEPERATOR:
+ if (unlikely(list_empty(head)))
+ return NULL;
- head = trace_get_fields(call);
- list_for_each_entry_reverse(field, head, link) {
- /*
- * Smartly shows the array type(except dynamic array).
- * Normal:
- * field:TYPE VAR
- * If TYPE := TYPE[LEN], it is shown:
- * field:TYPE VAR[LEN]
- */
- const char *array_descriptor = strchr(field->type, '[');
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ return field;
- if (!strncmp(field->type, "__data_loc", 10))
- array_descriptor = NULL;
+ case FORMAT_PRINTFMT:
+ /* all done */
+ return NULL;
+ }
- if (!array_descriptor) {
- r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- field->type, field->name, field->offset,
- field->size, !!field->is_signed);
- } else {
- r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- (int)(array_descriptor - field->type),
- field->type, field->name,
- array_descriptor, field->offset,
- field->size, !!field->is_signed);
- }
+ field = v;
+ if (field->link.prev == common_head)
+ return (void *)FORMAT_FIELD_SEPERATOR;
+ else if (field->link.prev == head)
+ return (void *)FORMAT_PRINTFMT;
- if (--common_field_count == 0)
- r = trace_seq_printf(s, "\n");
+ field = list_entry(field->link.prev, struct ftrace_event_field, link);
- if (!r)
- break;
- }
+ return field;
+}
- if (r)
- r = trace_seq_printf(s, "\nprint fmt: %s\n",
- call->print_fmt);
+static void *f_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t l = 0;
+ void *p;
- if (!r) {
- /*
- * ug! The format output is bigger than a PAGE!!
- */
- buf = "FORMAT TOO BIG\n";
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- buf, strlen(buf));
- goto out;
+ /* Start by showing the header */
+ if (!*pos)
+ return (void *)FORMAT_HEADER;
+
+ p = (void *)FORMAT_HEADER;
+ do {
+ p = f_next(m, p, &l);
+ } while (p && l < *pos);
+
+ return p;
+}
+
+static int f_show(struct seq_file *m, void *v)
+{
+ struct ftrace_event_call *call = m->private;
+ struct ftrace_event_field *field;
+ const char *array_descriptor;
+
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ seq_printf(m, "name: %s\n", call->name);
+ seq_printf(m, "ID: %d\n", call->event.type);
+ seq_printf(m, "format:\n");
+ return 0;
+
+ case FORMAT_FIELD_SEPERATOR:
+ seq_putc(m, '\n');
+ return 0;
+
+ case FORMAT_PRINTFMT:
+ seq_printf(m, "\nprint fmt: %s\n",
+ call->print_fmt);
+ return 0;
}
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- s->buffer, s->len);
- out:
- kfree(s);
- return r;
+ field = v;
+
+ /*
+ * Smartly shows the array type(except dynamic array).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ array_descriptor = strchr(field->type, '[');
+
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;
+
+ if (!array_descriptor)
+ seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ else
+ seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);
+
+ return 0;
+}
+
+static void f_stop(struct seq_file *m, void *p)
+{
+}
+
+static const struct seq_operations trace_format_seq_ops = {
+ .start = f_start,
+ .next = f_next,
+ .stop = f_stop,
+ .show = f_show,
+};
+
+static int trace_format_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_event_call *call = inode->i_private;
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &trace_format_seq_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = call;
+
+ return 0;
}
static ssize_t
@@ -817,39 +932,47 @@ static const struct file_operations ftrace_enable_fops = {
.open = tracing_open_generic,
.read = event_enable_read,
.write = event_enable_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_event_format_fops = {
- .open = tracing_open_generic,
- .read = event_format_read,
+ .open = trace_format_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
};
static const struct file_operations ftrace_event_id_fops = {
.open = tracing_open_generic,
.read = event_id_read,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_event_filter_fops = {
.open = tracing_open_generic,
.read = event_filter_read,
.write = event_filter_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_subsystem_filter_fops = {
.open = tracing_open_generic,
.read = subsystem_filter_read,
.write = subsystem_filter_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_system_enable_fops = {
.open = tracing_open_generic,
.read = system_enable_read,
.write = system_enable_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_show_header_fops = {
.open = tracing_open_generic,
.read = show_header,
+ .llseek = default_llseek,
};
static struct dentry *event_trace_events_dir(void)
@@ -963,35 +1086,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return -1;
}
- if (call->class->probe || call->class->reg)
+ if (call->class->reg)
trace_create_file("enable", 0644, call->dir, call,
enable);
#ifdef CONFIG_PERF_EVENTS
- if (call->event.type && (call->class->perf_probe || call->class->reg))
+ if (call->event.type && call->class->reg)
trace_create_file("id", 0444, call->dir, call,
id);
#endif
- if (call->class->define_fields) {
- /*
- * Other events may have the same class. Only update
- * the fields if they are not already defined.
- */
- head = trace_get_fields(call);
- if (list_empty(head)) {
- ret = trace_define_common_fields(call);
- if (!ret)
- ret = call->class->define_fields(call);
- if (ret < 0) {
- pr_warning("Could not initialize trace point"
- " events/%s\n", call->name);
- return ret;
- }
+ /*
+ * Other events may have the same class. Only update
+ * the fields if they are not already defined.
+ */
+ head = trace_get_fields(call);
+ if (list_empty(head)) {
+ ret = call->class->define_fields(call);
+ if (ret < 0) {
+ pr_warning("Could not initialize trace point"
+ " events/%s\n", call->name);
+ return ret;
}
- trace_create_file("filter", 0644, call->dir, call,
- filter);
}
+ trace_create_file("filter", 0644, call->dir, call,
+ filter);
trace_create_file("format", 0444, call->dir, call,
format);
@@ -999,11 +1118,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return 0;
}
-static int __trace_add_event_call(struct ftrace_event_call *call)
+static int
+__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
+ const struct file_operations *id,
+ const struct file_operations *enable,
+ const struct file_operations *filter,
+ const struct file_operations *format)
{
struct dentry *d_events;
int ret;
+ /* The linker may leave blanks */
if (!call->name)
return -EINVAL;
@@ -1011,8 +1136,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
ret = call->class->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
- pr_warning("Could not initialize trace "
- "events/%s\n", call->name);
+ pr_warning("Could not initialize trace events/%s\n",
+ call->name);
return ret;
}
}
@@ -1021,11 +1146,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
if (!d_events)
return -ENOENT;
- ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
- &ftrace_enable_fops, &ftrace_event_filter_fops,
- &ftrace_event_format_fops);
+ ret = event_create_dir(call, d_events, id, enable, filter, format);
if (!ret)
list_add(&call->list, &ftrace_events);
+ call->mod = mod;
return ret;
}
@@ -1035,7 +1159,10 @@ int trace_add_event_call(struct ftrace_event_call *call)
{
int ret;
mutex_lock(&event_mutex);
- ret = __trace_add_event_call(call);
+ ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
+ &ftrace_enable_fops,
+ &ftrace_event_filter_fops,
+ &ftrace_event_format_fops);
mutex_unlock(&event_mutex);
return ret;
}
@@ -1152,8 +1279,6 @@ static void trace_module_add_events(struct module *mod)
{
struct ftrace_module_file_ops *file_ops = NULL;
struct ftrace_event_call *call, *start, *end;
- struct dentry *d_events;
- int ret;
start = mod->trace_events;
end = mod->trace_events + mod->num_trace_events;
@@ -1161,38 +1286,14 @@ static void trace_module_add_events(struct module *mod)
if (start == end)
return;
- d_events = event_trace_events_dir();
- if (!d_events)
+ file_ops = trace_create_file_ops(mod);
+ if (!file_ops)
return;
for_each_event(call, start, end) {
- /* The linker may leave blanks */
- if (!call->name)
- continue;
- if (call->class->raw_init) {
- ret = call->class->raw_init(call);
- if (ret < 0) {
- if (ret != -ENOSYS)
- pr_warning("Could not initialize trace "
- "point events/%s\n", call->name);
- continue;
- }
- }
- /*
- * This module has events, create file ops for this module
- * if not already done.
- */
- if (!file_ops) {
- file_ops = trace_create_file_ops(mod);
- if (!file_ops)
- return;
- }
- call->mod = mod;
- ret = event_create_dir(call, d_events,
+ __trace_add_event_call(call, mod,
&file_ops->id, &file_ops->enable,
&file_ops->filter, &file_ops->format);
- if (!ret)
- list_add(&call->list, &ftrace_events);
}
}
@@ -1319,25 +1420,14 @@ static __init int event_trace_init(void)
trace_create_file("enable", 0644, d_events,
NULL, &ftrace_system_enable_fops);
+ if (trace_define_common_fields())
+ pr_warning("tracing: Failed to allocate common fields");
+
for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
- /* The linker may leave blanks */
- if (!call->name)
- continue;
- if (call->class->raw_init) {
- ret = call->class->raw_init(call);
- if (ret < 0) {
- if (ret != -ENOSYS)
- pr_warning("Could not initialize trace "
- "point events/%s\n", call->name);
- continue;
- }
- }
- ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
+ __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
&ftrace_enable_fops,
&ftrace_event_filter_fops,
&ftrace_event_format_fops);
- if (!ret)
- list_add(&call->list, &ftrace_events);
}
while (true) {
@@ -1524,12 +1614,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
struct ftrace_entry *entry;
unsigned long flags;
long disabled;
- int resched;
int cpu;
int pc;
pc = preempt_count();
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
@@ -1551,7 +1640,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
out:
atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __initdata =
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 57bb1bb32999..36d40104b17f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -497,12 +497,10 @@ void print_subsystem_event_filter(struct event_subsystem *system,
}
static struct ftrace_event_field *
-find_event_field(struct ftrace_event_call *call, char *name)
+__find_event_field(struct list_head *head, char *name)
{
struct ftrace_event_field *field;
- struct list_head *head;
- head = trace_get_fields(call);
list_for_each_entry(field, head, link) {
if (!strcmp(field->name, name))
return field;
@@ -511,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)
return NULL;
}
+static struct ftrace_event_field *
+find_event_field(struct ftrace_event_call *call, char *name)
+{
+ struct ftrace_event_field *field;
+ struct list_head *head;
+
+ field = __find_event_field(&ftrace_common_fields, name);
+ if (field)
+ return field;
+
+ head = trace_get_fields(call);
+ return __find_event_field(head, name);
+}
+
static void filter_free_pred(struct filter_pred *pred)
{
if (!pred)
@@ -627,9 +639,6 @@ static int init_subsystem_preds(struct event_subsystem *system)
int err;
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->class || !call->class->define_fields)
- continue;
-
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -646,9 +655,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
struct ftrace_event_call *call;
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->class || !call->class->define_fields)
- continue;
-
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -1251,9 +1257,6 @@ static int replace_system_preds(struct event_subsystem *system,
list_for_each_entry(call, &ftrace_events, list) {
struct event_filter *filter = call->filter;
- if (!call->class || !call->class->define_fields)
- continue;
-
if (strcmp(call->class->system, system->name) != 0)
continue;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 8536e2a65969..4ba44deaac25 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -125,12 +125,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#include "trace_entries.h"
-static int ftrace_raw_init_event(struct ftrace_event_call *call)
-{
- INIT_LIST_HEAD(&call->class->fields);
- return 0;
-}
-
#undef __entry
#define __entry REC
@@ -158,7 +152,7 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
struct ftrace_event_class event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
- .raw_init = ftrace_raw_init_event, \
+ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
}; \
\
struct ftrace_event_call __used \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b3f3776b0cd6..16aee4d44e8f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
- int cpu, resched;
+ int cpu;
int pc;
if (unlikely(!ftrace_function_enabled))
return;
pc = preempt_count();
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
trace_function(tr, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static void
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 79f4bac99a94..76b05980225c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
#include "trace.h"
#include "trace_output.h"
+/* When set, irq functions will be ignored */
+static int ftrace_graph_skip_irqs;
+
struct fgraph_cpu_data {
pid_t last_pid;
int depth;
+ int depth_irq;
int ignore;
unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
};
struct fgraph_data {
- struct fgraph_cpu_data *cpu_data;
+ struct fgraph_cpu_data __percpu *cpu_data;
/* Place to preserve last processed entry. */
struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
#define TRACE_GRAPH_PRINT_PROC 0x8
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
+#define TRACE_GRAPH_PRINT_IRQS 0x40
static struct tracer_opt trace_opts[] = {
/* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
/* Display absolute time of an entry */
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
+ /* Display interrupts */
+ { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
/* Don't display overruns and proc by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
- TRACE_GRAPH_PRINT_DURATION,
+ TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
.opts = trace_opts
};
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
return 1;
}
+static inline int ftrace_graph_ignore_irqs(void)
+{
+ if (!ftrace_graph_skip_irqs)
+ return 0;
+
+ return in_irq();
+}
+
int trace_graph_entry(struct ftrace_graph_ent *trace)
{
struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
return 0;
/* trace it when it is-nested-in or is a function enabled. */
- if (!(trace->depth || ftrace_graph_addr(trace->func)))
+ if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
+ ftrace_graph_ignore_irqs())
return 0;
local_irq_save(flags);
@@ -246,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
return trace_graph_entry(trace);
}
+static void
+__trace_graph_function(struct trace_array *tr,
+ unsigned long ip, unsigned long flags, int pc)
+{
+ u64 time = trace_clock_local();
+ struct ftrace_graph_ent ent = {
+ .func = ip,
+ .depth = 0,
+ };
+ struct ftrace_graph_ret ret = {
+ .func = ip,
+ .depth = 0,
+ .calltime = time,
+ .rettime = time,
+ };
+
+ __trace_graph_entry(tr, &ent, flags, pc);
+ __trace_graph_return(tr, &ret, flags, pc);
+}
+
+void
+trace_graph_function(struct trace_array *tr,
+ unsigned long ip, unsigned long parent_ip,
+ unsigned long flags, int pc)
+{
+ __trace_graph_function(tr, ip, flags, pc);
+}
+
void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned long flags,
@@ -507,7 +551,15 @@ get_return_for_leaf(struct trace_iterator *iter,
* if the output fails.
*/
data->ent = *curr;
- data->ret = *next;
+ /*
+ * If the next event is not a return type, then
+ * we only care about what type it is. Otherwise we can
+ * safely copy the entire event.
+ */
+ if (next->ent.type == TRACE_GRAPH_RET)
+ data->ret = *next;
+ else
+ data->ret.ent.type = next->ent.type;
}
}
@@ -641,7 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
/* Print nsecs (we don't want to exceed 7 numbers) */
if (len < 7) {
- snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem);
+ size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
+
+ snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
ret = trace_seq_printf(s, ".%s", nsecs_str);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -846,6 +900,108 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
return 0;
}
+/*
+ * Entry check for irq code
+ *
+ * returns 1 if
+ * - we are inside irq code
+ * - we just extered irq code
+ *
+ * retunns 0 if
+ * - funcgraph-interrupts option is set
+ * - we are not inside irq code
+ */
+static int
+check_irq_entry(struct trace_iterator *iter, u32 flags,
+ unsigned long addr, int depth)
+{
+ int cpu = iter->cpu;
+ int *depth_irq;
+ struct fgraph_data *data = iter->private;
+
+ /*
+ * If we are either displaying irqs, or we got called as
+ * a graph event and private data does not exist,
+ * then we bypass the irq check.
+ */
+ if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
+ (!data))
+ return 0;
+
+ depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
+
+ /*
+ * We are inside the irq code
+ */
+ if (*depth_irq >= 0)
+ return 1;
+
+ if ((addr < (unsigned long)__irqentry_text_start) ||
+ (addr >= (unsigned long)__irqentry_text_end))
+ return 0;
+
+ /*
+ * We are entering irq code.
+ */
+ *depth_irq = depth;
+ return 1;
+}
+
+/*
+ * Return check for irq code
+ *
+ * returns 1 if
+ * - we are inside irq code
+ * - we just left irq code
+ *
+ * returns 0 if
+ * - funcgraph-interrupts option is set
+ * - we are not inside irq code
+ */
+static int
+check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
+{
+ int cpu = iter->cpu;
+ int *depth_irq;
+ struct fgraph_data *data = iter->private;
+
+ /*
+ * If we are either displaying irqs, or we got called as
+ * a graph event and private data does not exist,
+ * then we bypass the irq check.
+ */
+ if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
+ (!data))
+ return 0;
+
+ depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
+
+ /*
+ * We are not inside the irq code.
+ */
+ if (*depth_irq == -1)
+ return 0;
+
+ /*
+ * We are inside the irq code, and this is returning entry.
+ * Let's not trace it and clear the entry depth, since
+ * we are out of irq code.
+ *
+ * This condition ensures that we 'leave the irq code' once
+ * we are out of the entry depth. Thus protecting us from
+ * the RETURN entry loss.
+ */
+ if (*depth_irq >= depth) {
+ *depth_irq = -1;
+ return 1;
+ }
+
+ /*
+ * We are inside the irq code, and this is not the entry.
+ */
+ return 1;
+}
+
static enum print_line_t
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
struct trace_iterator *iter, u32 flags)
@@ -856,6 +1012,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
static enum print_line_t ret;
int cpu = iter->cpu;
+ if (check_irq_entry(iter, flags, call->func, call->depth))
+ return TRACE_TYPE_HANDLED;
+
if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
return TRACE_TYPE_PARTIAL_LINE;
@@ -893,6 +1052,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
int ret;
int i;
+ if (check_irq_return(iter, flags, trace->depth))
+ return TRACE_TYPE_HANDLED;
+
if (data) {
struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
@@ -1045,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
enum print_line_t
-print_graph_function_flags(struct trace_iterator *iter, u32 flags)
+__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
struct ftrace_graph_ent_entry *field;
struct fgraph_data *data = iter->private;
@@ -1108,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
static enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
- return print_graph_function_flags(iter, tracer_flags.val);
+ return __print_graph_function_flags(iter, tracer_flags.val);
+}
+
+enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
+ u32 flags)
+{
+ if (trace_flags & TRACE_ITER_LATENCY_FMT)
+ flags |= TRACE_GRAPH_PRINT_DURATION;
+ else
+ flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+
+ return __print_graph_function_flags(iter, flags);
}
static enum print_line_t
@@ -1140,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
seq_printf(s, "#%.*s|||| / \n", size, spaces);
}
-void print_graph_headers_flags(struct seq_file *s, u32 flags)
+static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
{
int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
@@ -1181,6 +1354,23 @@ void print_graph_headers(struct seq_file *s)
print_graph_headers_flags(s, tracer_flags.val);
}
+void print_graph_headers_flags(struct seq_file *s, u32 flags)
+{
+ struct trace_iterator *iter = s->private;
+
+ if (trace_flags & TRACE_ITER_LATENCY_FMT) {
+ /* print nothing if the buffers are empty */
+ if (trace_empty(iter))
+ return;
+
+ print_trace_header(s, iter);
+ flags |= TRACE_GRAPH_PRINT_DURATION;
+ } else
+ flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+
+ __print_graph_headers_flags(s, flags);
+}
+
void graph_trace_open(struct trace_iterator *iter)
{
/* pid and depth on the last trace processed */
@@ -1201,9 +1391,12 @@ void graph_trace_open(struct trace_iterator *iter)
pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+ int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
+
*pid = -1;
*depth = 0;
*ignore = 0;
+ *depth_irq = -1;
}
iter->private = data;
@@ -1226,6 +1419,14 @@ void graph_trace_close(struct trace_iterator *iter)
}
}
+static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
+{
+ if (bit == TRACE_GRAPH_PRINT_IRQS)
+ ftrace_graph_skip_irqs = !set;
+
+ return 0;
+}
+
static struct trace_event_functions graph_functions = {
.trace = print_graph_function_event,
};
@@ -1252,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = {
.print_line = print_graph_function,
.print_header = print_graph_headers,
.flags = &tracer_flags,
+ .set_flag = func_graph_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function_graph,
#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 6fd486e0cef4..5cf8c602b880 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence;
#ifdef CONFIG_FUNCTION_TRACER
/*
- * irqsoff uses its own tracer function to keep the overhead down:
+ * Prologue for the preempt and irqs off function tracers.
+ *
+ * Returns 1 if it is OK to continue, and data->disabled is
+ * incremented.
+ * 0 if the trace is to be ignored, and data->disabled
+ * is kept the same.
+ *
+ * Note, this function is also used outside this ifdef but
+ * inside the #ifdef of the function graph tracer below.
+ * This is OK, since the function graph tracer is
+ * dependent on the function tracer.
*/
-static void
-irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+static int func_prolog_dec(struct trace_array *tr,
+ struct trace_array_cpu **data,
+ unsigned long *flags)
{
- struct trace_array *tr = irqsoff_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
long disabled;
int cpu;
@@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
*/
cpu = raw_smp_processor_id();
if (likely(!per_cpu(tracing_cpu, cpu)))
- return;
+ return 0;
- local_save_flags(flags);
+ local_save_flags(*flags);
/* slight chance to get a false positive on tracing_cpu */
- if (!irqs_disabled_flags(flags))
- return;
+ if (!irqs_disabled_flags(*flags))
+ return 0;
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
+ *data = tr->data[cpu];
+ disabled = atomic_inc_return(&(*data)->disabled);
if (likely(disabled == 1))
- trace_function(tr, ip, parent_ip, flags, preempt_count());
+ return 1;
+
+ atomic_dec(&(*data)->disabled);
+
+ return 0;
+}
+
+/*
+ * irqsoff uses its own tracer function to keep the overhead down:
+ */
+static void
+irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+
+ if (!func_prolog_dec(tr, &data, &flags))
+ return;
+
+ trace_function(tr, ip, parent_ip, flags, preempt_count());
atomic_dec(&data->disabled);
}
@@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
int ret;
- int cpu;
int pc;
- cpu = raw_smp_processor_id();
- if (likely(!per_cpu(tracing_cpu, cpu)))
+ if (!func_prolog_dec(tr, &data, &flags))
return 0;
- local_save_flags(flags);
- /* slight chance to get a false positive on tracing_cpu */
- if (!irqs_disabled_flags(flags))
- return 0;
-
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
-
- if (likely(disabled == 1)) {
- pc = preempt_count();
- ret = __trace_graph_entry(tr, trace, flags, pc);
- } else
- ret = 0;
-
+ pc = preempt_count();
+ ret = __trace_graph_entry(tr, trace, flags, pc);
atomic_dec(&data->disabled);
+
return ret;
}
@@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
- int cpu;
int pc;
- cpu = raw_smp_processor_id();
- if (likely(!per_cpu(tracing_cpu, cpu)))
+ if (!func_prolog_dec(tr, &data, &flags))
return;
- local_save_flags(flags);
- /* slight chance to get a false positive on tracing_cpu */
- if (!irqs_disabled_flags(flags))
- return;
-
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
-
- if (likely(disabled == 1)) {
- pc = preempt_count();
- __trace_graph_return(tr, trace, flags, pc);
- }
-
+ pc = preempt_count();
+ __trace_graph_return(tr, trace, flags, pc);
atomic_dec(&data->disabled);
}
@@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
- u32 flags = GRAPH_TRACER_FLAGS;
-
- if (trace_flags & TRACE_ITER_LATENCY_FMT)
- flags |= TRACE_GRAPH_PRINT_DURATION;
- else
- flags |= TRACE_GRAPH_PRINT_ABS_TIME;
-
/*
* In graph mode call the graph tracer output function,
* otherwise go with the TRACE_FN event handler
*/
if (is_graph())
- return print_graph_function_flags(iter, flags);
+ return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
return TRACE_TYPE_UNHANDLED;
}
static void irqsoff_print_header(struct seq_file *s)
{
- if (is_graph()) {
- struct trace_iterator *iter = s->private;
- u32 flags = GRAPH_TRACER_FLAGS;
-
- if (trace_flags & TRACE_ITER_LATENCY_FMT) {
- /* print nothing if the buffers are empty */
- if (trace_empty(iter))
- return;
-
- print_trace_header(s, iter);
- flags |= TRACE_GRAPH_PRINT_DURATION;
- } else
- flags |= TRACE_GRAPH_PRINT_ABS_TIME;
-
- print_graph_headers_flags(s, flags);
- } else
+ if (is_graph())
+ print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
+ else
trace_default_header(s);
}
static void
-trace_graph_function(struct trace_array *tr,
- unsigned long ip, unsigned long flags, int pc)
-{
- u64 time = trace_clock_local();
- struct ftrace_graph_ent ent = {
- .func = ip,
- .depth = 0,
- };
- struct ftrace_graph_ret ret = {
- .func = ip,
- .depth = 0,
- .calltime = time,
- .rettime = time,
- };
-
- __trace_graph_entry(tr, &ent, flags, pc);
- __trace_graph_return(tr, &ret, flags, pc);
-}
-
-static void
__trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip,
unsigned long flags, int pc)
{
- if (!is_graph())
+ if (is_graph())
+ trace_graph_function(tr, ip, parent_ip, flags, pc);
+ else
trace_function(tr, ip, parent_ip, flags, pc);
- else {
- trace_graph_function(tr, parent_ip, flags, pc);
- trace_graph_function(tr, ip, flags, pc);
- }
}
#else
@@ -649,6 +607,7 @@ static struct tracer irqsoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
+ .use_max_tr = 1,
};
# define register_irqsoff(trace) register_tracer(&trace)
#else
@@ -681,6 +640,7 @@ static struct tracer preemptoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
+ .use_max_tr = 1,
};
# define register_preemptoff(trace) register_tracer(&trace)
#else
@@ -715,6 +675,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
+ .use_max_tr = 1,
};
# define register_preemptirqsoff(trace) register_tracer(&trace)
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
new file mode 100644
index 000000000000..3c5c5dfea0b3
--- /dev/null
+++ b/kernel/trace/trace_kdb.c
@@ -0,0 +1,135 @@
+/*
+ * kdb helper for dumping the ftrace buffer
+ *
+ * Copyright (C) 2010 Jason Wessel <jason.wessel@windriver.com>
+ *
+ * ftrace_dump_buf based on ftrace_dump:
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ */
+#include <linux/init.h>
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+static void ftrace_dump_buf(int skip_lines, long cpu_file)
+{
+ /* use static because iter can be a bit big for the stack */
+ static struct trace_iterator iter;
+ unsigned int old_userobj;
+ int cnt = 0, cpu;
+
+ trace_init_global_iter(&iter);
+
+ for_each_tracing_cpu(cpu) {
+ atomic_inc(&iter.tr->data[cpu]->disabled);
+ }
+
+ old_userobj = trace_flags;
+
+ /* don't look at user memory in panic mode */
+ trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
+ kdb_printf("Dumping ftrace buffer:\n");
+
+ /* reset all but tr, trace, and overruns */
+ memset(&iter.seq, 0,
+ sizeof(struct trace_iterator) -
+ offsetof(struct trace_iterator, seq));
+ iter.iter_flags |= TRACE_FILE_LAT_FMT;
+ iter.pos = -1;
+
+ if (cpu_file == TRACE_PIPE_ALL_CPU) {
+ for_each_tracing_cpu(cpu) {
+ iter.buffer_iter[cpu] =
+ ring_buffer_read_prepare(iter.tr->buffer, cpu);
+ ring_buffer_read_start(iter.buffer_iter[cpu]);
+ tracing_iter_reset(&iter, cpu);
+ }
+ } else {
+ iter.cpu_file = cpu_file;
+ iter.buffer_iter[cpu_file] =
+ ring_buffer_read_prepare(iter.tr->buffer, cpu_file);
+ ring_buffer_read_start(iter.buffer_iter[cpu_file]);
+ tracing_iter_reset(&iter, cpu_file);
+ }
+ if (!trace_empty(&iter))
+ trace_find_next_entry_inc(&iter);
+ while (!trace_empty(&iter)) {
+ if (!cnt)
+ kdb_printf("---------------------------------\n");
+ cnt++;
+
+ if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines)
+ print_trace_line(&iter);
+ if (!skip_lines)
+ trace_printk_seq(&iter.seq);
+ else
+ skip_lines--;
+ if (KDB_FLAG(CMD_INTERRUPT))
+ goto out;
+ }
+
+ if (!cnt)
+ kdb_printf(" (ftrace buffer empty)\n");
+ else
+ kdb_printf("---------------------------------\n");
+
+out:
+ trace_flags = old_userobj;
+
+ for_each_tracing_cpu(cpu) {
+ atomic_dec(&iter.tr->data[cpu]->disabled);
+ }
+
+ for_each_tracing_cpu(cpu)
+ if (iter.buffer_iter[cpu])
+ ring_buffer_read_finish(iter.buffer_iter[cpu]);
+}
+
+/*
+ * kdb_ftdump - Dump the ftrace log buffer
+ */
+static int kdb_ftdump(int argc, const char **argv)
+{
+ int skip_lines = 0;
+ long cpu_file;
+ char *cp;
+
+ if (argc > 2)
+ return KDB_ARGCOUNT;
+
+ if (argc) {
+ skip_lines = simple_strtol(argv[1], &cp, 0);
+ if (*cp)
+ skip_lines = 0;
+ }
+
+ if (argc == 2) {
+ cpu_file = simple_strtol(argv[2], &cp, 0);
+ if (*cp || cpu_file >= NR_CPUS || cpu_file < 0 ||
+ !cpu_online(cpu_file))
+ return KDB_BADINT;
+ } else {
+ cpu_file = TRACE_PIPE_ALL_CPU;
+ }
+
+ kdb_trap_printk++;
+ ftrace_dump_buf(skip_lines, cpu_file);
+ kdb_trap_printk--;
+
+ return 0;
+}
+
+static __init int kdb_ftrace_register(void)
+{
+ kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
+ "Dump ftrace log", 0, KDB_REPEAT_NONE);
+ return 0;
+}
+
+late_initcall(kdb_ftrace_register);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f52b5f50299d..2dec9bcde8b4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,6 +30,7 @@
#include <linux/ptrace.h>
#include <linux/perf_event.h>
#include <linux/stringify.h>
+#include <linux/limits.h>
#include <asm/bitsperlong.h>
#include "trace.h"
@@ -38,6 +39,7 @@
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
#define MAX_EVENT_NAME_LEN 64
+#define MAX_STRING_SIZE PATH_MAX
#define KPROBE_EVENT_SYSTEM "kprobes"
/* Reserved field names */
@@ -58,14 +60,16 @@ const char *reserved_field_names[] = {
};
/* Printing function type */
-typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *);
+typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
+ void *);
#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
/* Printing in basic type function template */
#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
- const char *name, void *data)\
+ const char *name, \
+ void *data, void *ent)\
{ \
return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
} \
@@ -80,6 +84,49 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
+/* data_rloc: data relative location, compatible with u32 */
+#define make_data_rloc(len, roffs) \
+ (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
+#define get_rloc_len(dl) ((u32)(dl) >> 16)
+#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
+
+static inline void *get_rloc_data(u32 *dl)
+{
+ return (u8 *)dl + get_rloc_offs(*dl);
+}
+
+/* For data_loc conversion */
+static inline void *get_loc_data(u32 *dl, void *ent)
+{
+ return (u8 *)ent + get_rloc_offs(*dl);
+}
+
+/*
+ * Convert data_rloc to data_loc:
+ * data_rloc stores the offset from data_rloc itself, but data_loc
+ * stores the offset from event entry.
+ */
+#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
+
+/* For defining macros, define string/string_size types */
+typedef u32 string;
+typedef u32 string_size;
+
+/* Print type function for string type */
+static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
+ const char *name,
+ void *data, void *ent)
+{
+ int len = *(u32 *)data >> 16;
+
+ if (!len)
+ return trace_seq_printf(s, " %s=(fault)", name);
+ else
+ return trace_seq_printf(s, " %s=\"%s\"", name,
+ (const char *)get_loc_data(data, ent));
+}
+static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
+
/* Data fetch function type */
typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
@@ -94,32 +141,38 @@ static __kprobes void call_fetch(struct fetch_param *fprm,
return fprm->fn(regs, fprm->data, dest);
}
-#define FETCH_FUNC_NAME(kind, type) fetch_##kind##_##type
+#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
/*
* Define macro for basic types - we don't need to define s* types, because
* we have to care only about bitwidth at recording time.
*/
-#define DEFINE_BASIC_FETCH_FUNCS(kind) \
-DEFINE_FETCH_##kind(u8) \
-DEFINE_FETCH_##kind(u16) \
-DEFINE_FETCH_##kind(u32) \
-DEFINE_FETCH_##kind(u64)
-
-#define CHECK_BASIC_FETCH_FUNCS(kind, fn) \
- ((FETCH_FUNC_NAME(kind, u8) == fn) || \
- (FETCH_FUNC_NAME(kind, u16) == fn) || \
- (FETCH_FUNC_NAME(kind, u32) == fn) || \
- (FETCH_FUNC_NAME(kind, u64) == fn))
+#define DEFINE_BASIC_FETCH_FUNCS(method) \
+DEFINE_FETCH_##method(u8) \
+DEFINE_FETCH_##method(u16) \
+DEFINE_FETCH_##method(u32) \
+DEFINE_FETCH_##method(u64)
+
+#define CHECK_FETCH_FUNCS(method, fn) \
+ (((FETCH_FUNC_NAME(method, u8) == fn) || \
+ (FETCH_FUNC_NAME(method, u16) == fn) || \
+ (FETCH_FUNC_NAME(method, u32) == fn) || \
+ (FETCH_FUNC_NAME(method, u64) == fn) || \
+ (FETCH_FUNC_NAME(method, string) == fn) || \
+ (FETCH_FUNC_NAME(method, string_size) == fn)) \
+ && (fn != NULL))
/* Data fetch function templates */
#define DEFINE_FETCH_reg(type) \
static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
- void *offset, void *dest) \
+ void *offset, void *dest) \
{ \
*(type *)dest = (type)regs_get_register(regs, \
(unsigned int)((unsigned long)offset)); \
}
DEFINE_BASIC_FETCH_FUNCS(reg)
+/* No string on the register */
+#define fetch_reg_string NULL
+#define fetch_reg_string_size NULL
#define DEFINE_FETCH_stack(type) \
static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
@@ -129,6 +182,9 @@ static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
(unsigned int)((unsigned long)offset)); \
}
DEFINE_BASIC_FETCH_FUNCS(stack)
+/* No string on the stack entry */
+#define fetch_stack_string NULL
+#define fetch_stack_string_size NULL
#define DEFINE_FETCH_retval(type) \
static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
@@ -137,6 +193,9 @@ static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
*(type *)dest = (type)regs_return_value(regs); \
}
DEFINE_BASIC_FETCH_FUNCS(retval)
+/* No string on the retval */
+#define fetch_retval_string NULL
+#define fetch_retval_string_size NULL
#define DEFINE_FETCH_memory(type) \
static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
@@ -149,6 +208,62 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
*(type *)dest = retval; \
}
DEFINE_BASIC_FETCH_FUNCS(memory)
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
+ * length and relative data location.
+ */
+static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+ void *addr, void *dest)
+{
+ long ret;
+ int maxlen = get_rloc_len(*(u32 *)dest);
+ u8 *dst = get_rloc_data(dest);
+ u8 *src = addr;
+ mm_segment_t old_fs = get_fs();
+ if (!maxlen)
+ return;
+ /*
+ * Try to get string again, since the string can be changed while
+ * probing.
+ */
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+ do
+ ret = __copy_from_user_inatomic(dst++, src++, 1);
+ while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
+ dst[-1] = '\0';
+ pagefault_enable();
+ set_fs(old_fs);
+
+ if (ret < 0) { /* Failed to fetch string */
+ ((u8 *)get_rloc_data(dest))[0] = '\0';
+ *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
+ } else
+ *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
+ get_rloc_offs(*(u32 *)dest));
+}
+/* Return the length of string -- including null terminal byte */
+static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+ void *addr, void *dest)
+{
+ int ret, len = 0;
+ u8 c;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+ do {
+ ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
+ len++;
+ } while (c && ret == 0 && len < MAX_STRING_SIZE);
+ pagefault_enable();
+ set_fs(old_fs);
+
+ if (ret < 0) /* Failed to check the length */
+ *(u32 *)dest = 0;
+ else
+ *(u32 *)dest = len;
+}
/* Memory fetching by symbol */
struct symbol_cache {
@@ -203,6 +318,8 @@ static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
*(type *)dest = 0; \
}
DEFINE_BASIC_FETCH_FUNCS(symbol)
+DEFINE_FETCH_symbol(string)
+DEFINE_FETCH_symbol(string_size)
/* Dereference memory access function */
struct deref_fetch_param {
@@ -224,12 +341,14 @@ static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
*(type *)dest = 0; \
}
DEFINE_BASIC_FETCH_FUNCS(deref)
+DEFINE_FETCH_deref(string)
+DEFINE_FETCH_deref(string_size)
static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
{
- if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn))
+ if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
free_deref_fetch_param(data->orig.data);
- else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn))
+ else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
free_symbol_cache(data->orig.data);
kfree(data);
}
@@ -240,23 +359,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
-#define ASSIGN_FETCH_FUNC(kind, type) \
- .kind = FETCH_FUNC_NAME(kind, type)
-
-#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
- {.name = #ptype, \
- .size = sizeof(ftype), \
- .is_signed = sign, \
- .print = PRINT_TYPE_FUNC_NAME(ptype), \
- .fmt = PRINT_TYPE_FMT_NAME(ptype), \
-ASSIGN_FETCH_FUNC(reg, ftype), \
-ASSIGN_FETCH_FUNC(stack, ftype), \
-ASSIGN_FETCH_FUNC(retval, ftype), \
-ASSIGN_FETCH_FUNC(memory, ftype), \
-ASSIGN_FETCH_FUNC(symbol, ftype), \
-ASSIGN_FETCH_FUNC(deref, ftype), \
+/* Fetch types */
+enum {
+ FETCH_MTD_reg = 0,
+ FETCH_MTD_stack,
+ FETCH_MTD_retval,
+ FETCH_MTD_memory,
+ FETCH_MTD_symbol,
+ FETCH_MTD_deref,
+ FETCH_MTD_END,
+};
+
+#define ASSIGN_FETCH_FUNC(method, type) \
+ [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
+
+#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
+ {.name = _name, \
+ .size = _size, \
+ .is_signed = sign, \
+ .print = PRINT_TYPE_FUNC_NAME(ptype), \
+ .fmt = PRINT_TYPE_FMT_NAME(ptype), \
+ .fmttype = _fmttype, \
+ .fetch = { \
+ASSIGN_FETCH_FUNC(reg, ftype), \
+ASSIGN_FETCH_FUNC(stack, ftype), \
+ASSIGN_FETCH_FUNC(retval, ftype), \
+ASSIGN_FETCH_FUNC(memory, ftype), \
+ASSIGN_FETCH_FUNC(symbol, ftype), \
+ASSIGN_FETCH_FUNC(deref, ftype), \
+ } \
}
+#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
+ __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
+
+#define FETCH_TYPE_STRING 0
+#define FETCH_TYPE_STRSIZE 1
+
/* Fetch type information table */
static const struct fetch_type {
const char *name; /* Name of type */
@@ -264,14 +403,16 @@ static const struct fetch_type {
int is_signed; /* Signed flag */
print_type_func_t print; /* Print functions */
const char *fmt; /* Fromat string */
+ const char *fmttype; /* Name in format file */
/* Fetch functions */
- fetch_func_t reg;
- fetch_func_t stack;
- fetch_func_t retval;
- fetch_func_t memory;
- fetch_func_t symbol;
- fetch_func_t deref;
+ fetch_func_t fetch[FETCH_MTD_END];
} fetch_type_table[] = {
+ /* Special types */
+ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
+ sizeof(u32), 1, "__data_loc char[]"),
+ [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
+ string_size, sizeof(u32), 0, "u32"),
+ /* Basic types */
ASSIGN_FETCH_TYPE(u8, u8, 0),
ASSIGN_FETCH_TYPE(u16, u16, 0),
ASSIGN_FETCH_TYPE(u32, u32, 0),
@@ -302,12 +443,28 @@ static __kprobes void fetch_stack_address(struct pt_regs *regs,
*(unsigned long *)dest = kernel_stack_pointer(regs);
}
+static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
+ fetch_func_t orig_fn)
+{
+ int i;
+
+ if (type != &fetch_type_table[FETCH_TYPE_STRING])
+ return NULL; /* Only string type needs size function */
+ for (i = 0; i < FETCH_MTD_END; i++)
+ if (type->fetch[i] == orig_fn)
+ return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
+
+ WARN_ON(1); /* This should not happen */
+ return NULL;
+}
+
/**
* Kprobe event core functions
*/
struct probe_arg {
struct fetch_param fetch;
+ struct fetch_param fetch_size;
unsigned int offset; /* Offset from argument entry */
const char *name; /* Name of this argument */
const char *comm; /* Command of this argument */
@@ -356,8 +513,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
-/* Check the name is good for event/group */
-static int check_event_name(const char *name)
+/* Check the name is good for event/group/fields */
+static int is_good_name(const char *name)
{
if (!isalpha(*name) && *name != '_')
return 0;
@@ -399,7 +556,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
else
tp->rp.kp.pre_handler = kprobe_dispatcher;
- if (!event || !check_event_name(event)) {
+ if (!event || !is_good_name(event)) {
ret = -EINVAL;
goto error;
}
@@ -409,7 +566,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
if (!tp->call.name)
goto error;
- if (!group || !check_event_name(group)) {
+ if (!group || !is_good_name(group)) {
ret = -EINVAL;
goto error;
}
@@ -429,9 +586,9 @@ error:
static void free_probe_arg(struct probe_arg *arg)
{
- if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn))
+ if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
free_deref_fetch_param(arg->fetch.data);
- else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn))
+ else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
free_symbol_cache(arg->fetch.data);
kfree(arg->name);
kfree(arg->comm);
@@ -490,7 +647,7 @@ static int register_trace_probe(struct trace_probe *tp)
}
ret = register_probe_event(tp);
if (ret) {
- pr_warning("Faild to register probe event(%d)\n", ret);
+ pr_warning("Failed to register probe event(%d)\n", ret);
goto end;
}
@@ -548,7 +705,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
if (strcmp(arg, "retval") == 0) {
if (is_return)
- f->fn = t->retval;
+ f->fn = t->fetch[FETCH_MTD_retval];
else
ret = -EINVAL;
} else if (strncmp(arg, "stack", 5) == 0) {
@@ -562,7 +719,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
if (ret || param > PARAM_MAX_STACK)
ret = -EINVAL;
else {
- f->fn = t->stack;
+ f->fn = t->fetch[FETCH_MTD_stack];
f->data = (void *)param;
}
} else
@@ -588,7 +745,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
case '%': /* named register */
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
- f->fn = t->reg;
+ f->fn = t->fetch[FETCH_MTD_reg];
f->data = (void *)(unsigned long)ret;
ret = 0;
}
@@ -598,7 +755,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
ret = strict_strtoul(arg + 1, 0, &param);
if (ret)
break;
- f->fn = t->memory;
+ f->fn = t->fetch[FETCH_MTD_memory];
f->data = (void *)param;
} else {
ret = split_symbol_offset(arg + 1, &offset);
@@ -606,7 +763,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
break;
f->data = alloc_symbol_cache(arg + 1, offset);
if (f->data)
- f->fn = t->symbol;
+ f->fn = t->fetch[FETCH_MTD_symbol];
}
break;
case '+': /* deref memory */
@@ -636,14 +793,17 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
if (ret)
kfree(dprm);
else {
- f->fn = t->deref;
+ f->fn = t->fetch[FETCH_MTD_deref];
f->data = (void *)dprm;
}
}
break;
}
- if (!ret && !f->fn)
+ if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
+ pr_info("%s type has no corresponding fetch method.\n",
+ t->name);
ret = -EINVAL;
+ }
return ret;
}
@@ -652,6 +812,7 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
struct probe_arg *parg, int is_return)
{
const char *t;
+ int ret;
if (strlen(arg) > MAX_ARGSTR_LEN) {
pr_info("Argument is too long.: %s\n", arg);
@@ -674,7 +835,13 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
}
parg->offset = tp->size;
tp->size += parg->type->size;
- return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
+ ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
+ if (ret >= 0) {
+ parg->fetch_size.fn = get_fetch_size_function(parg->type,
+ parg->fetch.fn);
+ parg->fetch_size.data = parg->fetch.data;
+ }
+ return ret;
}
/* Return 1 if name is reserved or already used by another argument */
@@ -715,7 +882,7 @@ static int create_trace_probe(int argc, char **argv)
int i, ret = 0;
int is_return = 0, is_delete = 0;
char *symbol = NULL, *event = NULL, *group = NULL;
- char *arg, *tmp;
+ char *arg;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
@@ -757,14 +924,17 @@ static int create_trace_probe(int argc, char **argv)
pr_info("Delete command needs an event name.\n");
return -EINVAL;
}
+ mutex_lock(&probe_lock);
tp = find_probe_event(event, group);
if (!tp) {
+ mutex_unlock(&probe_lock);
pr_info("Event %s/%s doesn't exist.\n", group, event);
return -ENOENT;
}
/* delete an event */
unregister_trace_probe(tp);
free_trace_probe(tp);
+ mutex_unlock(&probe_lock);
return 0;
}
@@ -821,26 +991,36 @@ static int create_trace_probe(int argc, char **argv)
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ /* Increment count for freeing args in error case */
+ tp->nr_args++;
+
/* Parse argument name */
arg = strchr(argv[i], '=');
- if (arg)
+ if (arg) {
*arg++ = '\0';
- else
+ tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+ } else {
arg = argv[i];
+ /* If argument name is omitted, set "argN" */
+ snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
+ tp->args[i].name = kstrdup(buf, GFP_KERNEL);
+ }
- tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
if (!tp->args[i].name) {
- pr_info("Failed to allocate argument%d name '%s'.\n",
- i, argv[i]);
+ pr_info("Failed to allocate argument[%d] name.\n", i);
ret = -ENOMEM;
goto error;
}
- tmp = strchr(tp->args[i].name, ':');
- if (tmp)
- *tmp = '_'; /* convert : to _ */
+
+ if (!is_good_name(tp->args[i].name)) {
+ pr_info("Invalid argument[%d] name: %s\n",
+ i, tp->args[i].name);
+ ret = -EINVAL;
+ goto error;
+ }
if (conflict_field_name(tp->args[i].name, tp->args, i)) {
- pr_info("Argument%d name '%s' conflicts with "
+ pr_info("Argument[%d] name '%s' conflicts with "
"another field.\n", i, argv[i]);
ret = -EINVAL;
goto error;
@@ -849,12 +1029,9 @@ static int create_trace_probe(int argc, char **argv)
/* Parse fetch argument */
ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
if (ret) {
- pr_info("Parse error at argument%d. (%d)\n", i, ret);
- kfree(tp->args[i].name);
+ pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
goto error;
}
-
- tp->nr_args++;
}
ret = register_trace_probe(tp);
@@ -1043,6 +1220,54 @@ static const struct file_operations kprobe_profile_ops = {
.release = seq_release,
};
+/* Sum up total data length for dynamic arraies (strings) */
+static __kprobes int __get_data_size(struct trace_probe *tp,
+ struct pt_regs *regs)
+{
+ int i, ret = 0;
+ u32 len;
+
+ for (i = 0; i < tp->nr_args; i++)
+ if (unlikely(tp->args[i].fetch_size.fn)) {
+ call_fetch(&tp->args[i].fetch_size, regs, &len);
+ ret += len;
+ }
+
+ return ret;
+}
+
+/* Store the value of each argument */
+static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
+ struct pt_regs *regs,
+ u8 *data, int maxlen)
+{
+ int i;
+ u32 end = tp->size;
+ u32 *dl; /* Data (relative) location */
+
+ for (i = 0; i < tp->nr_args; i++) {
+ if (unlikely(tp->args[i].fetch_size.fn)) {
+ /*
+ * First, we set the relative location and
+ * maximum data length to *dl
+ */
+ dl = (u32 *)(data + tp->args[i].offset);
+ *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
+ /* Then try to fetch string or dynamic array data */
+ call_fetch(&tp->args[i].fetch, regs, dl);
+ /* Reduce maximum length */
+ end += get_rloc_len(*dl);
+ maxlen -= get_rloc_len(*dl);
+ /* Trick here, convert data_rloc to data_loc */
+ *dl = convert_rloc_to_loc(*dl,
+ ent_size + tp->args[i].offset);
+ } else
+ /* Just fetching data normally */
+ call_fetch(&tp->args[i].fetch, regs,
+ data + tp->args[i].offset);
+ }
+}
+
/* Kprobe handler */
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
@@ -1050,8 +1275,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
struct kprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
- u8 *data;
- int size, i, pc;
+ int size, dsize, pc;
unsigned long irq_flags;
struct ftrace_event_call *call = &tp->call;
@@ -1060,7 +1284,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
local_save_flags(irq_flags);
pc = preempt_count();
- size = sizeof(*entry) + tp->size;
+ dsize = __get_data_size(tp, regs);
+ size = sizeof(*entry) + tp->size + dsize;
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
size, irq_flags, pc);
@@ -1069,9 +1294,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
entry = ring_buffer_event_data(event);
entry->ip = (unsigned long)kp->addr;
- data = (u8 *)&entry[1];
- for (i = 0; i < tp->nr_args; i++)
- call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
+ store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1085,15 +1308,15 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
struct kretprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
- u8 *data;
- int size, i, pc;
+ int size, pc, dsize;
unsigned long irq_flags;
struct ftrace_event_call *call = &tp->call;
local_save_flags(irq_flags);
pc = preempt_count();
- size = sizeof(*entry) + tp->size;
+ dsize = __get_data_size(tp, regs);
+ size = sizeof(*entry) + tp->size + dsize;
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
size, irq_flags, pc);
@@ -1103,9 +1326,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
entry = ring_buffer_event_data(event);
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- data = (u8 *)&entry[1];
- for (i = 0; i < tp->nr_args; i++)
- call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
+ store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1137,7 +1358,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
data = (u8 *)&field[1];
for (i = 0; i < tp->nr_args; i++)
if (!tp->args[i].type->print(s, tp->args[i].name,
- data + tp->args[i].offset))
+ data + tp->args[i].offset, field))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -1179,7 +1400,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
data = (u8 *)&field[1];
for (i = 0; i < tp->nr_args; i++)
if (!tp->args[i].type->print(s, tp->args[i].name,
- data + tp->args[i].offset))
+ data + tp->args[i].offset, field))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -1214,11 +1435,6 @@ static void probe_event_disable(struct ftrace_event_call *call)
}
}
-static int probe_event_raw_init(struct ftrace_event_call *event_call)
-{
- return 0;
-}
-
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed) \
do { \
@@ -1239,7 +1455,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
/* Set argument names as fields */
for (i = 0; i < tp->nr_args; i++) {
- ret = trace_define_field(event_call, tp->args[i].type->name,
+ ret = trace_define_field(event_call, tp->args[i].type->fmttype,
tp->args[i].name,
sizeof(field) + tp->args[i].offset,
tp->args[i].type->size,
@@ -1261,7 +1477,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
/* Set argument names as fields */
for (i = 0; i < tp->nr_args; i++) {
- ret = trace_define_field(event_call, tp->args[i].type->name,
+ ret = trace_define_field(event_call, tp->args[i].type->fmttype,
tp->args[i].name,
sizeof(field) + tp->args[i].offset,
tp->args[i].type->size,
@@ -1301,8 +1517,13 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
for (i = 0; i < tp->nr_args; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
- tp->args[i].name);
+ if (strcmp(tp->args[i].type->name, "string") == 0)
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", __get_str(%s)",
+ tp->args[i].name);
+ else
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
+ tp->args[i].name);
}
#undef LEN_OR_ZERO
@@ -1339,11 +1560,11 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry_head *entry;
struct hlist_head *head;
- u8 *data;
- int size, __size, i;
+ int size, __size, dsize;
int rctx;
- __size = sizeof(*entry) + tp->size;
+ dsize = __get_data_size(tp, regs);
+ __size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1355,9 +1576,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
return;
entry->ip = (unsigned long)kp->addr;
- data = (u8 *)&entry[1];
- for (i = 0; i < tp->nr_args; i++)
- call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
+ memset(&entry[1], 0, dsize);
+ store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
@@ -1371,11 +1591,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry_head *entry;
struct hlist_head *head;
- u8 *data;
- int size, __size, i;
+ int size, __size, dsize;
int rctx;
- __size = sizeof(*entry) + tp->size;
+ dsize = __get_data_size(tp, regs);
+ __size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1388,9 +1608,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- data = (u8 *)&entry[1];
- for (i = 0; i < tp->nr_args; i++)
- call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
+ store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
@@ -1486,15 +1704,12 @@ static int register_probe_event(struct trace_probe *tp)
int ret;
/* Initialize ftrace_event_call */
+ INIT_LIST_HEAD(&call->class->fields);
if (probe_is_return(tp)) {
- INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &kretprobe_funcs;
- call->class->raw_init = probe_event_raw_init;
call->class->define_fields = kretprobe_event_define_fields;
} else {
- INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &kprobe_funcs;
- call->class->raw_init = probe_event_raw_init;
call->class->define_fields = kprobe_event_define_fields;
}
if (set_print_fmt(tp) < 0)
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
deleted file mode 100644
index 8eaf00749b65..000000000000
--- a/kernel/trace/trace_ksym.c
+++ /dev/null
@@ -1,508 +0,0 @@
-/*
- * trace_ksym.c - Kernel Symbol Tracer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2009
- */
-
-#include <linux/kallsyms.h>
-#include <linux/uaccess.h>
-#include <linux/debugfs.h>
-#include <linux/ftrace.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-#include <linux/hw_breakpoint.h>
-#include <asm/hw_breakpoint.h>
-
-#include <asm/atomic.h>
-
-#define KSYM_TRACER_OP_LEN 3 /* rw- */
-
-struct trace_ksym {
- struct perf_event **ksym_hbp;
- struct perf_event_attr attr;
-#ifdef CONFIG_PROFILE_KSYM_TRACER
- atomic64_t counter;
-#endif
- struct hlist_node ksym_hlist;
-};
-
-static struct trace_array *ksym_trace_array;
-
-static unsigned int ksym_tracing_enabled;
-
-static HLIST_HEAD(ksym_filter_head);
-
-static DEFINE_MUTEX(ksym_tracer_mutex);
-
-#ifdef CONFIG_PROFILE_KSYM_TRACER
-
-#define MAX_UL_INT 0xffffffff
-
-void ksym_collect_stats(unsigned long hbp_hit_addr)
-{
- struct hlist_node *node;
- struct trace_ksym *entry;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
- if (entry->attr.bp_addr == hbp_hit_addr) {
- atomic64_inc(&entry->counter);
- break;
- }
- }
- rcu_read_unlock();
-}
-#endif /* CONFIG_PROFILE_KSYM_TRACER */
-
-void ksym_hbp_handler(struct perf_event *hbp, int nmi,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
- struct ring_buffer_event *event;
- struct ksym_trace_entry *entry;
- struct ring_buffer *buffer;
- int pc;
-
- if (!ksym_tracing_enabled)
- return;
-
- buffer = ksym_trace_array->buffer;
-
- pc = preempt_count();
-
- event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
- sizeof(*entry), 0, pc);
- if (!event)
- return;
-
- entry = ring_buffer_event_data(event);
- entry->ip = instruction_pointer(regs);
- entry->type = hw_breakpoint_type(hbp);
- entry->addr = hw_breakpoint_addr(hbp);
- strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
-
-#ifdef CONFIG_PROFILE_KSYM_TRACER
- ksym_collect_stats(hw_breakpoint_addr(hbp));
-#endif /* CONFIG_PROFILE_KSYM_TRACER */
-
- trace_buffer_unlock_commit(buffer, event, 0, pc);
-}
-
-/* Valid access types are represented as
- *
- * rw- : Set Read/Write Access Breakpoint
- * -w- : Set Write Access Breakpoint
- * --- : Clear Breakpoints
- * --x : Set Execution Break points (Not available yet)
- *
- */
-static int ksym_trace_get_access_type(char *str)
-{
- int access = 0;
-
- if (str[0] == 'r')
- access |= HW_BREAKPOINT_R;
-
- if (str[1] == 'w')
- access |= HW_BREAKPOINT_W;
-
- if (str[2] == 'x')
- access |= HW_BREAKPOINT_X;
-
- switch (access) {
- case HW_BREAKPOINT_R:
- case HW_BREAKPOINT_W:
- case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- return access;
- default:
- return -EINVAL;
- }
-}
-
-/*
- * There can be several possible malformed requests and we attempt to capture
- * all of them. We enumerate some of the rules
- * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
- * i.e. multiple ':' symbols disallowed. Possible uses are of the form
- * <module>:<ksym_name>:<op>.
- * 2. No delimiter symbol ':' in the input string
- * 3. Spurious operator symbols or symbols not in their respective positions
- * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
- * 5. Kernel symbol not a part of /proc/kallsyms
- * 6. Duplicate requests
- */
-static int parse_ksym_trace_str(char *input_string, char **ksymname,
- unsigned long *addr)
-{
- int ret;
-
- *ksymname = strsep(&input_string, ":");
- *addr = kallsyms_lookup_name(*ksymname);
-
- /* Check for malformed request: (2), (1) and (5) */
- if ((!input_string) ||
- (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
- (*addr == 0))
- return -EINVAL;;
-
- ret = ksym_trace_get_access_type(input_string);
-
- return ret;
-}
-
-int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
-{
- struct trace_ksym *entry;
- int ret = -ENOMEM;
-
- entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- hw_breakpoint_init(&entry->attr);
-
- entry->attr.bp_type = op;
- entry->attr.bp_addr = addr;
- entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
-
- entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
- ksym_hbp_handler);
-
- if (IS_ERR(entry->ksym_hbp)) {
- ret = PTR_ERR(entry->ksym_hbp);
- if (ret == -ENOSPC) {
- printk(KERN_ERR "ksym_tracer: Maximum limit reached."
- " No new requests for tracing can be accepted now.\n");
- } else {
- printk(KERN_INFO "ksym_tracer request failed. Try again"
- " later!!\n");
- }
- goto err;
- }
-
- hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
-
- return 0;
-
-err:
- kfree(entry);
-
- return ret;
-}
-
-static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
- size_t count, loff_t *ppos)
-{
- struct trace_ksym *entry;
- struct hlist_node *node;
- struct trace_seq *s;
- ssize_t cnt = 0;
- int ret;
-
- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
- trace_seq_init(s);
-
- mutex_lock(&ksym_tracer_mutex);
-
- hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
- ret = trace_seq_printf(s, "%pS:",
- (void *)(unsigned long)entry->attr.bp_addr);
- if (entry->attr.bp_type == HW_BREAKPOINT_R)
- ret = trace_seq_puts(s, "r--\n");
- else if (entry->attr.bp_type == HW_BREAKPOINT_W)
- ret = trace_seq_puts(s, "-w-\n");
- else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
- ret = trace_seq_puts(s, "rw-\n");
- WARN_ON_ONCE(!ret);
- }
-
- cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
-
- mutex_unlock(&ksym_tracer_mutex);
-
- kfree(s);
-
- return cnt;
-}
-
-static void __ksym_trace_reset(void)
-{
- struct trace_ksym *entry;
- struct hlist_node *node, *node1;
-
- mutex_lock(&ksym_tracer_mutex);
- hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
- ksym_hlist) {
- unregister_wide_hw_breakpoint(entry->ksym_hbp);
- hlist_del_rcu(&(entry->ksym_hlist));
- synchronize_rcu();
- kfree(entry);
- }
- mutex_unlock(&ksym_tracer_mutex);
-}
-
-static ssize_t ksym_trace_filter_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct trace_ksym *entry;
- struct hlist_node *node;
- char *buf, *input_string, *ksymname = NULL;
- unsigned long ksym_addr = 0;
- int ret, op, changed = 0;
-
- buf = kzalloc(count + 1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- ret = -EFAULT;
- if (copy_from_user(buf, buffer, count))
- goto out;
-
- buf[count] = '\0';
- input_string = strstrip(buf);
-
- /*
- * Clear all breakpoints if:
- * 1: echo > ksym_trace_filter
- * 2: echo 0 > ksym_trace_filter
- * 3: echo "*:---" > ksym_trace_filter
- */
- if (!input_string[0] || !strcmp(input_string, "0") ||
- !strcmp(input_string, "*:---")) {
- __ksym_trace_reset();
- ret = 0;
- goto out;
- }
-
- ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
- if (ret < 0)
- goto out;
-
- mutex_lock(&ksym_tracer_mutex);
-
- ret = -EINVAL;
- hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
- if (entry->attr.bp_addr == ksym_addr) {
- /* Check for malformed request: (6) */
- if (entry->attr.bp_type != op)
- changed = 1;
- else
- goto out_unlock;
- break;
- }
- }
- if (changed) {
- unregister_wide_hw_breakpoint(entry->ksym_hbp);
- entry->attr.bp_type = op;
- ret = 0;
- if (op > 0) {
- entry->ksym_hbp =
- register_wide_hw_breakpoint(&entry->attr,
- ksym_hbp_handler);
- if (IS_ERR(entry->ksym_hbp))
- ret = PTR_ERR(entry->ksym_hbp);
- else
- goto out_unlock;
- }
- /* Error or "symbol:---" case: drop it */
- hlist_del_rcu(&(entry->ksym_hlist));
- synchronize_rcu();
- kfree(entry);
- goto out_unlock;
- } else {
- /* Check for malformed request: (4) */
- if (op)
- ret = process_new_ksym_entry(ksymname, op, ksym_addr);
- }
-out_unlock:
- mutex_unlock(&ksym_tracer_mutex);
-out:
- kfree(buf);
- return !ret ? count : ret;
-}
-
-static const struct file_operations ksym_tracing_fops = {
- .open = tracing_open_generic,
- .read = ksym_trace_filter_read,
- .write = ksym_trace_filter_write,
-};
-
-static void ksym_trace_reset(struct trace_array *tr)
-{
- ksym_tracing_enabled = 0;
- __ksym_trace_reset();
-}
-
-static int ksym_trace_init(struct trace_array *tr)
-{
- int cpu, ret = 0;
-
- for_each_online_cpu(cpu)
- tracing_reset(tr, cpu);
- ksym_tracing_enabled = 1;
- ksym_trace_array = tr;
-
- return ret;
-}
-
-static void ksym_trace_print_header(struct seq_file *m)
-{
- seq_puts(m,
- "# TASK-PID CPU# Symbol "
- "Type Function\n");
- seq_puts(m,
- "# | | | "
- " | |\n");
-}
-
-static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
- struct trace_seq *s = &iter->seq;
- struct ksym_trace_entry *field;
- char str[KSYM_SYMBOL_LEN];
- int ret;
-
- if (entry->type != TRACE_KSYM)
- return TRACE_TYPE_UNHANDLED;
-
- trace_assign_type(field, entry);
-
- ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
- entry->pid, iter->cpu, (char *)field->addr);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- switch (field->type) {
- case HW_BREAKPOINT_R:
- ret = trace_seq_printf(s, " R ");
- break;
- case HW_BREAKPOINT_W:
- ret = trace_seq_printf(s, " W ");
- break;
- case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
- ret = trace_seq_printf(s, " RW ");
- break;
- default:
- return TRACE_TYPE_PARTIAL_LINE;
- }
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- sprint_symbol(str, field->ip);
- ret = trace_seq_printf(s, "%s\n", str);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
-struct tracer ksym_tracer __read_mostly =
-{
- .name = "ksym_tracer",
- .init = ksym_trace_init,
- .reset = ksym_trace_reset,
-#ifdef CONFIG_FTRACE_SELFTEST
- .selftest = trace_selftest_startup_ksym,
-#endif
- .print_header = ksym_trace_print_header,
- .print_line = ksym_trace_output
-};
-
-#ifdef CONFIG_PROFILE_KSYM_TRACER
-static int ksym_profile_show(struct seq_file *m, void *v)
-{
- struct hlist_node *node;
- struct trace_ksym *entry;
- int access_type = 0;
- char fn_name[KSYM_NAME_LEN];
-
- seq_puts(m, " Access Type ");
- seq_puts(m, " Symbol Counter\n");
- seq_puts(m, " ----------- ");
- seq_puts(m, " ------ -------\n");
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
-
- access_type = entry->attr.bp_type;
-
- switch (access_type) {
- case HW_BREAKPOINT_R:
- seq_puts(m, " R ");
- break;
- case HW_BREAKPOINT_W:
- seq_puts(m, " W ");
- break;
- case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
- seq_puts(m, " RW ");
- break;
- default:
- seq_puts(m, " NA ");
- }
-
- if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
- seq_printf(m, " %-36s", fn_name);
- else
- seq_printf(m, " %-36s", "<NA>");
- seq_printf(m, " %15llu\n",
- (unsigned long long)atomic64_read(&entry->counter));
- }
- rcu_read_unlock();
-
- return 0;
-}
-
-static int ksym_profile_open(struct inode *node, struct file *file)
-{
- return single_open(file, ksym_profile_show, NULL);
-}
-
-static const struct file_operations ksym_profile_fops = {
- .open = ksym_profile_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* CONFIG_PROFILE_KSYM_TRACER */
-
-__init static int init_ksym_trace(void)
-{
- struct dentry *d_tracer;
-
- d_tracer = tracing_init_dentry();
-
- trace_create_file("ksym_trace_filter", 0644, d_tracer,
- NULL, &ksym_tracing_fops);
-
-#ifdef CONFIG_PROFILE_KSYM_TRACER
- trace_create_file("ksym_profile", 0444, d_tracer,
- NULL, &ksym_profile_fops);
-#endif
-
- return register_tracer(&ksym_tracer);
-}
-device_initcall(init_ksym_trace);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 57c1b4596470..02272baa2206 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,9 +16,6 @@
DECLARE_RWSEM(trace_event_mutex);
-DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
-EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
-
static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
static int next_event_type = __TRACE_LAST_TYPE + 1;
@@ -1069,65 +1066,6 @@ static struct trace_event trace_wake_event = {
.funcs = &trace_wake_funcs,
};
-/* TRACE_SPECIAL */
-static enum print_line_t trace_special_print(struct trace_iterator *iter,
- int flags, struct trace_event *event)
-{
- struct special_entry *field;
-
- trace_assign_type(field, iter->ent);
-
- if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
- field->arg1,
- field->arg2,
- field->arg3))
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t trace_special_hex(struct trace_iterator *iter,
- int flags, struct trace_event *event)
-{
- struct special_entry *field;
- struct trace_seq *s = &iter->seq;
-
- trace_assign_type(field, iter->ent);
-
- SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
- SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
- SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t trace_special_bin(struct trace_iterator *iter,
- int flags, struct trace_event *event)
-{
- struct special_entry *field;
- struct trace_seq *s = &iter->seq;
-
- trace_assign_type(field, iter->ent);
-
- SEQ_PUT_FIELD_RET(s, field->arg1);
- SEQ_PUT_FIELD_RET(s, field->arg2);
- SEQ_PUT_FIELD_RET(s, field->arg3);
-
- return TRACE_TYPE_HANDLED;
-}
-
-static struct trace_event_functions trace_special_funcs = {
- .trace = trace_special_print,
- .raw = trace_special_print,
- .hex = trace_special_hex,
- .binary = trace_special_bin,
-};
-
-static struct trace_event trace_special_event = {
- .type = TRACE_SPECIAL,
- .funcs = &trace_special_funcs,
-};
-
/* TRACE_STACK */
static enum print_line_t trace_stack_print(struct trace_iterator *iter,
@@ -1161,9 +1099,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
static struct trace_event_functions trace_stack_funcs = {
.trace = trace_stack_print,
- .raw = trace_special_print,
- .hex = trace_special_hex,
- .binary = trace_special_bin,
};
static struct trace_event trace_stack_event = {
@@ -1194,9 +1129,6 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
static struct trace_event_functions trace_user_stack_funcs = {
.trace = trace_user_stack_print,
- .raw = trace_special_print,
- .hex = trace_special_hex,
- .binary = trace_special_bin,
};
static struct trace_event trace_user_stack_event = {
@@ -1314,7 +1246,6 @@ static struct trace_event *events[] __initdata = {
&trace_fn_event,
&trace_ctx_event,
&trace_wake_event,
- &trace_special_event,
&trace_stack_event,
&trace_user_stack_event,
&trace_bprint_event,
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0e73bc2ef8c5..7319559ed59f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,50 +31,99 @@ static int wakeup_rt;
static arch_spinlock_t wakeup_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
+static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
+static void wakeup_graph_return(struct ftrace_graph_ret *trace);
static int save_lat_flag;
+#define TRACE_DISPLAY_GRAPH 1
+
+static struct tracer_opt trace_opts[] = {
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* display latency trace as call graph */
+ { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
+#endif
+ { } /* Empty entry */
+};
+
+static struct tracer_flags tracer_flags = {
+ .val = 0,
+ .opts = trace_opts,
+};
+
+#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
+
#ifdef CONFIG_FUNCTION_TRACER
+
/*
- * irqsoff uses its own tracer function to keep the overhead down:
+ * Prologue for the wakeup function tracers.
+ *
+ * Returns 1 if it is OK to continue, and preemption
+ * is disabled and data->disabled is incremented.
+ * 0 if the trace is to be ignored, and preemption
+ * is not disabled and data->disabled is
+ * kept the same.
+ *
+ * Note, this function is also used outside this ifdef but
+ * inside the #ifdef of the function graph tracer below.
+ * This is OK, since the function graph tracer is
+ * dependent on the function tracer.
*/
-static void
-wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+static int
+func_prolog_preempt_disable(struct trace_array *tr,
+ struct trace_array_cpu **data,
+ int *pc)
{
- struct trace_array *tr = wakeup_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
long disabled;
- int resched;
int cpu;
- int pc;
if (likely(!wakeup_task))
- return;
+ return 0;
- pc = preempt_count();
- resched = ftrace_preempt_disable();
+ *pc = preempt_count();
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
if (cpu != wakeup_current_cpu)
goto out_enable;
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
+ *data = tr->data[cpu];
+ disabled = atomic_inc_return(&(*data)->disabled);
if (unlikely(disabled != 1))
goto out;
- local_irq_save(flags);
+ return 1;
- trace_function(tr, ip, parent_ip, flags, pc);
+out:
+ atomic_dec(&(*data)->disabled);
+
+out_enable:
+ preempt_enable_notrace();
+ return 0;
+}
+/*
+ * wakeup uses its own tracer function to keep the overhead down:
+ */
+static void
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return;
+
+ local_irq_save(flags);
+ trace_function(tr, ip, parent_ip, flags, pc);
local_irq_restore(flags);
- out:
atomic_dec(&data->disabled);
- out_enable:
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __read_mostly =
@@ -83,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly =
};
#endif /* CONFIG_FUNCTION_TRACER */
+static int start_func_tracer(int graph)
+{
+ int ret;
+
+ if (!graph)
+ ret = register_ftrace_function(&trace_ops);
+ else
+ ret = register_ftrace_graph(&wakeup_graph_return,
+ &wakeup_graph_entry);
+
+ if (!ret && tracing_is_enabled())
+ tracer_enabled = 1;
+ else
+ tracer_enabled = 0;
+
+ return ret;
+}
+
+static void stop_func_tracer(int graph)
+{
+ tracer_enabled = 0;
+
+ if (!graph)
+ unregister_ftrace_function(&trace_ops);
+ else
+ unregister_ftrace_graph();
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
+{
+
+ if (!(bit & TRACE_DISPLAY_GRAPH))
+ return -EINVAL;
+
+ if (!(is_graph() ^ set))
+ return 0;
+
+ stop_func_tracer(!set);
+
+ wakeup_reset(wakeup_trace);
+ tracing_max_latency = 0;
+
+ return start_func_tracer(set);
+}
+
+static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc, ret = 0;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return 0;
+
+ local_save_flags(flags);
+ ret = __trace_graph_entry(tr, trace, flags, pc);
+ atomic_dec(&data->disabled);
+ preempt_enable_notrace();
+
+ return ret;
+}
+
+static void wakeup_graph_return(struct ftrace_graph_ret *trace)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return;
+
+ local_save_flags(flags);
+ __trace_graph_return(tr, trace, flags, pc);
+ atomic_dec(&data->disabled);
+
+ preempt_enable_notrace();
+ return;
+}
+
+static void wakeup_trace_open(struct trace_iterator *iter)
+{
+ if (is_graph())
+ graph_trace_open(iter);
+}
+
+static void wakeup_trace_close(struct trace_iterator *iter)
+{
+ if (iter->private)
+ graph_trace_close(iter);
+}
+
+#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
+
+static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
+{
+ /*
+ * In graph mode call the graph tracer output function,
+ * otherwise go with the TRACE_FN event handler
+ */
+ if (is_graph())
+ return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
+
+ return TRACE_TYPE_UNHANDLED;
+}
+
+static void wakeup_print_header(struct seq_file *s)
+{
+ if (is_graph())
+ print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
+ else
+ trace_default_header(s);
+}
+
+static void
+__trace_function(struct trace_array *tr,
+ unsigned long ip, unsigned long parent_ip,
+ unsigned long flags, int pc)
+{
+ if (is_graph())
+ trace_graph_function(tr, ip, parent_ip, flags, pc);
+ else
+ trace_function(tr, ip, parent_ip, flags, pc);
+}
+#else
+#define __trace_function trace_function
+
+static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
+{
+ return -EINVAL;
+}
+
+static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
+{
+ return -1;
+}
+
+static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
+{
+ return TRACE_TYPE_UNHANDLED;
+}
+
+static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
+static void wakeup_print_header(struct seq_file *s) { }
+static void wakeup_trace_open(struct trace_iterator *iter) { }
+static void wakeup_trace_close(struct trace_iterator *iter) { }
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
/*
* Should this new latency be reported/recorded?
*/
@@ -153,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore,
/* The task we are waiting for is waking up */
data = wakeup_trace->data[wakeup_cpu];
- trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
+ __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
T0 = data->preempt_timestamp;
@@ -253,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
* is not called by an assembly function (where as schedule is)
* it should be safe to use it here.
*/
- trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
arch_spin_unlock(&wakeup_lock);
@@ -304,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr)
*/
smp_wmb();
- register_ftrace_function(&trace_ops);
-
- if (tracing_is_enabled())
- tracer_enabled = 1;
- else
- tracer_enabled = 0;
+ if (start_func_tracer(is_graph()))
+ printk(KERN_ERR "failed to start wakeup tracer\n");
return;
fail_deprobe_wake_new:
@@ -321,7 +516,7 @@ fail_deprobe:
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
- unregister_ftrace_function(&trace_ops);
+ stop_func_tracer(is_graph());
unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -380,9 +575,16 @@ static struct tracer wakeup_tracer __read_mostly =
.start = wakeup_tracer_start,
.stop = wakeup_tracer_stop,
.print_max = 1,
+ .print_header = wakeup_print_header,
+ .print_line = wakeup_print_line,
+ .flags = &tracer_flags,
+ .set_flag = wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
+ .open = wakeup_trace_open,
+ .close = wakeup_trace_close,
+ .use_max_tr = 1,
};
static struct tracer wakeup_rt_tracer __read_mostly =
@@ -394,9 +596,16 @@ static struct tracer wakeup_rt_tracer __read_mostly =
.stop = wakeup_tracer_stop,
.wait_pipe = poll_wait_pipe,
.print_max = 1,
+ .print_header = wakeup_print_header,
+ .print_line = wakeup_print_line,
+ .flags = &tracer_flags,
+ .set_flag = wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
+ .open = wakeup_trace_open,
+ .close = wakeup_trace_close,
+ .use_max_tr = 1,
};
__init static int init_wakeup_tracer(void)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 250e7f9bd2f0..155a415b3209 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,11 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_WAKE:
case TRACE_STACK:
case TRACE_PRINT:
- case TRACE_SPECIAL:
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
- case TRACE_KSYM:
return 1;
}
return 0;
@@ -691,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
}
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
-#ifdef CONFIG_SYSPROF_TRACER
-int
-trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
-{
- unsigned long count;
- int ret;
-
- /* start the tracing */
- ret = tracer_init(trace, tr);
- if (ret) {
- warn_failed_init_tracer(trace, ret);
- return ret;
- }
-
- /* Sleep for a 1/10 of a second */
- msleep(100);
- /* stop the tracing. */
- tracing_stop();
- /* check the trace buffer */
- ret = trace_test_buffer(tr, &count);
- trace->reset(tr);
- tracing_start();
-
- if (!ret && !count) {
- printk(KERN_CONT ".. no entries found ..");
- ret = -1;
- }
-
- return ret;
-}
-#endif /* CONFIG_SYSPROF_TRACER */
-
#ifdef CONFIG_BRANCH_TRACER
int
trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
@@ -755,56 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
}
#endif /* CONFIG_BRANCH_TRACER */
-#ifdef CONFIG_KSYM_TRACER
-static int ksym_selftest_dummy;
-
-int
-trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
-{
- unsigned long count;
- int ret;
-
- /* start the tracing */
- ret = tracer_init(trace, tr);
- if (ret) {
- warn_failed_init_tracer(trace, ret);
- return ret;
- }
-
- ksym_selftest_dummy = 0;
- /* Register the read-write tracing request */
-
- ret = process_new_ksym_entry("ksym_selftest_dummy",
- HW_BREAKPOINT_R | HW_BREAKPOINT_W,
- (unsigned long)(&ksym_selftest_dummy));
-
- if (ret < 0) {
- printk(KERN_CONT "ksym_trace read-write startup test failed\n");
- goto ret_path;
- }
- /* Perform a read and a write operation over the dummy variable to
- * trigger the tracer
- */
- if (ksym_selftest_dummy == 0)
- ksym_selftest_dummy++;
-
- /* stop the tracing. */
- tracing_stop();
- /* check the trace buffer */
- ret = trace_test_buffer(tr, &count);
- trace->reset(tr);
- tracing_start();
-
- /* read & write operations - one each is performed on the dummy variable
- * triggering two entries in the trace buffer
- */
- if (!ret && count != 2) {
- printk(KERN_CONT "Ksym tracer startup test failed");
- ret = -1;
- }
-
-ret_path:
- return ret;
-}
-#endif /* CONFIG_KSYM_TRACER */
-
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index f4bc9b27de5f..4c5dead0c239 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -110,12 +110,12 @@ static inline void check_stack(void)
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip)
{
- int cpu, resched;
+ int cpu;
if (unlikely(!ftrace_enabled || stack_trace_disabled))
return;
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
/* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
out:
per_cpu(trace_active, cpu)--;
/* prevent recursion in schedule */
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __read_mostly =
@@ -195,6 +195,7 @@ static const struct file_operations stack_max_size_fops = {
.open = tracing_open_generic,
.read = stack_max_size_read,
.write = stack_max_size_write,
+ .llseek = default_llseek,
};
static void *
@@ -249,7 +250,7 @@ static int trace_lookup_stack(struct seq_file *m, long i)
{
unsigned long addr = stack_dump_trace[i];
- return seq_printf(m, "%pF\n", (void *)addr);
+ return seq_printf(m, "%pS\n", (void *)addr);
}
static void print_disabled(struct seq_file *m)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 34e35804304b..bac752f0cfb5 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,6 +23,9 @@ static int syscall_exit_register(struct ftrace_event_call *event,
static int syscall_enter_define_fields(struct ftrace_event_call *call);
static int syscall_exit_define_fields(struct ftrace_event_call *call);
+/* All syscall exit events have the same fields */
+static LIST_HEAD(syscall_exit_fields);
+
static struct list_head *
syscall_get_enter_fields(struct ftrace_event_call *call)
{
@@ -34,9 +37,7 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
static struct list_head *
syscall_get_exit_fields(struct ftrace_event_call *call)
{
- struct syscall_metadata *entry = call->data;
-
- return &entry->exit_fields;
+ return &syscall_exit_fields;
}
struct trace_event_functions enter_syscall_print_funcs = {
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
deleted file mode 100644
index a7974a552ca9..000000000000
--- a/kernel/trace/trace_sysprof.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * trace stack traces
- *
- * Copyright (C) 2004-2008, Soeren Sandmann
- * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
- * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
- */
-#include <linux/kallsyms.h>
-#include <linux/debugfs.h>
-#include <linux/hrtimer.h>
-#include <linux/uaccess.h>
-#include <linux/ftrace.h>
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/fs.h>
-
-#include <asm/stacktrace.h>
-
-#include "trace.h"
-
-static struct trace_array *sysprof_trace;
-static int __read_mostly tracer_enabled;
-
-/*
- * 1 msec sample interval by default:
- */
-static unsigned long sample_period = 1000000;
-static const unsigned int sample_max_depth = 512;
-
-static DEFINE_MUTEX(sample_timer_lock);
-/*
- * Per CPU hrtimers that do the profiling:
- */
-static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
-
-struct stack_frame {
- const void __user *next_fp;
- unsigned long return_address;
-};
-
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
-{
- int ret;
-
- if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
- return 0;
-
- ret = 1;
- pagefault_disable();
- if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
- ret = 0;
- pagefault_enable();
-
- return ret;
-}
-
-struct backtrace_info {
- struct trace_array_cpu *data;
- struct trace_array *tr;
- int pos;
-};
-
-static void
-backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- /* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
- /* Ignore warnings */
-}
-
-static int backtrace_stack(void *data, char *name)
-{
- /* Don't bother with IRQ stacks for now */
- return -1;
-}
-
-static void backtrace_address(void *data, unsigned long addr, int reliable)
-{
- struct backtrace_info *info = data;
-
- if (info->pos < sample_max_depth && reliable) {
- __trace_special(info->tr, info->data, 1, addr, 0);
-
- info->pos++;
- }
-}
-
-static const struct stacktrace_ops backtrace_ops = {
- .warning = backtrace_warning,
- .warning_symbol = backtrace_warning_symbol,
- .stack = backtrace_stack,
- .address = backtrace_address,
- .walk_stack = print_context_stack,
-};
-
-static int
-trace_kernel(struct pt_regs *regs, struct trace_array *tr,
- struct trace_array_cpu *data)
-{
- struct backtrace_info info;
- unsigned long bp;
- char *stack;
-
- info.tr = tr;
- info.data = data;
- info.pos = 1;
-
- __trace_special(info.tr, info.data, 1, regs->ip, 0);
-
- stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
- bp = regs->bp;
-#else
- bp = 0;
-#endif
-
- dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
-
- return info.pos;
-}
-
-static void timer_notify(struct pt_regs *regs, int cpu)
-{
- struct trace_array_cpu *data;
- struct stack_frame frame;
- struct trace_array *tr;
- const void __user *fp;
- int is_user;
- int i;
-
- if (!regs)
- return;
-
- tr = sysprof_trace;
- data = tr->data[cpu];
- is_user = user_mode(regs);
-
- if (!current || current->pid == 0)
- return;
-
- if (is_user && current->state != TASK_RUNNING)
- return;
-
- __trace_special(tr, data, 0, 0, current->pid);
-
- if (!is_user)
- i = trace_kernel(regs, tr, data);
- else
- i = 0;
-
- /*
- * Trace user stack if we are not a kernel thread
- */
- if (current->mm && i < sample_max_depth) {
- regs = (struct pt_regs *)current->thread.sp0 - 1;
-
- fp = (void __user *)regs->bp;
-
- __trace_special(tr, data, 2, regs->ip, 0);
-
- while (i < sample_max_depth) {
- frame.next_fp = NULL;
- frame.return_address = 0;
- if (!copy_stack_frame(fp, &frame))
- break;
- if ((unsigned long)fp < regs->sp)
- break;
-
- __trace_special(tr, data, 2, frame.return_address,
- (unsigned long)fp);
- fp = frame.next_fp;
-
- i++;
- }
-
- }
-
- /*
- * Special trace entry if we overflow the max depth:
- */
- if (i == sample_max_depth)
- __trace_special(tr, data, -1, -1, -1);
-
- __trace_special(tr, data, 3, current->pid, i);
-}
-
-static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
-{
- /* trace here */
- timer_notify(get_irq_regs(), smp_processor_id());
-
- hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
-
- return HRTIMER_RESTART;
-}
-
-static void start_stack_timer(void *unused)
-{
- struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
-
- hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hrtimer->function = stack_trace_timer_fn;
-
- hrtimer_start(hrtimer, ns_to_ktime(sample_period),
- HRTIMER_MODE_REL_PINNED);
-}
-
-static void start_stack_timers(void)
-{
- on_each_cpu(start_stack_timer, NULL, 1);
-}
-
-static void stop_stack_timer(int cpu)
-{
- struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
-
- hrtimer_cancel(hrtimer);
-}
-
-static void stop_stack_timers(void)
-{
- int cpu;
-
- for_each_online_cpu(cpu)
- stop_stack_timer(cpu);
-}
-
-static void stop_stack_trace(struct trace_array *tr)
-{
- mutex_lock(&sample_timer_lock);
- stop_stack_timers();
- tracer_enabled = 0;
- mutex_unlock(&sample_timer_lock);
-}
-
-static int stack_trace_init(struct trace_array *tr)
-{
- sysprof_trace = tr;
-
- tracing_start_cmdline_record();
-
- mutex_lock(&sample_timer_lock);
- start_stack_timers();
- tracer_enabled = 1;
- mutex_unlock(&sample_timer_lock);
- return 0;
-}
-
-static void stack_trace_reset(struct trace_array *tr)
-{
- tracing_stop_cmdline_record();
- stop_stack_trace(tr);
-}
-
-static struct tracer stack_trace __read_mostly =
-{
- .name = "sysprof",
- .init = stack_trace_init,
- .reset = stack_trace_reset,
-#ifdef CONFIG_FTRACE_SELFTEST
- .selftest = trace_selftest_startup_sysprof,
-#endif
-};
-
-__init static int init_stack_trace(void)
-{
- return register_tracer(&stack_trace);
-}
-device_initcall(init_stack_trace);
-
-#define MAX_LONG_DIGITS 22
-
-static ssize_t
-sysprof_sample_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[MAX_LONG_DIGITS];
- int r;
-
- r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
-
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-sysprof_sample_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[MAX_LONG_DIGITS];
- unsigned long val;
-
- if (cnt > MAX_LONG_DIGITS-1)
- cnt = MAX_LONG_DIGITS-1;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- val = simple_strtoul(buf, NULL, 10);
- /*
- * Enforce a minimum sample period of 100 usecs:
- */
- if (val < 100)
- val = 100;
-
- mutex_lock(&sample_timer_lock);
- stop_stack_timers();
- sample_period = val * 1000;
- start_stack_timers();
- mutex_unlock(&sample_timer_lock);
-
- return cnt;
-}
-
-static const struct file_operations sysprof_sample_fops = {
- .read = sysprof_sample_read,
- .write = sysprof_sample_write,
-};
-
-void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
-{
-
- trace_create_file("sysprof_sample_period", 0644,
- d_tracer, NULL, &sysprof_sample_fops);
-}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index a7cc3793baf6..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void)
{
int ret, cpu;
+ for_each_possible_cpu(cpu) {
+ spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
+ INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
+ }
+
ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
if (ret)
goto out;
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void)
if (ret)
goto no_creation;
- for_each_possible_cpu(cpu) {
- spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
- INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
- }
-
return 0;
no_creation: