summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2021-02-22 21:21:03 -0800
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2021-02-22 21:21:03 -0800
commit415e915fdfc775ad0c6675fde1008f6f43dd6251 (patch)
tree429851187c0e85daa78f5d2bb6853959a1f5545b /kernel/trace
parente64123949e6c9581c97fc14594f1cf34bf1d87a8 (diff)
parentf40ddce88593482919761f74910f42f4b84c004b (diff)
Merge tag 'v5.11' into next
Merge with mainline to get latest APIs and device tree bindings.
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig74
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/blktrace.c196
-rw-r--r--kernel/trace/bpf_trace.c236
-rw-r--r--kernel/trace/fgraph.c13
-rw-r--r--kernel/trace/ftrace.c103
-rw-r--r--kernel/trace/ring_buffer.c319
-rw-r--r--kernel/trace/synth_event_gen_test.c20
-rw-r--r--kernel/trace/trace.c459
-rw-r--r--kernel/trace/trace.h193
-rw-r--r--kernel/trace/trace_benchmark.c6
-rw-r--r--kernel/trace/trace_boot.c25
-rw-r--r--kernel/trace/trace_dynevent.c12
-rw-r--r--kernel/trace/trace_dynevent.h6
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_event_perf.c15
-rw-r--r--kernel/trace/trace_events.c164
-rw-r--r--kernel/trace/trace_events_filter.c23
-rw-r--r--kernel/trace/trace_events_hist.c47
-rw-r--r--kernel/trace/trace_events_synth.c440
-rw-r--r--kernel/trace/trace_export.c4
-rw-r--r--kernel/trace/trace_functions.c45
-rw-r--r--kernel/trace/trace_functions_graph.c10
-rw-r--r--kernel/trace/trace_hwlat.c14
-rw-r--r--kernel/trace/trace_irqsoff.c6
-rw-r--r--kernel/trace/trace_kprobe.c65
-rw-r--r--kernel/trace/trace_output.c6
-rw-r--r--kernel/trace/trace_output.h1
-rw-r--r--kernel/trace/trace_printk.c12
-rw-r--r--kernel/trace/trace_probe.h14
-rw-r--r--kernel/trace/trace_recursion_record.c236
-rw-r--r--kernel/trace/trace_sched_wakeup.c2
-rw-r--r--kernel/trace/trace_selftest.c38
-rw-r--r--kernel/trace/trace_stack.c15
-rw-r--r--kernel/trace/trace_stat.c8
-rw-r--r--kernel/trace/trace_synth.h6
-rw-r--r--kernel/trace/trace_uprobe.c24
-rw-r--r--kernel/trace/tracing_map.c8
-rw-r--r--kernel/trace/tracing_map.h2
39 files changed, 1942 insertions, 932 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a4020c0b4508..c1a62ae7e812 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -31,6 +31,15 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
bool
+config HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ bool
+ help
+ If this is set, then arguments and stack can be found from
+ the pt_regs passed into the function callback regs parameter
+ by default, even without setting the REGS flag in the ftrace_ops.
+ This allows for use of regs_get_kernel_argument() and
+ kernel_stack_pointer().
+
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
@@ -202,7 +211,7 @@ config DYNAMIC_FTRACE_WITH_REGS
config DYNAMIC_FTRACE_WITH_DIRECT_CALLS
def_bool y
- depends on DYNAMIC_FTRACE
+ depends on DYNAMIC_FTRACE_WITH_REGS
depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
config FUNCTION_PROFILER
@@ -253,7 +262,6 @@ config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
depends on TRACE_IRQFLAGS_SUPPORT
- depends on !ARCH_USES_GETTIMEOFFSET
select TRACE_IRQFLAGS
select GENERIC_TRACER
select TRACER_MAX_TRACE
@@ -277,7 +285,6 @@ config IRQSOFF_TRACER
config PREEMPT_TRACER
bool "Preemption-off Latency Tracer"
default n
- depends on !ARCH_USES_GETTIMEOFFSET
depends on PREEMPTION
select GENERIC_TRACER
select TRACER_MAX_TRACE
@@ -531,7 +538,7 @@ config KPROBE_EVENTS
config KPROBE_EVENTS_ON_NOTRACE
bool "Do NOT protect notrace function from kprobe events"
depends on KPROBE_EVENTS
- depends on KPROBES_ON_FTRACE
+ depends on DYNAMIC_FTRACE
default n
help
This is only for the developers who want to debug ftrace itself
@@ -727,6 +734,45 @@ config TRACE_EVAL_MAP_FILE
If unsure, say N.
+config FTRACE_RECORD_RECURSION
+ bool "Record functions that recurse in function tracing"
+ depends on FUNCTION_TRACER
+ help
+ All callbacks that attach to the function tracing have some sort
+ of protection against recursion. Even though the protection exists,
+ it adds overhead. This option will create a file in the tracefs
+ file system called "recursed_functions" that will list the functions
+ that triggered a recursion.
+
+ This will add more overhead to cases that have recursion.
+
+ If unsure, say N
+
+config FTRACE_RECORD_RECURSION_SIZE
+ int "Max number of recursed functions to record"
+ default 128
+ depends on FTRACE_RECORD_RECURSION
+ help
+ This defines the limit of number of functions that can be
+ listed in the "recursed_functions" file, that lists all
+ the functions that caused a recursion to happen.
+ This file can be reset, but the limit can not change in
+ size at runtime.
+
+config RING_BUFFER_RECORD_RECURSION
+ bool "Record functions that recurse in the ring buffer"
+ depends on FTRACE_RECORD_RECURSION
+ # default y, because it is coupled with FTRACE_RECORD_RECURSION
+ default y
+ help
+ The ring buffer has its own internal recursion. Although when
+ recursion happens it wont cause harm because of the protection,
+ but it does cause an unwanted overhead. Enabling this option will
+ place where recursion was detected into the ftrace "recursed_functions"
+ file.
+
+ This will add more overhead to cases that have recursion.
+
config GCOV_PROFILE_FTRACE
bool "Enable GCOV profiling on ftrace subsystem"
depends on GCOV_KERNEL
@@ -797,6 +843,26 @@ config RING_BUFFER_STARTUP_TEST
If unsure, say N
+config RING_BUFFER_VALIDATE_TIME_DELTAS
+ bool "Verify ring buffer time stamp deltas"
+ depends on RING_BUFFER
+ help
+ This will audit the time stamps on the ring buffer sub
+ buffer to make sure that all the time deltas for the
+ events on a sub buffer matches the current time stamp.
+ This audit is performed for every event that is not
+ interrupted, or interrupting another event. A check
+ is also made when traversing sub buffers to make sure
+ that all the deltas on the previous sub buffer do not
+ add up to be greater than the current time stamp.
+
+ NOTE: This adds significant overhead to recording of events,
+ and should only be used to test the logic of the ring buffer.
+ Do not use it on production systems.
+
+ Only say Y if you understand what this does, and you
+ still want it enabled. Otherwise say N
+
config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e153be351548..7e44cea89fdc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
+obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 4b3a42fc3b24..fb0fe4c66b84 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -449,7 +449,7 @@ static struct dentry *blk_create_buf_file_callback(const char *filename,
&relay_file_operations);
}
-static struct rchan_callbacks blk_relay_callbacks = {
+static const struct rchan_callbacks blk_relay_callbacks = {
.subbuf_start = blk_subbuf_start_callback,
.create_buf_file = blk_create_buf_file_callback,
.remove_buf_file = blk_remove_buf_file_callback,
@@ -458,14 +458,9 @@ static struct rchan_callbacks blk_relay_callbacks = {
static void blk_trace_setup_lba(struct blk_trace *bt,
struct block_device *bdev)
{
- struct hd_struct *part = NULL;
-
- if (bdev)
- part = bdev->bd_part;
-
- if (part) {
- bt->start_lba = part->start_sect;
- bt->end_lba = part->start_sect + part->nr_sects;
+ if (bdev) {
+ bt->start_lba = bdev->bd_start_sect;
+ bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev);
} else {
bt->start_lba = 0;
bt->end_lba = -1ULL;
@@ -527,7 +522,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
* and scsi-generic block devices we create a temporary new debugfs
* directory that will be removed once the trace ends.
*/
- if (bdev && bdev == bdev->bd_contains)
+ if (bdev && !bdev_is_partition(bdev))
dir = q->debugfs_dir;
else
bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
@@ -793,19 +788,19 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
return cgroup_id(bio_blkcg(bio)->css.cgroup);
}
#else
-u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
return 0;
}
#endif
static u64
-blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
+blk_trace_request_get_cgid(struct request *rq)
{
if (!rq->bio)
return 0;
/* Use the first bio */
- return blk_trace_bio_get_cgid(q, rq->bio);
+ return blk_trace_bio_get_cgid(rq->q, rq->bio);
}
/*
@@ -846,40 +841,35 @@ static void blk_add_trace_rq(struct request *rq, int error,
rcu_read_unlock();
}
-static void blk_add_trace_rq_insert(void *ignore,
- struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_insert(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
-static void blk_add_trace_rq_issue(void *ignore,
- struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_issue(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
-static void blk_add_trace_rq_merge(void *ignore,
- struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_merge(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
-static void blk_add_trace_rq_requeue(void *ignore,
- struct request_queue *q,
- struct request *rq)
+static void blk_add_trace_rq_requeue(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
int error, unsigned int nr_bytes)
{
blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE,
- blk_trace_request_get_cgid(rq->q, rq));
+ blk_trace_request_get_cgid(rq));
}
/**
@@ -911,10 +901,9 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
rcu_read_unlock();
}
-static void blk_add_trace_bio_bounce(void *ignore,
- struct request_queue *q, struct bio *bio)
+static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
+ blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BOUNCE, 0);
}
static void blk_add_trace_bio_complete(void *ignore,
@@ -924,63 +913,24 @@ static void blk_add_trace_bio_complete(void *ignore,
blk_status_to_errno(bio->bi_status));
}
-static void blk_add_trace_bio_backmerge(void *ignore,
- struct request_queue *q,
- struct request *rq,
- struct bio *bio)
+static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
+ blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BACKMERGE, 0);
}
-static void blk_add_trace_bio_frontmerge(void *ignore,
- struct request_queue *q,
- struct request *rq,
- struct bio *bio)
+static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
+ blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_FRONTMERGE, 0);
}
-static void blk_add_trace_bio_queue(void *ignore,
- struct request_queue *q, struct bio *bio)
+static void blk_add_trace_bio_queue(void *ignore, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
+ blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_QUEUE, 0);
}
-static void blk_add_trace_getrq(void *ignore,
- struct request_queue *q,
- struct bio *bio, int rw)
-{
- if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
- else {
- struct blk_trace *bt;
-
- rcu_read_lock();
- bt = rcu_dereference(q->blk_trace);
- if (bt)
- __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
- NULL, 0);
- rcu_read_unlock();
- }
-}
-
-
-static void blk_add_trace_sleeprq(void *ignore,
- struct request_queue *q,
- struct bio *bio, int rw)
+static void blk_add_trace_getrq(void *ignore, struct bio *bio)
{
- if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
- else {
- struct blk_trace *bt;
-
- rcu_read_lock();
- bt = rcu_dereference(q->blk_trace);
- if (bt)
- __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
- 0, 0, NULL, 0);
- rcu_read_unlock();
- }
+ blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_GETRQ, 0);
}
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
@@ -1015,10 +965,9 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
rcu_read_unlock();
}
-static void blk_add_trace_split(void *ignore,
- struct request_queue *q, struct bio *bio,
- unsigned int pdu)
+static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu)
{
+ struct request_queue *q = bio->bi_disk->queue;
struct blk_trace *bt;
rcu_read_lock();
@@ -1039,20 +988,16 @@ static void blk_add_trace_split(void *ignore,
/**
* blk_add_trace_bio_remap - Add a trace for a bio-remap operation
* @ignore: trace callback data parameter (not used)
- * @q: queue the io is for
* @bio: the source bio
- * @dev: target device
+ * @dev: source device
* @from: source sector
*
- * Description:
- * Device mapper or raid target sometimes need to split a bio because
- * it spans a stripe (or similar). Add a trace for that action.
- *
+ * Called after a bio is remapped to a different device and/or sector.
**/
-static void blk_add_trace_bio_remap(void *ignore,
- struct request_queue *q, struct bio *bio,
- dev_t dev, sector_t from)
+static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev,
+ sector_t from)
{
+ struct request_queue *q = bio->bi_disk->queue;
struct blk_trace *bt;
struct blk_io_trace_remap r;
@@ -1077,7 +1022,6 @@ static void blk_add_trace_bio_remap(void *ignore,
/**
* blk_add_trace_rq_remap - Add a trace for a request-remap operation
* @ignore: trace callback data parameter (not used)
- * @q: queue the io is for
* @rq: the source request
* @dev: target device
* @from: source sector
@@ -1087,16 +1031,14 @@ static void blk_add_trace_bio_remap(void *ignore,
* Add a trace for that action.
*
**/
-static void blk_add_trace_rq_remap(void *ignore,
- struct request_queue *q,
- struct request *rq, dev_t dev,
+static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
sector_t from)
{
struct blk_trace *bt;
struct blk_io_trace_remap r;
rcu_read_lock();
- bt = rcu_dereference(q->blk_trace);
+ bt = rcu_dereference(rq->q->blk_trace);
if (likely(!bt)) {
rcu_read_unlock();
return;
@@ -1108,13 +1050,12 @@ static void blk_add_trace_rq_remap(void *ignore,
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
- sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
+ sizeof(r), &r, blk_trace_request_get_cgid(rq));
rcu_read_unlock();
}
/**
* blk_add_driver_data - Add binary message with driver-specific data
- * @q: queue the io is for
* @rq: io request
* @data: driver-specific data
* @len: length of driver-specific data
@@ -1123,14 +1064,12 @@ static void blk_add_trace_rq_remap(void *ignore,
* Some drivers might want to write driver-specific data per request.
*
**/
-void blk_add_driver_data(struct request_queue *q,
- struct request *rq,
- void *data, size_t len)
+void blk_add_driver_data(struct request *rq, void *data, size_t len)
{
struct blk_trace *bt;
rcu_read_lock();
- bt = rcu_dereference(q->blk_trace);
+ bt = rcu_dereference(rq->q->blk_trace);
if (likely(!bt)) {
rcu_read_unlock();
return;
@@ -1138,7 +1077,7 @@ void blk_add_driver_data(struct request_queue *q,
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
BLK_TA_DRV_DATA, 0, len, data,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -1169,8 +1108,6 @@ static void blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
WARN_ON(ret);
- ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
- WARN_ON(ret);
ret = register_trace_block_plug(blk_add_trace_plug, NULL);
WARN_ON(ret);
ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
@@ -1190,7 +1127,6 @@ static void blk_unregister_tracepoints(void)
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
unregister_trace_block_plug(blk_add_trace_plug, NULL);
- unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
@@ -1343,7 +1279,7 @@ static void blk_log_action(struct trace_iterator *iter, const char *act,
* ones now use the 64bit ino as the whole ID and
* no longer use generation.
*
- * Regarldess of the content, always output
+ * Regardless of the content, always output
* "LOW32,HIGH32" so that FILEID_INO32_GEN fid can
* be mapped back to @id on both 64 and 32bit ino
* setups. See __kernfs_fh_to_dentry().
@@ -1385,7 +1321,7 @@ static void blk_log_dump_pdu(struct trace_seq *s,
i == 0 ? "" : " ", pdu_buf[i]);
/*
- * stop when the rest is just zeroes and indicate so
+ * stop when the rest is just zeros and indicate so
* with a ".." appended
*/
if (i == end && end != pdu_len - 1) {
@@ -1815,32 +1751,15 @@ static ssize_t blk_trace_mask2str(char *buf, int mask)
return p - buf;
}
-static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
-{
- if (bdev->bd_disk == NULL)
- return NULL;
-
- return bdev_get_queue(bdev);
-}
-
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q;
- struct block_device *bdev;
+ struct block_device *bdev = dev_to_bdev(dev);
+ struct request_queue *q = bdev_get_queue(bdev);
struct blk_trace *bt;
ssize_t ret = -ENXIO;
- bdev = bdget(part_devt(p));
- if (bdev == NULL)
- goto out;
-
- q = blk_trace_get_queue(bdev);
- if (q == NULL)
- goto out_bdput;
-
mutex_lock(&q->debugfs_mutex);
bt = rcu_dereference_protected(q->blk_trace,
@@ -1863,9 +1782,6 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
out_unlock_bdev:
mutex_unlock(&q->debugfs_mutex);
-out_bdput:
- bdput(bdev);
-out:
return ret;
}
@@ -1873,9 +1789,8 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- struct block_device *bdev;
- struct request_queue *q;
- struct hd_struct *p;
+ struct block_device *bdev = dev_to_bdev(dev);
+ struct request_queue *q = bdev_get_queue(bdev);
struct blk_trace *bt;
u64 value;
ssize_t ret = -EINVAL;
@@ -1891,19 +1806,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
goto out;
value = ret;
}
- } else if (kstrtoull(buf, 0, &value))
- goto out;
-
- ret = -ENXIO;
-
- p = dev_to_part(dev);
- bdev = bdget(part_devt(p));
- if (bdev == NULL)
- goto out;
-
- q = blk_trace_get_queue(bdev);
- if (q == NULL)
- goto out_bdput;
+ } else {
+ if (kstrtoull(buf, 0, &value))
+ goto out;
+ }
mutex_lock(&q->debugfs_mutex);
@@ -1941,8 +1847,6 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
out_unlock_bdev:
mutex_unlock(&q->debugfs_mutex);
-out_bdput:
- bdput(bdev);
out:
return ret ? ret : count;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a8d4f253ed77..764400260eb6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
+#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
@@ -15,6 +16,12 @@
#include <linux/syscalls.h>
#include <linux/error-injection.h>
#include <linux/btf_ids.h>
+#include <linux/bpf_lsm.h>
+
+#include <net/bpf_sk_storage.h>
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/btf.h>
#include <asm/tlb.h>
@@ -67,6 +74,10 @@ static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+ u64 flags, const struct btf **btf,
+ s32 *btf_id);
+
/**
* trace_call_bpf - invoke BPF program
* @call: tracepoint event
@@ -85,9 +96,6 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
unsigned int ret;
- if (in_nmi()) /* not supported yet */
- return 1;
-
cant_sleep();
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -105,7 +113,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
* to all call sites, we did a bpf_prog_array_valid() there to check
* whether call->prog_array is empty or not, which is
- * a heurisitc to speed up execution.
+ * a heuristic to speed up execution.
*
* If bpf_prog_array_valid() fetched prog_array was
* non-NULL, we go into trace_call_bpf() and do the actual
@@ -173,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
{
int ret;
+ /*
+ * NB: We rely on strncpy_from_user() not copying junk past the NUL
+ * terminator into `dst`.
+ *
+ * strncpy_from_user() does long-sized strides in the fast path. If the
+ * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
+ * then there could be junk after the NUL in `dst`. If user takes `dst`
+ * and keys a hash map with it, then semantically identical strings can
+ * occupy multiple entries in the map.
+ */
ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
@@ -743,19 +761,18 @@ out:
return err;
}
-BTF_ID_LIST(bpf_seq_printf_btf_ids)
-BTF_ID(struct, seq_file)
+BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
static const struct bpf_func_proto bpf_seq_printf_proto = {
.func = bpf_seq_printf,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM_OR_NULL,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_seq_printf_btf_ids,
};
BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
@@ -763,17 +780,39 @@ BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
return seq_write(m, data, len) ? -EOVERFLOW : 0;
}
-BTF_ID_LIST(bpf_seq_write_btf_ids)
-BTF_ID(struct, seq_file)
-
static const struct bpf_func_proto bpf_seq_write_proto = {
.func = bpf_seq_write,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_seq_write_btf_ids,
+};
+
+BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
+ u32, btf_ptr_size, u64, flags)
+{
+ const struct btf *btf;
+ s32 btf_id;
+ int ret;
+
+ ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+ if (ret)
+ return ret;
+
+ return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
+}
+
+static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
+ .func = bpf_seq_printf_btf,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
};
static __always_inline int
@@ -993,6 +1032,20 @@ const struct bpf_func_proto bpf_get_current_task_proto = {
.ret_type = RET_INTEGER,
};
+BPF_CALL_0(bpf_get_current_task_btf)
+{
+ return (unsigned long) current;
+}
+
+BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct)
+
+static const struct bpf_func_proto bpf_get_current_task_btf_proto = {
+ .func = bpf_get_current_task_btf,
+ .gpl_only = true,
+ .ret_type = RET_PTR_TO_BTF_ID,
+ .ret_btf_id = &bpf_get_current_btf_ids[0],
+};
+
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -1057,7 +1110,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
return -EINVAL;
work = this_cpu_ptr(&send_signal_work);
- if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
+ if (irq_work_is_busy(&work->irq_work))
return -EBUSY;
/* Add the current task, which is the target of sending signal,
@@ -1098,6 +1151,122 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = {
.arg1_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
+{
+ long len;
+ char *p;
+
+ if (!sz)
+ return 0;
+
+ p = d_path(path, buf, sz);
+ if (IS_ERR(p)) {
+ len = PTR_ERR(p);
+ } else {
+ len = buf + sz - p;
+ memmove(buf, p, len);
+ }
+
+ return len;
+}
+
+BTF_SET_START(btf_allowlist_d_path)
+#ifdef CONFIG_SECURITY
+BTF_ID(func, security_file_permission)
+BTF_ID(func, security_inode_getattr)
+BTF_ID(func, security_file_open)
+#endif
+#ifdef CONFIG_SECURITY_PATH
+BTF_ID(func, security_path_truncate)
+#endif
+BTF_ID(func, vfs_truncate)
+BTF_ID(func, vfs_fallocate)
+BTF_ID(func, dentry_open)
+BTF_ID(func, vfs_getattr)
+BTF_ID(func, filp_close)
+BTF_SET_END(btf_allowlist_d_path)
+
+static bool bpf_d_path_allowed(const struct bpf_prog *prog)
+{
+ if (prog->type == BPF_PROG_TYPE_LSM)
+ return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
+
+ return btf_id_set_contains(&btf_allowlist_d_path,
+ prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
+
+static const struct bpf_func_proto bpf_d_path_proto = {
+ .func = bpf_d_path,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_d_path_btf_ids[0],
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .allowed = bpf_d_path_allowed,
+};
+
+#define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \
+ BTF_F_PTR_RAW | BTF_F_ZERO)
+
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+ u64 flags, const struct btf **btf,
+ s32 *btf_id)
+{
+ const struct btf_type *t;
+
+ if (unlikely(flags & ~(BTF_F_ALL)))
+ return -EINVAL;
+
+ if (btf_ptr_size != sizeof(struct btf_ptr))
+ return -EINVAL;
+
+ *btf = bpf_get_btf_vmlinux();
+
+ if (IS_ERR_OR_NULL(*btf))
+ return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
+
+ if (ptr->type_id > 0)
+ *btf_id = ptr->type_id;
+ else
+ return -EINVAL;
+
+ if (*btf_id > 0)
+ t = btf_type_by_id(*btf, *btf_id);
+ if (*btf_id <= 0 || !t)
+ return -ENOENT;
+
+ return 0;
+}
+
+BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
+ u32, btf_ptr_size, u64, flags)
+{
+ const struct btf *btf;
+ s32 btf_id;
+ int ret;
+
+ ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+ if (ret)
+ return ret;
+
+ return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
+ flags);
+}
+
+const struct bpf_func_proto bpf_snprintf_btf_proto = {
+ .func = bpf_snprintf_btf,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1118,12 +1287,16 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_ktime_get_boot_ns:
return &bpf_ktime_get_boot_ns_proto;
+ case BPF_FUNC_ktime_get_coarse_ns:
+ return &bpf_ktime_get_coarse_ns_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_get_current_pid_tgid:
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_current_task:
return &bpf_get_current_task_proto;
+ case BPF_FUNC_get_current_task_btf:
+ return &bpf_get_current_task_btf_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_current_comm:
@@ -1182,6 +1355,14 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_jiffies64_proto;
case BPF_FUNC_get_task_stack:
return &bpf_get_task_stack_proto;
+ case BPF_FUNC_copy_from_user:
+ return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+ case BPF_FUNC_snprintf_btf:
+ return &bpf_snprintf_btf_proto;
+ case BPF_FUNC_per_cpu_ptr:
+ return &bpf_per_cpu_ptr_proto;
+ case BPF_FUNC_this_cpu_ptr:
+ return &bpf_this_cpu_ptr_proto;
default:
return NULL;
}
@@ -1570,6 +1751,12 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_to_tcp_request_sock_proto;
case BPF_FUNC_skc_to_udp6_sock:
return &bpf_skc_to_udp6_sock_proto;
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_tracing_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_tracing_proto;
+ case BPF_FUNC_sock_from_file:
+ return &bpf_sock_from_file_proto;
#endif
case BPF_FUNC_seq_printf:
return prog->expected_attach_type == BPF_TRACE_ITER ?
@@ -1579,6 +1766,12 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->expected_attach_type == BPF_TRACE_ITER ?
&bpf_seq_write_proto :
NULL;
+ case BPF_FUNC_seq_printf_btf:
+ return prog->expected_attach_type == BPF_TRACE_ITER ?
+ &bpf_seq_printf_btf_proto :
+ NULL;
+ case BPF_FUNC_d_path:
+ return &bpf_d_path_proto;
default:
return raw_tp_prog_func_proto(func_id, prog);
}
@@ -1625,6 +1818,9 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
};
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+#ifdef CONFIG_NET
+ .test_run = bpf_prog_test_run_raw_tp,
+#endif
};
const struct bpf_verifier_ops tracing_verifier_ops = {
@@ -1873,10 +2069,12 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
{
- struct module *mod = __module_address((unsigned long)btp);
+ struct module *mod;
- if (mod)
- module_put(mod);
+ preempt_disable();
+ mod = __module_address((unsigned long)btp);
+ module_put(mod);
+ preempt_enable();
}
static __always_inline
@@ -2027,10 +2225,11 @@ static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
{
struct bpf_trace_module *btm, *tmp;
struct module *mod = module;
+ int ret = 0;
if (mod->num_bpf_raw_events == 0 ||
(op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
- return 0;
+ goto out;
mutex_lock(&bpf_module_mutex);
@@ -2040,6 +2239,8 @@ static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
if (btm) {
btm->module = module;
list_add(&btm->list, &bpf_trace_modules);
+ } else {
+ ret = -ENOMEM;
}
break;
case MODULE_STATE_GOING:
@@ -2055,7 +2256,8 @@ static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
mutex_unlock(&bpf_module_mutex);
- return 0;
+out:
+ return notifier_from_errno(ret);
}
static struct notifier_block bpf_module_nb = {
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 1af321dec0f1..29a6ebeebc9e 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -334,8 +334,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
static struct ftrace_ops graph_ops = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID |
FTRACE_OPS_FL_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
@@ -387,15 +386,14 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
}
}
- read_lock(&tasklist_lock);
- do_each_thread(g, t) {
+ rcu_read_lock();
+ for_each_process_thread(g, t) {
if (start == end) {
ret = -EAGAIN;
goto unlock;
}
if (t->ret_stack == NULL) {
- atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->curr_ret_stack = -1;
t->curr_ret_depth = -1;
@@ -403,10 +401,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
smp_wmb();
t->ret_stack = ret_stack_list[start++];
}
- } while_each_thread(g, t);
+ }
unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
free:
for (i = start; i < end; i++)
kfree(ret_stack_list[i]);
@@ -490,7 +488,6 @@ static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
static void
graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
{
- atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->ftrace_timestamp = 0;
/* make curr_ret_stack visible before we add the ret_stack */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 541453927c82..4d8e35575549 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -80,7 +80,7 @@ enum {
struct ftrace_ops ftrace_list_end __read_mostly = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
+ .flags = FTRACE_OPS_FL_STUB,
INIT_OPS_HASH(ftrace_list_end)
};
@@ -121,7 +121,7 @@ struct ftrace_ops global_ops;
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs);
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
@@ -140,7 +140,7 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
}
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = op->private;
int pid;
@@ -154,7 +154,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
return;
}
- op->saved_func(ip, parent_ip, op, regs);
+ op->saved_func(ip, parent_ip, op, fregs);
}
static void ftrace_sync_ipi(void *data)
@@ -230,7 +230,7 @@ static void update_ftrace_function(void)
/*
* For static tracing, we need to be a bit more careful.
* The function change takes affect immediately. Thus,
- * we need to coorditate the setting of the function_trace_ops
+ * we need to coordinate the setting of the function_trace_ops
* with the setting of the ftrace_trace_function.
*
* Set the function to the list ops, which will call the
@@ -754,7 +754,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
static void
function_profile_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct ftrace_profile_stat *stat;
struct ftrace_profile *rec;
@@ -866,7 +866,7 @@ static void unregister_ftrace_profiler(void)
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
.func = function_profile_call,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+ .flags = FTRACE_OPS_FL_INITIALIZED,
INIT_OPS_HASH(ftrace_profile_ops)
};
@@ -1040,8 +1040,7 @@ struct ftrace_ops global_ops = {
.local_hash.notrace_hash = EMPTY_HASH,
.local_hash.filter_hash = EMPTY_HASH,
INIT_OPS_HASH(global_ops)
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID,
};
@@ -1368,10 +1367,10 @@ static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
int i;
/*
- * Make the hash size about 1/2 the # found
+ * Use around half the size (max bit of it), but
+ * a minimum of 2 is fine (as size of 0 or 1 both give 1 for bits).
*/
- for (size /= 2; size; size >>= 1)
- bits++;
+ bits = fls(size / 2);
/* Don't allocate too much */
if (bits > FTRACE_HASH_MAX_BITS)
@@ -1451,7 +1450,7 @@ static bool hash_contains_ip(unsigned long ip,
{
/*
* The function record is a match if it exists in the filter
- * hash and not in the notrace hash. Note, an emty hash is
+ * hash and not in the notrace hash. Note, an empty hash is
* considered a match for the filter hash, but an empty
* notrace hash is considered not in the notrace hash.
*/
@@ -1629,6 +1628,8 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
static struct ftrace_ops *
ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
static struct ftrace_ops *
+ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude);
+static struct ftrace_ops *
ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
@@ -1778,7 +1779,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
* to it.
*/
if (ftrace_rec_count(rec) == 1 &&
- ftrace_find_tramp_ops_any(rec))
+ ftrace_find_tramp_ops_any_other(rec, ops))
rec->flags |= FTRACE_FL_TRAMP;
else
rec->flags &= ~FTRACE_FL_TRAMP;
@@ -2144,6 +2145,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
else
rec->flags &= ~FTRACE_FL_TRAMP_EN;
}
+
if (flag & FTRACE_FL_DIRECT) {
/*
* If there's only one user (direct_ops helper)
@@ -2245,6 +2247,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec)
}
static struct ftrace_ops *
+ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude)
+{
+ struct ftrace_ops *op;
+ unsigned long ip = rec->ip;
+
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+
+ if (op == op_exclude || !op->trampoline)
+ continue;
+
+ if (hash_contains_ip(ip, op->func_hash))
+ return op;
+ } while_for_each_ftrace_op(op);
+
+ return NULL;
+}
+
+static struct ftrace_ops *
ftrace_find_tramp_ops_next(struct dyn_ftrace *rec,
struct ftrace_ops *op)
{
@@ -2369,8 +2389,9 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
}
static void call_direct_funcs(unsigned long ip, unsigned long pip,
- struct ftrace_ops *ops, struct pt_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
+ struct pt_regs *regs = ftrace_get_regs(fregs);
unsigned long addr;
addr = ftrace_find_rec_direct(ip);
@@ -2382,7 +2403,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
struct ftrace_ops direct_ops = {
.func = call_direct_funcs,
- .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE
+ .flags = FTRACE_OPS_FL_IPMODIFY
| FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
| FTRACE_OPS_FL_PERMANENT,
/*
@@ -2402,7 +2423,7 @@ struct ftrace_ops direct_ops = {
*
* If the record has the FTRACE_FL_REGS set, that means that it
* wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
- * is not not set, then it wants to convert to the normal callback.
+ * is not set, then it wants to convert to the normal callback.
*
* Returns the address of the trampoline to set to
*/
@@ -2976,7 +2997,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
synchronize_rcu_tasks_rude();
/*
- * When the kernel is preeptive, tasks can be preempted
+ * When the kernel is preemptive, tasks can be preempted
* while on a ftrace trampoline. Just scheduling a task on
* a CPU is not good enough to flush them. Calling
* synchornize_rcu_tasks() will wait for those tasks to
@@ -3129,18 +3150,20 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
static int ftrace_allocate_records(struct ftrace_page *pg, int count)
{
int order;
+ int pages;
int cnt;
if (WARN_ON(!count))
return -EINVAL;
- order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
+ pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
+ order = get_count_order(pages);
/*
* We want to fill as much as possible. No more than a page
* may be empty.
*/
- while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
+ if (!is_power_of_2(pages))
order--;
again:
@@ -4161,7 +4184,6 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
struct ftrace_hash **orig_hash, *new_hash;
LIST_HEAD(process_mods);
char *func;
- int ret;
mutex_lock(&ops->func_hash->regex_lock);
@@ -4214,7 +4236,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move_and_update_ops(ops, orig_hash,
+ ftrace_hash_move_and_update_ops(ops, orig_hash,
new_hash, enable);
mutex_unlock(&ftrace_lock);
@@ -4292,7 +4314,7 @@ static int __init ftrace_mod_cmd_init(void)
core_initcall(ftrace_mod_cmd_init);
static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct ftrace_probe_ops *probe_ops;
struct ftrace_func_probe *probe;
@@ -4368,7 +4390,7 @@ void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper,
* @ip: The instruction pointer address to map @data to
* @data: The data to map to @ip
*
- * Returns 0 on succes otherwise an error.
+ * Returns 0 on success otherwise an error.
*/
int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
unsigned long ip, void *data)
@@ -4536,7 +4558,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
/*
* Note, there's a small window here that the func_hash->filter_hash
- * may be NULL or empty. Need to be carefule when reading the loop.
+ * may be NULL or empty. Need to be careful when reading the loop.
*/
mutex_lock(&probe->ops.func_hash->regex_lock);
@@ -5566,7 +5588,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
struct ftrace_hash **orig_hash;
struct trace_parser *parser;
int filter_hash;
- int ret;
if (file->f_mode & FMODE_READ) {
iter = m->private;
@@ -5594,7 +5615,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash = &iter->ops->func_hash->notrace_hash;
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
+ ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
iter->hash, filter_hash);
mutex_unlock(&ftrace_lock);
} else {
@@ -6862,8 +6883,7 @@ void ftrace_init_trace_array(struct trace_array *tr)
struct ftrace_ops global_ops = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID,
};
@@ -6913,12 +6933,13 @@ void ftrace_reset_array_ops(struct trace_array *tr)
static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ignored, struct pt_regs *regs)
+ struct ftrace_ops *ignored, struct ftrace_regs *fregs)
{
+ struct pt_regs *regs = ftrace_get_regs(fregs);
struct ftrace_ops *op;
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
@@ -6947,7 +6968,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
pr_warn("op=%p %pS\n", op, op);
goto out;
}
- op->func(ip, parent_ip, op, regs);
+ op->func(ip, parent_ip, op, fregs);
}
} while_for_each_ftrace_op(op);
out:
@@ -6970,9 +6991,9 @@ out:
*/
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- __ftrace_ops_list_func(ip, parent_ip, NULL, regs);
+ __ftrace_ops_list_func(ip, parent_ip, NULL, fregs);
}
NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
@@ -6989,18 +7010,18 @@ NOKPROBE_SYMBOL(ftrace_ops_no_ops);
* this function will be called by the mcount trampoline.
*/
static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
preempt_disable_notrace();
if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
- op->func(ip, parent_ip, op, regs);
+ op->func(ip, parent_ip, op, fregs);
preempt_enable_notrace();
trace_clear_recursion(bit);
@@ -7021,11 +7042,11 @@ NOKPROBE_SYMBOL(ftrace_ops_assist_func);
ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
{
/*
- * If the function does not handle recursion, needs to be RCU safe,
- * or does per cpu logic, then we need to call the assist handler.
+ * If the function does not handle recursion or needs to be RCU safe,
+ * then we need to call the assist handler.
*/
- if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) ||
- ops->flags & FTRACE_OPS_FL_RCU)
+ if (ops->flags & (FTRACE_OPS_FL_RECURSION |
+ FTRACE_OPS_FL_RCU))
return ftrace_ops_assist_func;
return ops->func;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 93ef0ab6ea20..ec08f948dd80 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
+#include <linux/trace_recursion.h>
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
@@ -129,7 +130,16 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
-#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT)
+
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
+#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -438,14 +448,16 @@ enum {
};
/*
* Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
+ RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -793,7 +805,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
- * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
@@ -1420,7 +1432,8 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
return 0;
}
-static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
+static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ long nr_pages, struct list_head *pages)
{
struct buffer_page *bpage, *tmp;
bool user_thread = current->mm != NULL;
@@ -1460,13 +1473,15 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
struct page *page;
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- mflags, cpu_to_node(cpu));
+ mflags, cpu_to_node(cpu_buffer->cpu));
if (!bpage)
goto free_pages;
+ rb_check_bpage(cpu_buffer, bpage);
+
list_add(&bpage->list, pages);
- page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
+ page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, 0);
if (!page)
goto free_pages;
bpage->page = page_address(page);
@@ -1498,7 +1513,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
WARN_ON(!nr_pages);
- if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
+ if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages))
return -ENOMEM;
/*
@@ -1952,18 +1967,18 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long nr_pages;
- int cpu, err = 0;
+ int cpu, err;
/*
* Always succeed at resizing a non-existent buffer:
*/
if (!buffer)
- return size;
+ return 0;
/* Make sure the requested buffer exists */
if (cpu_id != RING_BUFFER_ALL_CPUS &&
!cpumask_test_cpu(cpu_id, buffer->cpumask))
- return size;
+ return 0;
nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
@@ -1971,8 +1986,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
if (nr_pages < 2)
nr_pages = 2;
- size = nr_pages * BUF_PAGE_SIZE;
-
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
@@ -2007,8 +2020,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
* allocated without receiving ENOMEM
*/
INIT_LIST_HEAD(&cpu_buffer->new_pages);
- if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu)) {
+ if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages)) {
/* not enough memory for new pages */
err = -ENOMEM;
goto out_err;
@@ -2073,8 +2086,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
INIT_LIST_HEAD(&cpu_buffer->new_pages);
if (cpu_buffer->nr_pages_to_update > 0 &&
- __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu_id)) {
+ __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages)) {
err = -ENOMEM;
goto out_err;
}
@@ -2119,7 +2132,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
}
mutex_unlock(&buffer->mutex);
- return size;
+ return 0;
out_err:
for_each_buffer_cpu(buffer, cpu) {
@@ -2626,9 +2639,6 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
return skip_time_extend(event);
}
-static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event);
-
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
static inline bool sched_clock_stable(void)
{
@@ -2717,7 +2727,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA) {
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length;
} else
@@ -2732,11 +2742,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length++;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ALIGNMENT);
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);
/*
* In case the time delta is larger than the 27 bits for it
@@ -2756,20 +2766,6 @@ static unsigned rb_calculate_event_length(unsigned length)
return length;
}
-static __always_inline bool
-rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- unsigned long addr = (unsigned long)event;
- unsigned long index;
-
- index = rb_event_index(event);
- addr &= PAGE_MASK;
-
- return cpu_buffer->commit_page->page == (void *)addr &&
- rb_commit_index(cpu_buffer) == index;
-}
-
static u64 rb_time_delta(struct ring_buffer_event *event)
{
switch (event->type_len) {
@@ -3004,6 +3000,13 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
irq_work_queue(&cpu_buffer->irq_work.work);
}
+#ifdef CONFIG_RING_BUFFER_RECORD_RECURSION
+# define do_ring_buffer_record_recursion() \
+ do_ftrace_record_recursion(_THIS_IP_, _RET_IP_)
+#else
+# define do_ring_buffer_record_recursion() do { } while (0)
+#endif
+
/*
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
@@ -3014,10 +3017,10 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -3040,6 +3043,30 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
*/
static __always_inline int
@@ -3055,8 +3082,18 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
bit = pc & NMI_MASK ? RB_CTX_NMI :
pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
- if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
- return 1;
+ if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
+ /*
+ * It is possible that this was called by transitioning
+ * between interrupt context, and preempt_count() has not
+ * been updated yet. In this case, use the TRANSITION bit.
+ */
+ bit = RB_CTX_TRANSITION;
+ if (val & (1 << (bit + cpu_buffer->nest))) {
+ do_ring_buffer_record_recursion();
+ return 1;
+ }
+ }
val |= (1 << (bit + cpu_buffer->nest));
cpu_buffer->current_context = val;
@@ -3071,8 +3108,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->current_context - (1 << cpu_buffer->nest);
}
-/* The recursive locking above uses 4 bits */
-#define NESTED_BITS 4
+/* The recursive locking above uses 5 bits */
+#define NESTED_BITS 5
/**
* ring_buffer_nest_start - Allow to trace while nested
@@ -3149,6 +3186,153 @@ int ring_buffer_unlock_commit(struct trace_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
+/* Special value to validate all deltas on a page. */
+#define CHECK_FULL_PAGE 1L
+
+#ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS
+static void dump_buffer_page(struct buffer_data_page *bpage,
+ struct rb_event_info *info,
+ unsigned long tail)
+{
+ struct ring_buffer_event *event;
+ u64 ts, delta;
+ int e;
+
+ ts = bpage->time_stamp;
+ pr_warn(" [%lld] PAGE TIME STAMP\n", ts);
+
+ for (e = 0; e < tail; e += rb_event_length(event)) {
+
+ event = (struct ring_buffer_event *)(bpage->data + e);
+
+ switch (event->type_len) {
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = ring_buffer_event_time_stamp(event);
+ ts += delta;
+ pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta);
+ break;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ delta = ring_buffer_event_time_stamp(event);
+ ts = delta;
+ pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta);
+ break;
+
+ case RINGBUF_TYPE_PADDING:
+ ts += event->time_delta;
+ pr_warn(" [%lld] delta:%d PADDING\n", ts, event->time_delta);
+ break;
+
+ case RINGBUF_TYPE_DATA:
+ ts += event->time_delta;
+ pr_warn(" [%lld] delta:%d\n", ts, event->time_delta);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static DEFINE_PER_CPU(atomic_t, checking);
+static atomic_t ts_dump;
+
+/*
+ * Check if the current event time stamp matches the deltas on
+ * the buffer page.
+ */
+static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info,
+ unsigned long tail)
+{
+ struct ring_buffer_event *event;
+ struct buffer_data_page *bpage;
+ u64 ts, delta;
+ bool full = false;
+ int e;
+
+ bpage = info->tail_page->page;
+
+ if (tail == CHECK_FULL_PAGE) {
+ full = true;
+ tail = local_read(&bpage->commit);
+ } else if (info->add_timestamp &
+ (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) {
+ /* Ignore events with absolute time stamps */
+ return;
+ }
+
+ /*
+ * Do not check the first event (skip possible extends too).
+ * Also do not check if previous events have not been committed.
+ */
+ if (tail <= 8 || tail > local_read(&bpage->commit))
+ return;
+
+ /*
+ * If this interrupted another event,
+ */
+ if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
+ goto out;
+
+ ts = bpage->time_stamp;
+
+ for (e = 0; e < tail; e += rb_event_length(event)) {
+
+ event = (struct ring_buffer_event *)(bpage->data + e);
+
+ switch (event->type_len) {
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = ring_buffer_event_time_stamp(event);
+ ts += delta;
+ break;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ delta = ring_buffer_event_time_stamp(event);
+ ts = delta;
+ break;
+
+ case RINGBUF_TYPE_PADDING:
+ if (event->time_delta == 1)
+ break;
+ /* fall through */
+ case RINGBUF_TYPE_DATA:
+ ts += event->time_delta;
+ break;
+
+ default:
+ RB_WARN_ON(cpu_buffer, 1);
+ }
+ }
+ if ((full && ts > info->ts) ||
+ (!full && ts + info->delta != info->ts)) {
+ /* If another report is happening, ignore this one */
+ if (atomic_inc_return(&ts_dump) != 1) {
+ atomic_dec(&ts_dump);
+ goto out;
+ }
+ atomic_inc(&cpu_buffer->record_disabled);
+ pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld after:%lld\n",
+ cpu_buffer->cpu,
+ ts + info->delta, info->ts, info->delta, info->after);
+ dump_buffer_page(bpage, info, tail);
+ atomic_dec(&ts_dump);
+ /* Do not re-enable checking */
+ return;
+ }
+out:
+ atomic_dec(this_cpu_ptr(&checking));
+}
+#else
+static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info,
+ unsigned long tail)
+{
+}
+#endif /* CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS */
+
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
struct rb_event_info *info)
@@ -3200,14 +3384,14 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE)) {
- if (tail != w) {
- /* before and after may now different, fix it up*/
- b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
- a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
- if (a_ok && b_ok && info->before != info->after)
- (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
- info->before, info->after);
- }
+ /* before and after may now different, fix it up*/
+ b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
+ a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
+ if (a_ok && b_ok && info->before != info->after)
+ (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
+ info->before, info->after);
+ if (a_ok && b_ok)
+ check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
return rb_move_tail(cpu_buffer, tail, info);
}
@@ -3225,9 +3409,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* This did not interrupt any time update */
info->delta = info->ts - info->after;
else
- /* Just use full timestamp for inerrupting event */
+ /* Just use full timestamp for interrupting event */
info->delta = info->ts;
barrier();
+ check_buffer(cpu_buffer, info, tail);
if (unlikely(info->ts != save_before)) {
/* SLOW PATH - Interrupted between C and E */
@@ -3253,15 +3438,15 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
ts = rb_time_stamp(cpu_buffer->buffer);
barrier();
/*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
- info->after < ts) {
+ info->after < ts &&
+ rb_time_cmpxchg(&cpu_buffer->write_stamp,
+ info->after, ts)) {
/* Nothing came after this event between C and E */
info->delta = ts - info->after;
- (void)rb_time_cmpxchg(&cpu_buffer->write_stamp,
- info->after, info->ts);
info->ts = ts;
} else {
/*
- * Interrupted beween C and E:
+ * Interrupted between C and E:
* Lost the previous events time stamp. Just set the
* delta to zero, and this will be the same time as
* the event this event interrupted. And the events that
@@ -3468,7 +3653,7 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
}
/**
- * ring_buffer_commit_discard - discard an event that has not been committed
+ * ring_buffer_discard_commit - discard an event that has not been committed
* @buffer: the ring buffer
* @event: non committed event to discard
*
@@ -4866,6 +5051,9 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
+ /* prevent another thread from changing buffer sizes */
+ mutex_lock(&buffer->mutex);
+
atomic_inc(&cpu_buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
@@ -4876,6 +5064,8 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
atomic_dec(&cpu_buffer->record_disabled);
atomic_dec(&cpu_buffer->resize_disabled);
+
+ mutex_unlock(&buffer->mutex);
}
EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
@@ -4889,6 +5079,9 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
struct ring_buffer_per_cpu *cpu_buffer;
int cpu;
+ /* prevent another thread from changing buffer sizes */
+ mutex_lock(&buffer->mutex);
+
for_each_online_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
@@ -4907,6 +5100,8 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
atomic_dec(&cpu_buffer->record_disabled);
atomic_dec(&cpu_buffer->resize_disabled);
}
+
+ mutex_unlock(&buffer->mutex);
}
/**
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index 7d56d621ffea..a4b4bbf8c3bf 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -242,9 +242,11 @@ static struct synth_field_desc create_synth_test_fields[] = {
{ .type = "pid_t", .name = "next_pid_field" },
{ .type = "char[16]", .name = "next_comm_field" },
{ .type = "u64", .name = "ts_ns" },
+ { .type = "char[]", .name = "dynstring_field_1" },
{ .type = "u64", .name = "ts_ms" },
{ .type = "unsigned int", .name = "cpu" },
{ .type = "char[64]", .name = "my_string_field" },
+ { .type = "char[]", .name = "dynstring_field_2" },
{ .type = "int", .name = "my_int_field" },
};
@@ -254,7 +256,7 @@ static struct synth_field_desc create_synth_test_fields[] = {
*/
static int __init test_create_synth_event(void)
{
- u64 vals[7];
+ u64 vals[9];
int ret;
/* Create the create_synth_test event with the fields above */
@@ -292,10 +294,12 @@ static int __init test_create_synth_event(void)
vals[0] = 777; /* next_pid_field */
vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
vals[2] = 1000000; /* ts_ns */
- vals[3] = 1000; /* ts_ms */
- vals[4] = raw_smp_processor_id(); /* cpu */
- vals[5] = (u64)(long)"thneed"; /* my_string_field */
- vals[6] = 398; /* my_int_field */
+ vals[3] = (u64)(long)"xrayspecs"; /* dynstring_field_1 */
+ vals[4] = 1000; /* ts_ms */
+ vals[5] = raw_smp_processor_id(); /* cpu */
+ vals[6] = (u64)(long)"thneed"; /* my_string_field */
+ vals[7] = (u64)(long)"kerplunk"; /* dynstring_field_2 */
+ vals[8] = 398; /* my_int_field */
/* Now generate a create_synth_test event */
ret = synth_event_trace_array(create_synth_test, vals, ARRAY_SIZE(vals));
@@ -303,7 +307,7 @@ static int __init test_create_synth_event(void)
return ret;
delete:
/* We got an error after creating the event, delete it */
- ret = synth_event_delete("create_synth_test");
+ synth_event_delete("create_synth_test");
goto out;
}
@@ -422,13 +426,15 @@ static int __init test_trace_synth_event(void)
int ret;
/* Trace some bogus values just for testing */
- ret = synth_event_trace(create_synth_test, 7, /* number of values */
+ ret = synth_event_trace(create_synth_test, 9, /* number of values */
(u64)444, /* next_pid_field */
(u64)(long)"clackers", /* next_comm_field */
(u64)1000000, /* ts_ns */
+ (u64)(long)"viewmaster",/* dynstring_field_1 */
(u64)1000, /* ts_ms */
(u64)raw_smp_processor_id(), /* cpu */
(u64)(long)"Thneed", /* my_string_field */
+ (u64)(long)"yoyos", /* dynstring_field_2 */
(u64)999); /* my_int_field */
return ret;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d3e5de717df2..b5815a022ecc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -68,10 +68,21 @@ bool ring_buffer_expanded;
static bool __read_mostly tracing_selftest_running;
/*
- * If a tracer is running, we do not want to run SELFTEST.
+ * If boot-time tracing including tracers/events via kernel cmdline
+ * is running, we do not want to run SELFTEST.
*/
bool __read_mostly tracing_selftest_disabled;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+void __init disable_tracing_selftest(const char *reason)
+{
+ if (!tracing_selftest_disabled) {
+ tracing_selftest_disabled = true;
+ pr_info("Ftrace startup test is disabled due to %s\n", reason);
+ }
+}
+#endif
+
/* Pipe tracepoints to printk */
struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
@@ -163,7 +174,8 @@ static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
int tracing_set_tracer(struct trace_array *tr, const char *buf);
-static void ftrace_trace_userstack(struct trace_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+ struct trace_buffer *buffer,
unsigned long flags, int pc);
#define MAX_TRACER_SIZE 100
@@ -251,6 +263,145 @@ unsigned long long ns2usecs(u64 nsec)
return nsec;
}
+static void
+trace_process_export(struct trace_export *export,
+ struct ring_buffer_event *event, int flag)
+{
+ struct trace_entry *entry;
+ unsigned int size = 0;
+
+ if (export->flags & flag) {
+ entry = ring_buffer_event_data(event);
+ size = ring_buffer_event_length(event);
+ export->write(export, entry, size);
+ }
+}
+
+static DEFINE_MUTEX(ftrace_export_lock);
+
+static struct trace_export __rcu *ftrace_exports_list __read_mostly;
+
+static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
+static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
+static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
+
+static inline void ftrace_exports_enable(struct trace_export *export)
+{
+ if (export->flags & TRACE_EXPORT_FUNCTION)
+ static_branch_inc(&trace_function_exports_enabled);
+
+ if (export->flags & TRACE_EXPORT_EVENT)
+ static_branch_inc(&trace_event_exports_enabled);
+
+ if (export->flags & TRACE_EXPORT_MARKER)
+ static_branch_inc(&trace_marker_exports_enabled);
+}
+
+static inline void ftrace_exports_disable(struct trace_export *export)
+{
+ if (export->flags & TRACE_EXPORT_FUNCTION)
+ static_branch_dec(&trace_function_exports_enabled);
+
+ if (export->flags & TRACE_EXPORT_EVENT)
+ static_branch_dec(&trace_event_exports_enabled);
+
+ if (export->flags & TRACE_EXPORT_MARKER)
+ static_branch_dec(&trace_marker_exports_enabled);
+}
+
+static void ftrace_exports(struct ring_buffer_event *event, int flag)
+{
+ struct trace_export *export;
+
+ preempt_disable_notrace();
+
+ export = rcu_dereference_raw_check(ftrace_exports_list);
+ while (export) {
+ trace_process_export(export, event, flag);
+ export = rcu_dereference_raw_check(export->next);
+ }
+
+ preempt_enable_notrace();
+}
+
+static inline void
+add_trace_export(struct trace_export **list, struct trace_export *export)
+{
+ rcu_assign_pointer(export->next, *list);
+ /*
+ * We are entering export into the list but another
+ * CPU might be walking that list. We need to make sure
+ * the export->next pointer is valid before another CPU sees
+ * the export pointer included into the list.
+ */
+ rcu_assign_pointer(*list, export);
+}
+
+static inline int
+rm_trace_export(struct trace_export **list, struct trace_export *export)
+{
+ struct trace_export **p;
+
+ for (p = list; *p != NULL; p = &(*p)->next)
+ if (*p == export)
+ break;
+
+ if (*p != export)
+ return -1;
+
+ rcu_assign_pointer(*p, (*p)->next);
+
+ return 0;
+}
+
+static inline void
+add_ftrace_export(struct trace_export **list, struct trace_export *export)
+{
+ ftrace_exports_enable(export);
+
+ add_trace_export(list, export);
+}
+
+static inline int
+rm_ftrace_export(struct trace_export **list, struct trace_export *export)
+{
+ int ret;
+
+ ret = rm_trace_export(list, export);
+ ftrace_exports_disable(export);
+
+ return ret;
+}
+
+int register_ftrace_export(struct trace_export *export)
+{
+ if (WARN_ON_ONCE(!export->write))
+ return -1;
+
+ mutex_lock(&ftrace_export_lock);
+
+ add_ftrace_export(&ftrace_exports_list, export);
+
+ mutex_unlock(&ftrace_export_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_ftrace_export);
+
+int unregister_ftrace_export(struct trace_export *export)
+{
+ int ret;
+
+ mutex_lock(&ftrace_export_lock);
+
+ ret = rm_ftrace_export(&ftrace_exports_list, export);
+
+ mutex_unlock(&ftrace_export_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(unregister_ftrace_export);
+
/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS \
(FUNCTION_DEFAULT_FLAGS | \
@@ -1973,11 +2124,7 @@ int __init register_tracer(struct tracer *type)
apply_trace_boot_options();
/* disable other selftests, since this will break it. */
- tracing_selftest_disabled = true;
-#ifdef CONFIG_FTRACE_STARTUP_TEST
- printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
- type->name);
-#endif
+ disable_tracing_selftest("running a tracer");
out_unlock:
return ret;
@@ -2511,7 +2658,7 @@ void trace_buffered_event_enable(void)
preempt_disable();
if (cpu == smp_processor_id() &&
- this_cpu_read(trace_buffered_event) !=
+ __this_cpu_read(trace_buffered_event) !=
per_cpu(trace_buffered_event, cpu))
WARN_ON_ONCE(1);
preempt_enable();
@@ -2598,7 +2745,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
(entry = this_cpu_read(trace_buffered_event))) {
/* Try to use the per cpu buffer first */
val = this_cpu_inc_return(trace_buffered_event_cnt);
- if (val == 1) {
+ if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
trace_event_setup(entry, type, flags, pc);
entry->array[0] = len;
return entry;
@@ -2611,7 +2758,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
/*
* If tracing is off, but we have triggers enabled
* we still need to look at the event data. Use the temp_buffer
- * to store the trace event for the tigger to use. It's recusive
+ * to store the trace event for the trigger to use. It's recursive
* safe and will not be recorded anywhere.
*/
if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
@@ -2699,6 +2846,8 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
if (static_key_false(&tracepoint_printk_key.key))
output_printk(fbuffer);
+ if (static_branch_unlikely(&trace_event_exports_enabled))
+ ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
fbuffer->event, fbuffer->entry,
fbuffer->flags, fbuffer->pc, fbuffer->regs);
@@ -2729,7 +2878,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
* two. They are not that meaningful.
*/
ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
- ftrace_trace_userstack(buffer, flags, pc);
+ ftrace_trace_userstack(tr, buffer, flags, pc);
}
/*
@@ -2742,129 +2891,6 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
__buffer_unlock_commit(buffer, event);
}
-static void
-trace_process_export(struct trace_export *export,
- struct ring_buffer_event *event)
-{
- struct trace_entry *entry;
- unsigned int size = 0;
-
- entry = ring_buffer_event_data(event);
- size = ring_buffer_event_length(event);
- export->write(export, entry, size);
-}
-
-static DEFINE_MUTEX(ftrace_export_lock);
-
-static struct trace_export __rcu *ftrace_exports_list __read_mostly;
-
-static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
-
-static inline void ftrace_exports_enable(void)
-{
- static_branch_enable(&ftrace_exports_enabled);
-}
-
-static inline void ftrace_exports_disable(void)
-{
- static_branch_disable(&ftrace_exports_enabled);
-}
-
-static void ftrace_exports(struct ring_buffer_event *event)
-{
- struct trace_export *export;
-
- preempt_disable_notrace();
-
- export = rcu_dereference_raw_check(ftrace_exports_list);
- while (export) {
- trace_process_export(export, event);
- export = rcu_dereference_raw_check(export->next);
- }
-
- preempt_enable_notrace();
-}
-
-static inline void
-add_trace_export(struct trace_export **list, struct trace_export *export)
-{
- rcu_assign_pointer(export->next, *list);
- /*
- * We are entering export into the list but another
- * CPU might be walking that list. We need to make sure
- * the export->next pointer is valid before another CPU sees
- * the export pointer included into the list.
- */
- rcu_assign_pointer(*list, export);
-}
-
-static inline int
-rm_trace_export(struct trace_export **list, struct trace_export *export)
-{
- struct trace_export **p;
-
- for (p = list; *p != NULL; p = &(*p)->next)
- if (*p == export)
- break;
-
- if (*p != export)
- return -1;
-
- rcu_assign_pointer(*p, (*p)->next);
-
- return 0;
-}
-
-static inline void
-add_ftrace_export(struct trace_export **list, struct trace_export *export)
-{
- if (*list == NULL)
- ftrace_exports_enable();
-
- add_trace_export(list, export);
-}
-
-static inline int
-rm_ftrace_export(struct trace_export **list, struct trace_export *export)
-{
- int ret;
-
- ret = rm_trace_export(list, export);
- if (*list == NULL)
- ftrace_exports_disable();
-
- return ret;
-}
-
-int register_ftrace_export(struct trace_export *export)
-{
- if (WARN_ON_ONCE(!export->write))
- return -1;
-
- mutex_lock(&ftrace_export_lock);
-
- add_ftrace_export(&ftrace_exports_list, export);
-
- mutex_unlock(&ftrace_export_lock);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(register_ftrace_export);
-
-int unregister_ftrace_export(struct trace_export *export)
-{
- int ret;
-
- mutex_lock(&ftrace_export_lock);
-
- ret = rm_ftrace_export(&ftrace_exports_list, export);
-
- mutex_unlock(&ftrace_export_lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(unregister_ftrace_export);
-
void
trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -2884,8 +2910,8 @@ trace_function(struct trace_array *tr,
entry->parent_ip = parent_ip;
if (!call_filter_check_discard(call, entry, buffer, event)) {
- if (static_branch_unlikely(&ftrace_exports_enabled))
- ftrace_exports(event);
+ if (static_branch_unlikely(&trace_function_exports_enabled))
+ ftrace_exports(event, TRACE_EXPORT_FUNCTION);
__buffer_unlock_commit(buffer, event);
}
}
@@ -2934,7 +2960,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
/* This should never happen. If it does, yell once and skip */
- if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+ if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
goto out;
/*
@@ -3038,13 +3064,14 @@ EXPORT_SYMBOL_GPL(trace_dump_stack);
static DEFINE_PER_CPU(int, user_stack_count);
static void
-ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
+ftrace_trace_userstack(struct trace_array *tr,
+ struct trace_buffer *buffer, unsigned long flags, int pc)
{
struct trace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
- if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
+ if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
/*
@@ -3083,7 +3110,8 @@ ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
-static void ftrace_trace_userstack(struct trace_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+ struct trace_buffer *buffer,
unsigned long flags, int pc)
{
}
@@ -3100,7 +3128,7 @@ struct trace_buffer_struct {
static struct trace_buffer_struct *trace_percpu_buffer;
/*
- * Thise allows for lockless recording. If we're nested too deeply, then
+ * This allows for lockless recording. If we're nested too deeply, then
* this returns NULL.
*/
static char *get_trace_buf(void)
@@ -3114,7 +3142,7 @@ static char *get_trace_buf(void)
/* Interrupts must see nesting incremented before we use the buffer */
barrier();
- return &buffer->buffer[buffer->nesting][0];
+ return &buffer->buffer[buffer->nesting - 1][0];
}
static void put_trace_buf(void)
@@ -3516,7 +3544,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
}
#define STATIC_TEMP_BUF_SIZE 128
-static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
+static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
@@ -5124,10 +5152,10 @@ static const char readme_msg[] =
"\t -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
- "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+ "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
- " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
+ " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
@@ -5251,7 +5279,12 @@ static const char readme_msg[] =
"\t trace(<synthetic_event>,param list) - generate synthetic event\n"
"\t save(field,...) - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
- "\t snapshot() - snapshot the trace buffer\n"
+ "\t snapshot() - snapshot the trace buffer\n\n"
+#endif
+#ifdef CONFIG_SYNTH_EVENTS
+ " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
+ "\t Write into this file to define/undefine new synthetic events.\n"
+ "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
#endif
#endif
;
@@ -6664,7 +6697,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
written = -EFAULT;
} else
written = cnt;
- len = cnt;
if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
/* do not add \n before testing triggers, but add \0 */
@@ -6678,6 +6710,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
} else
entry->buf[cnt] = '\0';
+ if (static_branch_unlikely(&trace_marker_exports_enabled))
+ ftrace_exports(event, TRACE_EXPORT_MARKER);
__buffer_unlock_commit(buffer, event);
if (tt)
@@ -8638,6 +8672,24 @@ struct trace_array *trace_array_find_get(const char *instance)
return tr;
}
+static int trace_array_create_dir(struct trace_array *tr)
+{
+ int ret;
+
+ tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
+ if (!tr->dir)
+ return -EINVAL;
+
+ ret = event_trace_add_tracer(tr->dir, tr);
+ if (ret)
+ tracefs_remove(tr->dir);
+
+ init_tracer_tracefs(tr, tr->dir);
+ __update_tracer_options(tr);
+
+ return ret;
+}
+
static struct trace_array *trace_array_create(const char *name)
{
struct trace_array *tr;
@@ -8673,30 +8725,28 @@ static struct trace_array *trace_array_create(const char *name)
if (allocate_trace_buffers(tr, trace_buf_size) < 0)
goto out_free_tr;
- tr->dir = tracefs_create_dir(name, trace_instance_dir);
- if (!tr->dir)
- goto out_free_tr;
-
- ret = event_trace_add_tracer(tr->dir, tr);
- if (ret) {
- tracefs_remove(tr->dir);
+ if (ftrace_allocate_ftrace_ops(tr) < 0)
goto out_free_tr;
- }
ftrace_init_trace_array(tr);
- init_tracer_tracefs(tr, tr->dir);
init_trace_flags_index(tr);
- __update_tracer_options(tr);
+
+ if (trace_instance_dir) {
+ ret = trace_array_create_dir(tr);
+ if (ret)
+ goto out_free_tr;
+ } else
+ __trace_early_add_events(tr);
list_add(&tr->list, &ftrace_trace_arrays);
tr->ref++;
-
return tr;
out_free_tr:
+ ftrace_free_ftrace_ops(tr);
free_trace_buffers(tr);
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
@@ -8801,7 +8851,6 @@ static int __remove_instance(struct trace_array *tr)
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
- tr = NULL;
return 0;
}
@@ -8855,11 +8904,27 @@ static int instance_rmdir(const char *name)
static __init void create_trace_instances(struct dentry *d_tracer)
{
+ struct trace_array *tr;
+
trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
instance_mkdir,
instance_rmdir);
if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
return;
+
+ mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
+
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (!tr->name)
+ continue;
+ if (MEM_FAIL(trace_array_create_dir(tr) < 0,
+ "Failed to create instance directory\n"))
+ break;
+ }
+
+ mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
}
static void
@@ -8973,21 +9038,21 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
* directory. It is called via fs_initcall() by any of the boot up code
* and expects to return the dentry of the top level tracing directory.
*/
-struct dentry *tracing_init_dentry(void)
+int tracing_init_dentry(void)
{
struct trace_array *tr = &global_trace;
if (security_locked_down(LOCKDOWN_TRACEFS)) {
pr_warn("Tracing disabled due to lockdown\n");
- return ERR_PTR(-EPERM);
+ return -EPERM;
}
/* The top level trace array uses NULL as parent */
if (tr->dir)
- return NULL;
+ return 0;
if (WARN_ON(!tracefs_initialized()))
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
/*
* As there may still be users that expect the tracing
@@ -8998,13 +9063,16 @@ struct dentry *tracing_init_dentry(void)
tr->dir = debugfs_create_automount("tracing", NULL,
trace_automount, NULL);
- return NULL;
+ return 0;
}
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];
-static void __init trace_eval_init(void)
+static struct workqueue_struct *eval_map_wq __initdata;
+static struct work_struct eval_map_work __initdata;
+
+static void __init eval_map_work_func(struct work_struct *work)
{
int len;
@@ -9012,6 +9080,33 @@ static void __init trace_eval_init(void)
trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
}
+static int __init trace_eval_init(void)
+{
+ INIT_WORK(&eval_map_work, eval_map_work_func);
+
+ eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
+ if (!eval_map_wq) {
+ pr_err("Unable to allocate eval_map_wq\n");
+ /* Do work here */
+ eval_map_work_func(&eval_map_work);
+ return -ENOMEM;
+ }
+
+ queue_work(eval_map_wq, &eval_map_work);
+ return 0;
+}
+
+static int __init trace_eval_sync(void)
+{
+ /* Make sure the eval map updates are finished */
+ if (eval_map_wq)
+ destroy_workqueue(eval_map_wq);
+ return 0;
+}
+
+late_initcall_sync(trace_eval_sync);
+
+
#ifdef CONFIG_MODULES
static void trace_module_add_evals(struct module *mod)
{
@@ -9074,7 +9169,7 @@ static int trace_module_notify(struct notifier_block *self,
break;
}
- return 0;
+ return NOTIFY_OK;
}
static struct notifier_block trace_module_nb = {
@@ -9085,48 +9180,48 @@ static struct notifier_block trace_module_nb = {
static __init int tracer_init_tracefs(void)
{
- struct dentry *d_tracer;
+ int ret;
trace_access_lock_init();
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return 0;
event_trace_init();
- init_tracer_tracefs(&global_trace, d_tracer);
- ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
+ init_tracer_tracefs(&global_trace, NULL);
+ ftrace_init_tracefs_toplevel(&global_trace, NULL);
- trace_create_file("tracing_thresh", 0644, d_tracer,
+ trace_create_file("tracing_thresh", 0644, NULL,
&global_trace, &tracing_thresh_fops);
- trace_create_file("README", 0444, d_tracer,
+ trace_create_file("README", 0444, NULL,
NULL, &tracing_readme_fops);
- trace_create_file("saved_cmdlines", 0444, d_tracer,
+ trace_create_file("saved_cmdlines", 0444, NULL,
NULL, &tracing_saved_cmdlines_fops);
- trace_create_file("saved_cmdlines_size", 0644, d_tracer,
+ trace_create_file("saved_cmdlines_size", 0644, NULL,
NULL, &tracing_saved_cmdlines_size_fops);
- trace_create_file("saved_tgids", 0444, d_tracer,
+ trace_create_file("saved_tgids", 0444, NULL,
NULL, &tracing_saved_tgids_fops);
trace_eval_init();
- trace_create_eval_file(d_tracer);
+ trace_create_eval_file(NULL);
#ifdef CONFIG_MODULES
register_module_notifier(&trace_module_nb);
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
- trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+ trace_create_file("dyn_ftrace_total_info", 0444, NULL,
NULL, &tracing_dyn_info_fops);
#endif
- create_trace_instances(d_tracer);
+ create_trace_instances(NULL);
update_tracer_options(&global_trace);
@@ -9289,7 +9384,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
}
/*
- * We need to stop all tracing on all CPUS to read the
+ * We need to stop all tracing on all CPUS to read
* the next buffer. This is a bit expensive, but is
* not done often. We fill all what we can read,
* and then release the locks again.
@@ -9432,7 +9527,7 @@ __init static int tracer_alloc_buffers(void)
}
/*
- * Make sure we don't accidently add more trace options
+ * Make sure we don't accidentally add more trace options
* than we have bits for.
*/
BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
@@ -9461,7 +9556,7 @@ __init static int tracer_alloc_buffers(void)
/*
* The prepare callbacks allocates some memory for the ring buffer. We
- * don't free the buffer if the if the CPU goes down. If we were to free
+ * don't free the buffer if the CPU goes down. If we were to free
* the buffer, then the user would lose any trace that was in the
* buffer. The memory will be removed once the "instance" is removed.
*/
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 610d21355526..e448d2da0b99 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -19,6 +19,7 @@
#include <linux/glob.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
+#include <linux/ctype.h>
#ifdef CONFIG_FTRACE_SYSCALLS
#include <asm/unistd.h> /* For NR_SYSCALLS */
@@ -98,7 +99,7 @@ enum trace_type {
/* Use this for memory failure errors */
#define MEM_FAIL(condition, fmt, ...) ({ \
- static bool __section(.data.once) __warned; \
+ static bool __section(".data.once") __warned; \
int __ret_warn_once = !!(condition); \
\
if (unlikely(__ret_warn_once && !__warned)) { \
@@ -246,7 +247,7 @@ typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
* tracing_snapshot_cond(tr, cond_data), the cond_data passed in is
* passed in turn to the cond_snapshot.update() function. That data
* can be compared by the update() implementation with the cond_data
- * contained wihin the struct cond_snapshot instance associated with
+ * contained within the struct cond_snapshot instance associated with
* the trace_array. Because the tr->max_lock is held throughout the
* update() call, the update() function can directly retrieve the
* cond_snapshot and cond_data associated with the per-instance
@@ -271,7 +272,7 @@ typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
* take the snapshot, by returning 'true' if so, 'false' if no
* snapshot should be taken. Because the max_lock is held for
* the duration of update(), the implementation is safe to
- * directly retrieven and save any implementation data it needs
+ * directly retrieved and save any implementation data it needs
* to in association with the snapshot.
*/
struct cond_snapshot {
@@ -557,163 +558,6 @@ struct tracer {
bool noboot;
};
-
-/* Only current can touch trace_recursion */
-
-/*
- * For function tracing recursion:
- * The order of these bits are important.
- *
- * When function tracing occurs, the following steps are made:
- * If arch does not support a ftrace feature:
- * call internal function (uses INTERNAL bits) which calls...
- * If callback is registered to the "global" list, the list
- * function is called and recursion checks the GLOBAL bits.
- * then this function calls...
- * The function callback, which can use the FTRACE bits to
- * check for recursion.
- *
- * Now if the arch does not suppport a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
- */
-enum {
- /* Function recursion bits */
- TRACE_FTRACE_BIT,
- TRACE_FTRACE_NMI_BIT,
- TRACE_FTRACE_IRQ_BIT,
- TRACE_FTRACE_SIRQ_BIT,
-
- /* INTERNAL_BITs must be greater than FTRACE_BITs */
- TRACE_INTERNAL_BIT,
- TRACE_INTERNAL_NMI_BIT,
- TRACE_INTERNAL_IRQ_BIT,
- TRACE_INTERNAL_SIRQ_BIT,
-
- TRACE_BRANCH_BIT,
-/*
- * Abuse of the trace_recursion.
- * As we need a way to maintain state if we are tracing the function
- * graph in irq because we want to trace a particular function that
- * was called in irq context but we have irq tracing off. Since this
- * can only be modified by current, we can reuse trace_recursion.
- */
- TRACE_IRQ_BIT,
-
- /* Set if the function is in the set_graph_function file */
- TRACE_GRAPH_BIT,
-
- /*
- * In the very unlikely case that an interrupt came in
- * at a start of graph tracing, and we want to trace
- * the function in that interrupt, the depth can be greater
- * than zero, because of the preempted start of a previous
- * trace. In an even more unlikely case, depth could be 2
- * if a softirq interrupted the start of graph tracing,
- * followed by an interrupt preempting a start of graph
- * tracing in the softirq, and depth can even be 3
- * if an NMI came in at the start of an interrupt function
- * that preempted a softirq start of a function that
- * preempted normal context!!!! Luckily, it can't be
- * greater than 3, so the next two bits are a mask
- * of what the depth is when we set TRACE_GRAPH_BIT
- */
-
- TRACE_GRAPH_DEPTH_START_BIT,
- TRACE_GRAPH_DEPTH_END_BIT,
-
- /*
- * To implement set_graph_notrace, if this bit is set, we ignore
- * function graph tracing of called functions, until the return
- * function is called to clear it.
- */
- TRACE_GRAPH_NOTRACE_BIT,
-};
-
-#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
-#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
-#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit)))
-
-#define trace_recursion_depth() \
- (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3)
-#define trace_recursion_set_depth(depth) \
- do { \
- current->trace_recursion &= \
- ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \
- current->trace_recursion |= \
- ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \
- } while (0)
-
-#define TRACE_CONTEXT_BITS 4
-
-#define TRACE_FTRACE_START TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
-
-#define TRACE_LIST_START TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
-
-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
-
-static __always_inline int trace_get_context_bit(void)
-{
- int bit;
-
- if (in_interrupt()) {
- if (in_nmi())
- bit = 0;
-
- else if (in_irq())
- bit = 1;
- else
- bit = 2;
- } else
- bit = 3;
-
- return bit;
-}
-
-static __always_inline int trace_test_and_set_recursion(int start, int max)
-{
- unsigned int val = current->trace_recursion;
- int bit;
-
- /* A previous recursion check was made */
- if ((val & TRACE_CONTEXT_MASK) > max)
- return 0;
-
- bit = trace_get_context_bit() + start;
- if (unlikely(val & (1 << bit)))
- return -1;
-
- val |= 1 << bit;
- current->trace_recursion = val;
- barrier();
-
- return bit;
-}
-
-static __always_inline void trace_clear_recursion(int bit)
-{
- unsigned int val = current->trace_recursion;
-
- if (!bit)
- return;
-
- bit = 1 << bit;
- val &= ~bit;
-
- barrier();
- current->trace_recursion = val;
-}
-
static inline struct ring_buffer_iter *
trace_buffer_iter(struct trace_iterator *iter, int cpu)
{
@@ -737,7 +581,7 @@ struct dentry *trace_create_file(const char *name,
void *data,
const struct file_operations *fops);
-struct dentry *tracing_init_dentry(void);
+int tracing_init_dentry(void);
struct ring_buffer_event;
@@ -875,6 +719,8 @@ extern bool ring_buffer_expanded;
extern bool tracing_selftest_disabled;
#ifdef CONFIG_FTRACE_STARTUP_TEST
+extern void __init disable_tracing_selftest(const char *reason);
+
extern int trace_selftest_startup_function(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_function_graph(struct tracer *trace,
@@ -898,6 +744,9 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
*/
#define __tracer_data __refdata
#else
+static inline void __init disable_tracing_selftest(const char *reason)
+{
+}
/* Tracers are seldom changed. Optimize when selftests are disabled. */
#define __tracer_data __read_mostly
#endif /* CONFIG_FTRACE_STARTUP_TEST */
@@ -1125,6 +974,8 @@ extern int ftrace_is_dead(void);
int ftrace_create_function_files(struct trace_array *tr,
struct dentry *parent);
void ftrace_destroy_function_files(struct trace_array *tr);
+int ftrace_allocate_ftrace_ops(struct trace_array *tr);
+void ftrace_free_ftrace_ops(struct trace_array *tr);
void ftrace_init_global_array_ops(struct trace_array *tr);
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
void ftrace_reset_array_ops(struct trace_array *tr);
@@ -1146,6 +997,11 @@ ftrace_create_function_files(struct trace_array *tr,
{
return 0;
}
+static inline int ftrace_allocate_ftrace_ops(struct trace_array *tr)
+{
+ return 0;
+}
+static inline void ftrace_free_ftrace_ops(struct trace_array *tr) { }
static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
static inline __init void
ftrace_init_global_array_ops(struct trace_array *tr) { }
@@ -1472,7 +1328,7 @@ __trace_event_discard_commit(struct trace_buffer *buffer,
/*
* Helper function for event_trigger_unlock_commit{_regs}().
* If there are event triggers attached to this event that requires
- * filtering against its fields, then they wil be called as the
+ * filtering against its fields, then they will be called as the
* entry already holds the field information of the current event.
*
* It also checks if the event should be discarded or not.
@@ -1651,6 +1507,7 @@ extern void trace_event_enable_tgid_record(bool enable);
extern int event_trace_init(void);
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
+extern void __trace_early_add_events(struct trace_array *tr);
extern struct trace_event_file *__find_event_file(struct trace_array *tr,
const char *system,
@@ -2082,4 +1939,16 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
iter->pos = -1;
}
+/* Check the name is good for event/group/fields */
+static inline bool is_good_name(const char *name)
+{
+ if (!isalpha(*name) && *name != '_')
+ return false;
+ while (*++name != '\0') {
+ if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+ return false;
+ }
+ return true;
+}
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 2e9a4746ea85..801c2a7f7605 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -31,7 +31,7 @@ static bool ok_to_run;
* it simply writes "START". As the first write is cold cache and
* the rest is hot, we save off that time in bm_first and it is
* reported as "first", which is shown in the second write to the
- * tracepoint. The "first" field is writen within the statics from
+ * tracepoint. The "first" field is written within the statics from
* then on but never changes.
*/
static void trace_do_benchmark(void)
@@ -112,7 +112,7 @@ static void trace_do_benchmark(void)
int i = 0;
/*
* stddev is the square of standard deviation but
- * we want the actualy number. Use the average
+ * we want the actually number. Use the average
* as our seed to find the std.
*
* The next try is:
@@ -155,7 +155,7 @@ static int benchmark_event_kthread(void *arg)
/*
* We don't go to sleep, but let others run as well.
- * This is bascially a "yield()" to let any task that
+ * This is basically a "yield()" to let any task that
* wants to run, schedule in, but if the CPU is idle,
* we'll keep burning cycles.
*
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index fa0fc08c6ef8..a82f03f385f8 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -40,6 +40,16 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node)
pr_err("Failed to set option: %s\n", buf);
}
+ p = xbc_node_find_value(node, "tracing_on", NULL);
+ if (p && *p != '\0') {
+ if (kstrtoul(p, 10, &v))
+ pr_err("Failed to set tracing on: %s\n", p);
+ if (v)
+ tracer_tracing_on(tr);
+ else
+ tracer_tracing_off(tr);
+ }
+
p = xbc_node_find_value(node, "trace_clock", NULL);
if (p && *p != '\0') {
if (tracing_set_clock(tr, p) < 0)
@@ -274,6 +284,12 @@ trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node)
if (tracing_set_tracer(tr, p) < 0)
pr_err("Failed to set given tracer: %s\n", p);
}
+
+ /* Since tracer can free snapshot buffer, allocate snapshot here.*/
+ if (xbc_node_find_value(node, "alloc_snapshot", NULL)) {
+ if (tracing_alloc_snapshot_instance(tr) < 0)
+ pr_err("Failed to allocate snapshot buffer\n");
+ }
}
static void __init
@@ -328,7 +344,12 @@ static int __init trace_boot_init(void)
trace_boot_init_one_instance(tr, trace_node);
trace_boot_init_instances(trace_node);
+ disable_tracing_selftest("running boot-time tracing");
+
return 0;
}
-
-fs_initcall(trace_boot_init);
+/*
+ * Start tracing at the end of core-initcall, so that it starts tracing
+ * from the beginning of postcore_initcall.
+ */
+core_initcall_sync(trace_boot_init);
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 9f2e8520b748..4f967d5cd917 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -206,14 +206,14 @@ static const struct file_operations dynamic_events_ops = {
/* Make a tracefs interface for controlling dynamic events */
static __init int init_dynamic_event(void)
{
- struct dentry *d_tracer;
struct dentry *entry;
+ int ret;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return 0;
- entry = tracefs_create_file("dynamic_events", 0644, d_tracer,
+ entry = tracefs_create_file("dynamic_events", 0644, NULL,
NULL, &dynamic_events_ops);
/* Event list interface */
@@ -276,7 +276,7 @@ int dynevent_arg_add(struct dynevent_cmd *cmd,
* arguments of the form 'type variable_name;' or 'x+y'.
*
* The lhs argument string will be appended to the current cmd string,
- * followed by an operator, if applicable, followd by the rhs string,
+ * followed by an operator, if applicable, followed by the rhs string,
* followed finally by a separator, if applicable. Before the
* argument is added, the @check_arg function, if present, will be
* used to check the sanity of the current arg strings.
@@ -402,7 +402,7 @@ void dynevent_arg_init(struct dynevent_arg *arg,
* whitespace, all followed by a separator, if applicable. After the
* first arg string is successfully appended to the command string,
* the optional @operator is appended, followed by the second arg and
- * and optional @separator. If no separator was specified when
+ * optional @separator. If no separator was specified when
* initializing the arg, a space will be appended.
*/
void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair,
diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h
index d6857a254ede..d6f72dcb7269 100644
--- a/kernel/trace/trace_dynevent.h
+++ b/kernel/trace/trace_dynevent.h
@@ -29,10 +29,10 @@ struct dyn_event;
* @show: Showing method. This is invoked when user reads the event definitions
* via dynamic_events interface.
* @is_busy: Check whether given event is busy so that it can not be deleted.
- * Return true if it is busy, otherwides false.
- * @free: Delete the given event. Return 0 if success, otherwides error.
+ * Return true if it is busy, otherwise false.
+ * @free: Delete the given event. Return 0 if success, otherwise error.
* @match: Check whether given event and system name match this event. The argc
- * and argv is used for exact match. Return true if it matches, otherwides
+ * and argv is used for exact match. Return true if it matches, otherwise
* false.
*
* Except for @create, these methods are called under holding event_mutex.
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 18c4a58aff79..4547ac59da61 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -32,7 +32,7 @@
* to be deciphered for the format file. Although these macros
* may become out of sync with the internal structure, they
* will create a compile error if it happens. Since the
- * internel structures are just tracing helpers, this is not
+ * internal structures are just tracing helpers, this is not
* an issue.
*
* When an internal structure is used, it should use:
@@ -93,10 +93,10 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ret, ret )
__field_packed( unsigned long, ret, func )
- __field_packed( unsigned long, ret, overrun )
+ __field_packed( int, ret, depth )
+ __field_packed( unsigned int, ret, overrun )
__field_packed( unsigned long long, ret, calltime)
__field_packed( unsigned long long, ret, rettime )
- __field_packed( int, ret, depth )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 643e0b19920d..a71181655958 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -432,17 +432,25 @@ NOKPROBE_SYMBOL(perf_trace_buf_update);
#ifdef CONFIG_FUNCTION_TRACER
static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *pt_regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct ftrace_entry *entry;
struct perf_event *event;
struct hlist_head head;
struct pt_regs regs;
int rctx;
+ int bit;
+
+ if (!rcu_is_watching())
+ return;
if ((unsigned long)ops->private != smp_processor_id())
return;
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
event = container_of(ops, struct perf_event, ftrace_ops);
/*
@@ -463,13 +471,15 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
if (!entry)
- return;
+ goto out;
entry->ip = ip;
entry->parent_ip = parent_ip;
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
1, &regs, &head, NULL);
+out:
+ ftrace_test_recursion_unlock(bit);
#undef ENTRY_SIZE
}
@@ -477,7 +487,6 @@ static int perf_ftrace_function_register(struct perf_event *event)
{
struct ftrace_ops *ops = &event->ftrace_ops;
- ops->flags = FTRACE_OPS_FL_RCU;
ops->func = perf_ftrace_function_call;
ops->private = (void *)(unsigned long)nr_cpu_ids;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index a85effb2373b..d387b774ceeb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -38,6 +38,7 @@ DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_generic_fields);
static LIST_HEAD(ftrace_common_fields);
+static bool eventdir_initialized;
#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
@@ -1211,7 +1212,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- if (!trace_event_name(call) || !call->class || !call->class->reg)
+ if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
+ !trace_event_name(call) || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system->name) != 0)
@@ -2124,11 +2126,47 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
}
static int
+event_define_fields(struct trace_event_call *call)
+{
+ struct list_head *head;
+ int ret = 0;
+
+ /*
+ * Other events may have the same class. Only update
+ * the fields if they are not already defined.
+ */
+ head = trace_get_fields(call);
+ if (list_empty(head)) {
+ struct trace_event_fields *field = call->class->fields_array;
+ unsigned int offset = sizeof(struct trace_entry);
+
+ for (; field->type; field++) {
+ if (field->type == TRACE_FUNCTION_TYPE) {
+ field->define_fields(call);
+ break;
+ }
+
+ offset = ALIGN(offset, field->align);
+ ret = trace_define_field(call, field->type, field->name,
+ offset, field->size,
+ field->is_signed, field->filter_type);
+ if (WARN_ON_ONCE(ret)) {
+ pr_err("error code is %d\n", ret);
+ break;
+ }
+
+ offset += field->size;
+ }
+ }
+
+ return ret;
+}
+
+static int
event_create_dir(struct dentry *parent, struct trace_event_file *file)
{
struct trace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
- struct list_head *head;
struct dentry *d_events;
const char *name;
int ret;
@@ -2162,35 +2200,10 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
&ftrace_event_id_fops);
#endif
- /*
- * Other events may have the same class. Only update
- * the fields if they are not already defined.
- */
- head = trace_get_fields(call);
- if (list_empty(head)) {
- struct trace_event_fields *field = call->class->fields_array;
- unsigned int offset = sizeof(struct trace_entry);
-
- for (; field->type; field++) {
- if (field->type == TRACE_FUNCTION_TYPE) {
- ret = field->define_fields(call);
- break;
- }
-
- offset = ALIGN(offset, field->align);
- ret = trace_define_field(call, field->type, field->name,
- offset, field->size,
- field->is_signed, field->filter_type);
- if (ret)
- break;
-
- offset += field->size;
- }
- if (ret < 0) {
- pr_warn("Could not initialize trace point events/%s\n",
- name);
- return -1;
- }
+ ret = event_define_fields(call);
+ if (ret < 0) {
+ pr_warn("Could not initialize trace point events/%s\n", name);
+ return ret;
}
/*
@@ -2424,7 +2437,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
/*
* Since calls are grouped by systems, the likelyhood that the
* next call in the iteration belongs to the same system as the
- * previous call is high. As an optimization, we skip seaching
+ * previous call is high. As an optimization, we skip searching
* for a map[] that matches the call's system if the last call
* was from the same system. That's what last_i is for. If the
* call has the same system as the previous call, then last_i
@@ -2475,7 +2488,10 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
if (!file)
return -ENOMEM;
- return event_create_dir(tr->event_dir, file);
+ if (eventdir_initialized)
+ return event_create_dir(tr->event_dir, file);
+ else
+ return event_define_fields(call);
}
/*
@@ -2483,7 +2499,7 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
* for enabling events at boot. We want to enable events before
* the filesystem is initialized.
*/
-static __init int
+static int
__trace_early_add_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
@@ -2493,7 +2509,7 @@ __trace_early_add_new_event(struct trace_event_call *call,
if (!file)
return -ENOMEM;
- return 0;
+ return event_define_fields(call);
}
struct ftrace_module_file_ops;
@@ -2646,7 +2662,7 @@ static int trace_module_notify(struct notifier_block *self,
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
- return 0;
+ return NOTIFY_OK;
}
static struct notifier_block trace_module_nb = {
@@ -3116,14 +3132,13 @@ static inline int register_event_cmds(void) { return 0; }
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
- * The top level array has already had its trace_event_file
- * descriptors created in order to allow for early events to
- * be recorded. This function is called after the tracefs has been
- * initialized, and we now have to create the files associated
- * to the events.
+ * The top level array and trace arrays created by boot-time tracing
+ * have already had its trace_event_file descriptors created in order
+ * to allow for early events to be recorded.
+ * This function is called after the tracefs has been initialized,
+ * and we now have to create the files associated to the events.
*/
-static __init void
-__trace_early_add_event_dirs(struct trace_array *tr)
+static void __trace_early_add_event_dirs(struct trace_array *tr)
{
struct trace_event_file *file;
int ret;
@@ -3138,13 +3153,12 @@ __trace_early_add_event_dirs(struct trace_array *tr)
}
/*
- * For early boot up, the top trace array requires to have
- * a list of events that can be enabled. This must be done before
- * the filesystem is set up in order to allow events to be traced
- * early.
+ * For early boot up, the top trace array and the trace arrays created
+ * by boot-time tracing require to have a list of events that can be
+ * enabled. This must be done before the filesystem is set up in order
+ * to allow events to be traced early.
*/
-static __init void
-__trace_early_add_events(struct trace_array *tr)
+void __trace_early_add_events(struct trace_array *tr)
{
struct trace_event_call *call;
int ret;
@@ -3188,7 +3202,7 @@ static __init int setup_trace_event(char *str)
{
strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
ring_buffer_expanded = true;
- tracing_selftest_disabled = true;
+ disable_tracing_selftest("running event tracing");
return 1;
}
@@ -3258,7 +3272,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
*
* When a new instance is created, it needs to set up its events
* directory, as well as other files associated with events. It also
- * creates the event hierachry in the @parent/events directory.
+ * creates the event hierarchy in the @parent/events directory.
*
* Returns 0 on success.
*
@@ -3275,7 +3289,11 @@ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
goto out;
down_write(&trace_event_sem);
- __trace_add_event_dirs(tr);
+ /* If tr already has the event list, it is initialized in early boot. */
+ if (unlikely(!list_empty(&tr->events)))
+ __trace_early_add_event_dirs(tr);
+ else
+ __trace_add_event_dirs(tr);
up_write(&trace_event_sem);
out:
@@ -3411,10 +3429,10 @@ static __init int event_trace_enable(void)
* initialize events and perhaps start any events that are on the
* command line. Unfortunately, there are some events that will not
* start this early, like the system call tracepoints that need
- * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
- * is called before pid 1 starts, and this flag is never set, making
- * the syscall tracepoint never get reached, but the event is enabled
- * regardless (and not doing anything).
+ * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
+ * event_trace_enable() is called before pid 1 starts, and this flag
+ * is never set, making the syscall tracepoint never get reached, but
+ * the event is enabled regardless (and not doing anything).
*/
static __init int event_trace_enable_again(void)
{
@@ -3431,10 +3449,21 @@ static __init int event_trace_enable_again(void)
early_initcall(event_trace_enable_again);
+/* Init fields which doesn't related to the tracefs */
+static __init int event_trace_init_fields(void)
+{
+ if (trace_define_generic_fields())
+ pr_warn("tracing: Failed to allocated generic fields");
+
+ if (trace_define_common_fields())
+ pr_warn("tracing: Failed to allocate common fields");
+
+ return 0;
+}
+
__init int event_trace_init(void)
{
struct trace_array *tr;
- struct dentry *d_tracer;
struct dentry *entry;
int ret;
@@ -3442,22 +3471,12 @@ __init int event_trace_init(void)
if (!tr)
return -ENODEV;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
- return 0;
-
- entry = tracefs_create_file("available_events", 0444, d_tracer,
+ entry = tracefs_create_file("available_events", 0444, NULL,
tr, &ftrace_avail_fops);
if (!entry)
pr_warn("Could not create tracefs 'available_events' entry\n");
- if (trace_define_generic_fields())
- pr_warn("tracing: Failed to allocated generic fields");
-
- if (trace_define_common_fields())
- pr_warn("tracing: Failed to allocate common fields");
-
- ret = early_event_add_tracer(d_tracer, tr);
+ ret = early_event_add_tracer(NULL, tr);
if (ret)
return ret;
@@ -3466,6 +3485,9 @@ __init int event_trace_init(void)
if (ret)
pr_warn("Failed to register trace events module notifier\n");
#endif
+
+ eventdir_initialized = true;
+
return 0;
}
@@ -3474,6 +3496,7 @@ void __init trace_event_init(void)
event_trace_memsetup();
init_ftrace_syscalls();
event_trace_enable();
+ event_trace_init_fields();
}
#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
@@ -3651,7 +3674,7 @@ static struct trace_event_file event_trace_file __initdata;
static void __init
function_test_events_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *regs)
{
struct trace_buffer *buffer;
struct ring_buffer_event *event;
@@ -3690,7 +3713,6 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
static struct ftrace_ops trace_ops __initdata =
{
.func = function_test_events_call,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static __init void event_trace_self_test_with_function(void)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 78a678eeb140..e91259f6a722 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1561,27 +1561,6 @@ static inline void event_clear_filter(struct trace_event_file *file)
RCU_INIT_POINTER(file->filter, NULL);
}
-static inline void
-event_set_no_set_filter_flag(struct trace_event_file *file)
-{
- file->flags |= EVENT_FILE_FL_NO_SET_FILTER;
-}
-
-static inline void
-event_clear_no_set_filter_flag(struct trace_event_file *file)
-{
- file->flags &= ~EVENT_FILE_FL_NO_SET_FILTER;
-}
-
-static inline bool
-event_no_set_filter_flag(struct trace_event_file *file)
-{
- if (file->flags & EVENT_FILE_FL_NO_SET_FILTER)
- return true;
-
- return false;
-}
-
struct filter_list {
struct list_head list;
struct event_filter *filter;
@@ -1950,7 +1929,7 @@ static int __ftrace_function_set_filter(int filter, char *buf, int len,
/*
* The 'ip' field could have multiple filters set, separated
* either by space or comma. We first cut the filter and apply
- * all pieces separatelly.
+ * all pieces separately.
*/
re = ftrace_function_filter_re(buf, len, &re_cnt);
if (!re)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1b2ef6490229..39ebe1826fc3 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -147,6 +147,8 @@ struct hist_field {
*/
unsigned int var_ref_idx;
bool read_once;
+
+ unsigned int var_str_idx;
};
static u64 hist_field_none(struct hist_field *field,
@@ -349,6 +351,7 @@ struct hist_trigger_data {
unsigned int n_keys;
unsigned int n_fields;
unsigned int n_vars;
+ unsigned int n_var_str;
unsigned int key_size;
struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX];
unsigned int n_sort_keys;
@@ -1396,7 +1399,14 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
}
}
- n_str = hist_data->n_field_var_str + hist_data->n_save_var_str;
+ n_str = hist_data->n_field_var_str + hist_data->n_save_var_str +
+ hist_data->n_var_str;
+ if (n_str > SYNTH_FIELDS_MAX) {
+ hist_elt_data_free(elt_data);
+ return -EINVAL;
+ }
+
+ BUILD_BUG_ON(STR_VAR_LEN_MAX & (sizeof(u64) - 1));
size = STR_VAR_LEN_MAX;
@@ -3279,6 +3289,15 @@ static int check_synth_field(struct synth_event *event,
field = event->fields[field_pos];
+ /*
+ * A dynamic string synth field can accept static or
+ * dynamic. A static string synth field can only accept a
+ * same-sized static string, which is checked for later.
+ */
+ if (strstr(hist_field->type, "char[") && field->is_string
+ && field->is_dynamic)
+ return 0;
+
if (strcmp(field->type, hist_field->type) != 0) {
if (field->size != hist_field->size ||
field->is_signed != hist_field->is_signed)
@@ -3336,7 +3355,7 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data,
} else {
field_var = NULL;
/*
- * If no explicit system.event is specfied, default to
+ * If no explicit system.event is specified, default to
* looking for fields on the onmatch(system.event.xxx)
* event.
*/
@@ -3651,6 +3670,7 @@ static int create_var_field(struct hist_trigger_data *hist_data,
{
struct trace_array *tr = hist_data->event_file->tr;
unsigned long flags = 0;
+ int ret;
if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
return -EINVAL;
@@ -3665,7 +3685,12 @@ static int create_var_field(struct hist_trigger_data *hist_data,
if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX))
return -EINVAL;
- return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
+ ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
+
+ if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING)
+ hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++;
+
+ return ret;
}
static int create_val_fields(struct hist_trigger_data *hist_data,
@@ -4392,6 +4417,22 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
hist_val = hist_field->fn(hist_field, elt, rbe, rec);
if (hist_field->flags & HIST_FIELD_FL_VAR) {
var_idx = hist_field->var.idx;
+
+ if (hist_field->flags & HIST_FIELD_FL_STRING) {
+ unsigned int str_start, var_str_idx, idx;
+ char *str, *val_str;
+
+ str_start = hist_data->n_field_var_str +
+ hist_data->n_save_var_str;
+ var_str_idx = hist_field->var_str_idx;
+ idx = str_start + var_str_idx;
+
+ str = elt_data->field_var_str[idx];
+ val_str = (char *)(uintptr_t)hist_val;
+ strscpy(str, val_str, STR_VAR_LEN_MAX);
+
+ hist_val = (u64)(uintptr_t)str;
+ }
tracing_map_set_var(elt, var_idx, hist_val);
continue;
}
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index c6cca0d1d584..5a8bc0b421f1 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -20,6 +20,48 @@
#include "trace_synth.h"
+#undef ERRORS
+#define ERRORS \
+ C(BAD_NAME, "Illegal name"), \
+ C(CMD_INCOMPLETE, "Incomplete command"), \
+ C(EVENT_EXISTS, "Event already exists"), \
+ C(TOO_MANY_FIELDS, "Too many fields"), \
+ C(INCOMPLETE_TYPE, "Incomplete type"), \
+ C(INVALID_TYPE, "Invalid type"), \
+ C(INVALID_FIELD, "Invalid field"), \
+ C(CMD_TOO_LONG, "Command too long"),
+
+#undef C
+#define C(a, b) SYNTH_ERR_##a
+
+enum { ERRORS };
+
+#undef C
+#define C(a, b) b
+
+static const char *err_text[] = { ERRORS };
+
+static char last_cmd[MAX_FILTER_STR_VAL];
+
+static int errpos(const char *str)
+{
+ return err_pos(last_cmd, str);
+}
+
+static void last_cmd_set(char *str)
+{
+ if (!str)
+ return;
+
+ strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1);
+}
+
+static void synth_err(u8 err_type, u8 err_pos)
+{
+ tracing_log_err(NULL, "synthetic_events", last_cmd, err_text,
+ err_type, err_pos);
+}
+
static int create_synth_event(int argc, const char **argv);
static int synth_event_show(struct seq_file *m, struct dyn_event *ev);
static int synth_event_release(struct dyn_event *ev);
@@ -88,7 +130,7 @@ static int synth_event_define_fields(struct trace_event_call *call)
event->fields[i]->offset = n_u64;
- if (event->fields[i]->is_string) {
+ if (event->fields[i]->is_string && !event->fields[i]->is_dynamic) {
offset += STR_VAR_LEN_MAX;
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
@@ -132,13 +174,16 @@ static int synth_field_string_size(char *type)
start += sizeof("char[") - 1;
end = strchr(type, ']');
- if (!end || end < start)
+ if (!end || end < start || type + strlen(type) > end + 1)
return -EINVAL;
len = end - start;
if (len > 3)
return -EINVAL;
+ if (len == 0)
+ return 0; /* variable-length string */
+
strncpy(buf, start, len);
buf[len] = '\0';
@@ -184,6 +229,8 @@ static int synth_field_size(char *type)
size = sizeof(long);
else if (strcmp(type, "unsigned long") == 0)
size = sizeof(unsigned long);
+ else if (strcmp(type, "bool") == 0)
+ size = sizeof(bool);
else if (strcmp(type, "pid_t") == 0)
size = sizeof(pid_t);
else if (strcmp(type, "gfp_t") == 0)
@@ -226,12 +273,14 @@ static const char *synth_field_fmt(char *type)
fmt = "%ld";
else if (strcmp(type, "unsigned long") == 0)
fmt = "%lu";
+ else if (strcmp(type, "bool") == 0)
+ fmt = "%d";
else if (strcmp(type, "pid_t") == 0)
fmt = "%d";
else if (strcmp(type, "gfp_t") == 0)
fmt = "%x";
else if (synth_field_is_string(type))
- fmt = "%s";
+ fmt = "%.*s";
return fmt;
}
@@ -290,10 +339,27 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
/* parameter values */
if (se->fields[i]->is_string) {
- trace_seq_printf(s, print_fmt, se->fields[i]->name,
- (char *)&entry->fields[n_u64],
- i == se->n_fields - 1 ? "" : " ");
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ if (se->fields[i]->is_dynamic) {
+ u32 offset, data_offset;
+ char *str_field;
+
+ offset = (u32)entry->fields[n_u64];
+ data_offset = offset & 0xffff;
+
+ str_field = (char *)entry + data_offset;
+
+ trace_seq_printf(s, print_fmt, se->fields[i]->name,
+ STR_VAR_LEN_MAX,
+ str_field,
+ i == se->n_fields - 1 ? "" : " ");
+ n_u64++;
+ } else {
+ trace_seq_printf(s, print_fmt, se->fields[i]->name,
+ STR_VAR_LEN_MAX,
+ (char *)&entry->fields[n_u64],
+ i == se->n_fields - 1 ? "" : " ");
+ n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ }
} else {
struct trace_print_flags __flags[] = {
__def_gfpflag_names, {-1, NULL} };
@@ -325,16 +391,52 @@ static struct trace_event_functions synth_event_funcs = {
.trace = print_synth_event
};
+static unsigned int trace_string(struct synth_trace_event *entry,
+ struct synth_event *event,
+ char *str_val,
+ bool is_dynamic,
+ unsigned int data_size,
+ unsigned int *n_u64)
+{
+ unsigned int len = 0;
+ char *str_field;
+
+ if (is_dynamic) {
+ u32 data_offset;
+
+ data_offset = offsetof(typeof(*entry), fields);
+ data_offset += event->n_u64 * sizeof(u64);
+ data_offset += data_size;
+
+ str_field = (char *)entry + data_offset;
+
+ len = strlen(str_val) + 1;
+ strscpy(str_field, str_val, len);
+
+ data_offset |= len << 16;
+ *(u32 *)&entry->fields[*n_u64] = data_offset;
+
+ (*n_u64)++;
+ } else {
+ str_field = (char *)&entry->fields[*n_u64];
+
+ strscpy(str_field, str_val, STR_VAR_LEN_MAX);
+ (*n_u64) += STR_VAR_LEN_MAX / sizeof(u64);
+ }
+
+ return len;
+}
+
static notrace void trace_event_raw_event_synth(void *__data,
u64 *var_ref_vals,
unsigned int *var_ref_idx)
{
+ unsigned int i, n_u64, val_idx, len, data_size = 0;
struct trace_event_file *trace_file = __data;
struct synth_trace_event *entry;
struct trace_event_buffer fbuffer;
struct trace_buffer *buffer;
struct synth_event *event;
- unsigned int i, n_u64, val_idx;
int fields_size = 0;
event = trace_file->event_call->data;
@@ -344,6 +446,18 @@ static notrace void trace_event_raw_event_synth(void *__data,
fields_size = event->n_u64 * sizeof(u64);
+ for (i = 0; i < event->n_dynamic_fields; i++) {
+ unsigned int field_pos = event->dynamic_fields[i]->field_pos;
+ char *str_val;
+
+ val_idx = var_ref_idx[field_pos];
+ str_val = (char *)(long)var_ref_vals[val_idx];
+
+ len = strlen(str_val) + 1;
+
+ fields_size += len;
+ }
+
/*
* Avoid ring buffer recursion detection, as this event
* is being performed within another event.
@@ -360,10 +474,11 @@ static notrace void trace_event_raw_event_synth(void *__data,
val_idx = var_ref_idx[i];
if (event->fields[i]->is_string) {
char *str_val = (char *)(long)var_ref_vals[val_idx];
- char *str_field = (char *)&entry->fields[n_u64];
- strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ len = trace_string(entry, event, str_val,
+ event->fields[i]->is_dynamic,
+ data_size, &n_u64);
+ data_size += len; /* only dynamic string increments */
} else {
struct synth_field *field = event->fields[i];
u64 val = var_ref_vals[val_idx];
@@ -422,8 +537,13 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
for (i = 0; i < event->n_fields; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO,
- ", REC->%s", event->fields[i]->name);
+ if (event->fields[i]->is_string &&
+ event->fields[i]->is_dynamic)
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", __get_str(%s)", event->fields[i]->name);
+ else
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", REC->%s", event->fields[i]->name);
}
#undef LEN_OR_ZERO
@@ -464,14 +584,18 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
{
struct synth_field *field;
const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
- int len, ret = 0;
+ int len, ret = -ENOMEM;
+ struct seq_buf s;
+ ssize_t size;
if (field_type[0] == ';')
field_type++;
if (!strcmp(field_type, "unsigned")) {
- if (argc < 3)
+ if (argc < 3) {
+ synth_err(SYNTH_ERR_INCOMPLETE_TYPE, errpos(field_type));
return ERR_PTR(-EINVAL);
+ }
prefix = "unsigned ";
field_type = argv[1];
field_name = argv[2];
@@ -493,44 +617,82 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
len--;
field->name = kmemdup_nul(field_name, len, GFP_KERNEL);
- if (!field->name) {
- ret = -ENOMEM;
+ if (!field->name)
+ goto free;
+
+ if (!is_good_name(field->name)) {
+ synth_err(SYNTH_ERR_BAD_NAME, errpos(field_name));
+ ret = -EINVAL;
goto free;
}
if (field_type[0] == ';')
field_type++;
len = strlen(field_type) + 1;
+
if (array)
len += strlen(array);
+
if (prefix)
len += strlen(prefix);
field->type = kzalloc(len, GFP_KERNEL);
- if (!field->type) {
- ret = -ENOMEM;
+ if (!field->type)
goto free;
- }
+
+ seq_buf_init(&s, field->type, len);
if (prefix)
- strcat(field->type, prefix);
- strcat(field->type, field_type);
+ seq_buf_puts(&s, prefix);
+ seq_buf_puts(&s, field_type);
if (array) {
- strcat(field->type, array);
- if (field->type[len - 1] == ';')
- field->type[len - 1] = '\0';
+ seq_buf_puts(&s, array);
+ if (s.buffer[s.len - 1] == ';')
+ s.len--;
}
+ if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
+ goto free;
- field->size = synth_field_size(field->type);
- if (!field->size) {
+ s.buffer[s.len] = '\0';
+
+ size = synth_field_size(field->type);
+ if (size < 0) {
+ synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type));
ret = -EINVAL;
goto free;
+ } else if (size == 0) {
+ if (synth_field_is_string(field->type)) {
+ char *type;
+
+ len = sizeof("__data_loc ") + strlen(field->type) + 1;
+ type = kzalloc(len, GFP_KERNEL);
+ if (!type)
+ goto free;
+
+ seq_buf_init(&s, type, len);
+ seq_buf_puts(&s, "__data_loc ");
+ seq_buf_puts(&s, field->type);
+
+ if (WARN_ON_ONCE(!seq_buf_buffer_left(&s)))
+ goto free;
+ s.buffer[s.len] = '\0';
+
+ kfree(field->type);
+ field->type = type;
+
+ field->is_dynamic = true;
+ size = sizeof(u64);
+ } else {
+ synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type));
+ ret = -EINVAL;
+ goto free;
+ }
}
+ field->size = size;
if (synth_field_is_string(field->type))
field->is_string = true;
field->is_signed = synth_field_signed(field->type);
-
out:
return field;
free:
@@ -661,6 +823,7 @@ static void free_synth_event(struct synth_event *event)
free_synth_field(event->fields[i]);
kfree(event->fields);
+ kfree(event->dynamic_fields);
kfree(event->name);
kfree(event->class.system);
free_synth_tracepoint(event->tp);
@@ -671,8 +834,8 @@ static void free_synth_event(struct synth_event *event)
static struct synth_event *alloc_synth_event(const char *name, int n_fields,
struct synth_field **fields)
{
+ unsigned int i, j, n_dynamic_fields = 0;
struct synth_event *event;
- unsigned int i;
event = kzalloc(sizeof(*event), GFP_KERNEL);
if (!event) {
@@ -694,11 +857,33 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields,
goto out;
}
+ for (i = 0; i < n_fields; i++)
+ if (fields[i]->is_dynamic)
+ n_dynamic_fields++;
+
+ if (n_dynamic_fields) {
+ event->dynamic_fields = kcalloc(n_dynamic_fields,
+ sizeof(*event->dynamic_fields),
+ GFP_KERNEL);
+ if (!event->dynamic_fields) {
+ free_synth_event(event);
+ event = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ }
+
dyn_event_init(&event->devent, &synth_event_ops);
- for (i = 0; i < n_fields; i++)
+ for (i = 0, j = 0; i < n_fields; i++) {
event->fields[i] = fields[i];
+ if (fields[i]->is_dynamic) {
+ event->dynamic_fields[j] = fields[i];
+ event->dynamic_fields[j]->field_pos = i;
+ event->dynamic_fields[j++] = fields[i];
+ event->n_dynamic_fields++;
+ }
+ }
event->n_fields = n_fields;
out:
return event;
@@ -710,6 +895,10 @@ static int synth_event_check_arg_fn(void *data)
int size;
size = synth_field_size((char *)arg_pair->lhs);
+ if (size == 0) {
+ if (strstr((char *)arg_pair->lhs, "["))
+ return 0;
+ }
return size ? 0 : -EINVAL;
}
@@ -971,12 +1160,47 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name,
}
EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start);
+static int save_cmdstr(int argc, const char *name, const char **argv)
+{
+ struct seq_buf s;
+ char *buf;
+ int i;
+
+ buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ seq_buf_init(&s, buf, MAX_DYNEVENT_CMD_LEN);
+
+ seq_buf_puts(&s, name);
+
+ for (i = 0; i < argc; i++) {
+ seq_buf_putc(&s, ' ');
+ seq_buf_puts(&s, argv[i]);
+ }
+
+ if (!seq_buf_buffer_left(&s)) {
+ synth_err(SYNTH_ERR_CMD_TOO_LONG, 0);
+ kfree(buf);
+ return -EINVAL;
+ }
+ buf[s.len] = 0;
+ last_cmd_set(buf);
+
+ kfree(buf);
+ return 0;
+}
+
static int __create_synth_event(int argc, const char *name, const char **argv)
{
struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
struct synth_event *event = NULL;
int i, consumed = 0, n_fields = 0, ret = 0;
+ ret = save_cmdstr(argc, name, argv);
+ if (ret)
+ return ret;
+
/*
* Argument syntax:
* - Add synthetic event: <event_name> field[;field] ...
@@ -984,13 +1208,22 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
* where 'field' = type field_name
*/
- if (name[0] == '\0' || argc < 1)
+ if (name[0] == '\0' || argc < 1) {
+ synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0);
return -EINVAL;
+ }
mutex_lock(&event_mutex);
+ if (!is_good_name(name)) {
+ synth_err(SYNTH_ERR_BAD_NAME, errpos(name));
+ ret = -EINVAL;
+ goto out;
+ }
+
event = find_synth_event(name);
if (event) {
+ synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name));
ret = -EEXIST;
goto out;
}
@@ -999,6 +1232,7 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
if (strcmp(argv[i], ";") == 0)
continue;
if (n_fields == SYNTH_FIELDS_MAX) {
+ synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0);
ret = -EINVAL;
goto err;
}
@@ -1013,6 +1247,7 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
}
if (i < argc && strcmp(argv[i], ";") != 0) {
+ synth_err(SYNTH_ERR_INVALID_FIELD, errpos(argv[i]));
ret = -EINVAL;
goto err;
}
@@ -1041,7 +1276,7 @@ static int __create_synth_event(int argc, const char *name, const char **argv)
/**
* synth_event_create - Create a new synthetic event
- * @name: The name of the new sythetic event
+ * @name: The name of the new synthetic event
* @fields: An array of type/name field descriptions
* @n_fields: The number of field descriptions contained in the fields array
* @mod: The module creating the event, NULL if not created from a module
@@ -1198,10 +1433,9 @@ void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen)
EXPORT_SYMBOL_GPL(synth_event_cmd_init);
static inline int
-__synth_event_trace_start(struct trace_event_file *file,
- struct synth_event_trace_state *trace_state)
+__synth_event_trace_init(struct trace_event_file *file,
+ struct synth_event_trace_state *trace_state)
{
- int entry_size, fields_size = 0;
int ret = 0;
memset(trace_state, '\0', sizeof(*trace_state));
@@ -1211,8 +1445,8 @@ __synth_event_trace_start(struct trace_event_file *file,
* ENABLED bit is set (which attaches the probe thus allowing
* this code to be called, etc). Because this is called
* directly by the user, we don't have that but we still need
- * to honor not logging when disabled. For the the iterated
- * trace case, we save the enabed state upon start and just
+ * to honor not logging when disabled. For the iterated
+ * trace case, we save the enabled state upon start and just
* ignore the following data calls.
*/
if (!(file->flags & EVENT_FILE_FL_ENABLED) ||
@@ -1223,8 +1457,20 @@ __synth_event_trace_start(struct trace_event_file *file,
}
trace_state->event = file->event_call->data;
+out:
+ return ret;
+}
+
+static inline int
+__synth_event_trace_start(struct trace_event_file *file,
+ struct synth_event_trace_state *trace_state,
+ int dynamic_fields_size)
+{
+ int entry_size, fields_size = 0;
+ int ret = 0;
fields_size = trace_state->event->n_u64 * sizeof(u64);
+ fields_size += dynamic_fields_size;
/*
* Avoid ring buffer recursion detection, as this event
@@ -1241,7 +1487,7 @@ __synth_event_trace_start(struct trace_event_file *file,
ring_buffer_nest_end(trace_state->buffer);
ret = -EINVAL;
}
-out:
+
return ret;
}
@@ -1274,23 +1520,46 @@ __synth_event_trace_end(struct synth_event_trace_state *trace_state)
*/
int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
{
+ unsigned int i, n_u64, len, data_size = 0;
struct synth_event_trace_state state;
- unsigned int i, n_u64;
va_list args;
int ret;
- ret = __synth_event_trace_start(file, &state);
+ ret = __synth_event_trace_init(file, &state);
if (ret) {
if (ret == -ENOENT)
ret = 0; /* just disabled, not really an error */
return ret;
}
+ if (state.event->n_dynamic_fields) {
+ va_start(args, n_vals);
+
+ for (i = 0; i < state.event->n_fields; i++) {
+ u64 val = va_arg(args, u64);
+
+ if (state.event->fields[i]->is_string &&
+ state.event->fields[i]->is_dynamic) {
+ char *str_val = (char *)(long)val;
+
+ data_size += strlen(str_val) + 1;
+ }
+ }
+
+ va_end(args);
+ }
+
+ ret = __synth_event_trace_start(file, &state, data_size);
+ if (ret)
+ return ret;
+
if (n_vals != state.event->n_fields) {
ret = -EINVAL;
goto out;
}
+ data_size = 0;
+
va_start(args, n_vals);
for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
u64 val;
@@ -1299,10 +1568,11 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
if (state.event->fields[i]->is_string) {
char *str_val = (char *)(long)val;
- char *str_field = (char *)&state.entry->fields[n_u64];
- strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ len = trace_string(state.entry, state.event, str_val,
+ state.event->fields[i]->is_dynamic,
+ data_size, &n_u64);
+ data_size += len; /* only dynamic string increments */
} else {
struct synth_field *field = state.event->fields[i];
@@ -1355,29 +1625,46 @@ EXPORT_SYMBOL_GPL(synth_event_trace);
int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
unsigned int n_vals)
{
+ unsigned int i, n_u64, field_pos, len, data_size = 0;
struct synth_event_trace_state state;
- unsigned int i, n_u64;
+ char *str_val;
int ret;
- ret = __synth_event_trace_start(file, &state);
+ ret = __synth_event_trace_init(file, &state);
if (ret) {
if (ret == -ENOENT)
ret = 0; /* just disabled, not really an error */
return ret;
}
+ if (state.event->n_dynamic_fields) {
+ for (i = 0; i < state.event->n_dynamic_fields; i++) {
+ field_pos = state.event->dynamic_fields[i]->field_pos;
+ str_val = (char *)(long)vals[field_pos];
+ len = strlen(str_val) + 1;
+ data_size += len;
+ }
+ }
+
+ ret = __synth_event_trace_start(file, &state, data_size);
+ if (ret)
+ return ret;
+
if (n_vals != state.event->n_fields) {
ret = -EINVAL;
goto out;
}
+ data_size = 0;
+
for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
if (state.event->fields[i]->is_string) {
char *str_val = (char *)(long)vals[i];
- char *str_field = (char *)&state.entry->fields[n_u64];
- strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ len = trace_string(state.entry, state.event, str_val,
+ state.event->fields[i]->is_dynamic,
+ data_size, &n_u64);
+ data_size += len; /* only dynamic string increments */
} else {
struct synth_field *field = state.event->fields[i];
u64 val = vals[i];
@@ -1445,9 +1732,17 @@ int synth_event_trace_start(struct trace_event_file *file,
if (!trace_state)
return -EINVAL;
- ret = __synth_event_trace_start(file, trace_state);
- if (ret == -ENOENT)
- ret = 0; /* just disabled, not really an error */
+ ret = __synth_event_trace_init(file, trace_state);
+ if (ret) {
+ if (ret == -ENOENT)
+ ret = 0; /* just disabled, not really an error */
+ return ret;
+ }
+
+ if (trace_state->event->n_dynamic_fields)
+ return -ENOTSUPP;
+
+ ret = __synth_event_trace_start(file, trace_state, 0);
return ret;
}
@@ -1508,6 +1803,11 @@ static int __synth_event_add_val(const char *field_name, u64 val,
char *str_val = (char *)(long)val;
char *str_field;
+ if (field->is_dynamic) { /* add_val can't do dynamic strings */
+ ret = -EINVAL;
+ goto out;
+ }
+
if (!str_val) {
ret = -EINVAL;
goto out;
@@ -1679,14 +1979,22 @@ static int __synth_event_show(struct seq_file *m, struct synth_event *event)
{
struct synth_field *field;
unsigned int i;
+ char *type, *t;
seq_printf(m, "%s\t", event->name);
for (i = 0; i < event->n_fields; i++) {
field = event->fields[i];
+ type = field->type;
+ t = strstr(type, "__data_loc");
+ if (t) { /* __data_loc belongs in format but not event desc */
+ t += sizeof("__data_loc");
+ type = t;
+ }
+
/* parameter values */
- seq_printf(m, "%s %s%s", field->type, field->name,
+ seq_printf(m, "%s %s%s", type, field->name,
i == event->n_fields - 1 ? "" : "; ");
}
@@ -1754,25 +2062,31 @@ static const struct file_operations synth_events_fops = {
.release = seq_release,
};
-static __init int trace_events_synth_init(void)
+/*
+ * Register dynevent at core_initcall. This allows kernel to setup kprobe
+ * events in postcore_initcall without tracefs.
+ */
+static __init int trace_events_synth_init_early(void)
{
- struct dentry *entry = NULL;
- struct dentry *d_tracer;
int err = 0;
err = dyn_event_register(&synth_event_ops);
- if (err) {
+ if (err)
pr_warn("Could not register synth_event_ops\n");
- return err;
- }
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer)) {
- err = PTR_ERR(d_tracer);
+ return err;
+}
+core_initcall(trace_events_synth_init_early);
+
+static __init int trace_events_synth_init(void)
+{
+ struct dentry *entry = NULL;
+ int err = 0;
+ err = tracing_init_dentry();
+ if (err)
goto err;
- }
- entry = tracefs_create_file("synthetic_events", 0644, d_tracer,
+ entry = tracefs_create_file("synthetic_events", 0644, NULL,
NULL, &synth_events_fops);
if (!entry) {
err = -ENODEV;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 70d3d0a09053..d960f6b11b5e 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -26,7 +26,7 @@ static int ftrace_event_register(struct trace_event_call *call,
/*
* The FTRACE_ENTRY_REG macro allows ftrace entry to define register
- * function and thus become accesible via perf.
+ * function and thus become accessible via perf.
*/
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \
@@ -176,7 +176,7 @@ struct trace_event_call __used event_##call = { \
.flags = TRACE_EVENT_FL_IGNORE_ENABLE, \
}; \
static struct trace_event_call __used \
-__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
+__section("_ftrace_events") *__event_##call = &event_##call;
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index dd4dff71d89a..c5095dd28e20 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,10 +23,10 @@ static void tracing_start_function_trace(struct trace_array *tr);
static void tracing_stop_function_trace(struct trace_array *tr);
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs);
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs);
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
static struct tracer_flags func_flags;
/* Our option */
@@ -34,29 +34,37 @@ enum {
TRACE_FUNC_OPT_STACK = 0x1,
};
-static int allocate_ftrace_ops(struct trace_array *tr)
+int ftrace_allocate_ftrace_ops(struct trace_array *tr)
{
struct ftrace_ops *ops;
+ /* The top level array uses the "global_ops" */
+ if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+ return 0;
+
ops = kzalloc(sizeof(*ops), GFP_KERNEL);
if (!ops)
return -ENOMEM;
/* Currently only the non stack version is supported */
ops->func = function_trace_call;
- ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;
+ ops->flags = FTRACE_OPS_FL_PID;
tr->ops = ops;
ops->private = tr;
+
return 0;
}
+void ftrace_free_ftrace_ops(struct trace_array *tr)
+{
+ kfree(tr->ops);
+ tr->ops = NULL;
+}
int ftrace_create_function_files(struct trace_array *tr,
struct dentry *parent)
{
- int ret;
-
/*
* The top level array uses the "global_ops", and the files are
* created on boot up.
@@ -64,9 +72,8 @@ int ftrace_create_function_files(struct trace_array *tr,
if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
return 0;
- ret = allocate_ftrace_ops(tr);
- if (ret)
- return ret;
+ if (!tr->ops)
+ return -EINVAL;
ftrace_create_filter_files(tr->ops, parent);
@@ -76,14 +83,12 @@ int ftrace_create_function_files(struct trace_array *tr,
void ftrace_destroy_function_files(struct trace_array *tr)
{
ftrace_destroy_filter_files(tr->ops);
- kfree(tr->ops);
- tr->ops = NULL;
+ ftrace_free_ftrace_ops(tr);
}
static int function_trace_init(struct trace_array *tr)
{
ftrace_func_t func;
-
/*
* Instance trace_arrays get their ops allocated
* at instance creation. Unless it failed
@@ -123,7 +128,7 @@ static void function_trace_start(struct trace_array *tr)
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = op->private;
struct trace_array_cpu *data;
@@ -135,22 +140,20 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
if (unlikely(!tr->function_enabled))
return;
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
pc = preempt_count();
preempt_disable_notrace();
- bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
- if (bit < 0)
- goto out;
-
cpu = smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
if (!atomic_read(&data->disabled)) {
local_save_flags(flags);
trace_function(tr, ip, parent_ip, flags, pc);
}
- trace_clear_recursion(bit);
-
- out:
+ ftrace_test_recursion_unlock(bit);
preempt_enable_notrace();
}
@@ -174,7 +177,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = op->private;
struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4a9c49c08ec9..d874dec87131 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -957,7 +957,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
/* Overrun */
if (flags & TRACE_GRAPH_PRINT_OVERRUN)
- trace_seq_printf(s, " (Overruns: %lu)\n",
+ trace_seq_printf(s, " (Overruns: %u)\n",
trace->overrun);
print_graph_irq(iter, trace->func, TRACE_GRAPH_RET,
@@ -1336,13 +1336,13 @@ static const struct file_operations graph_depth_fops = {
static __init int init_graph_tracefs(void)
{
- struct dentry *d_tracer;
+ int ret;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return 0;
- trace_create_file("max_graph_depth", 0644, d_tracer,
+ trace_create_file("max_graph_depth", 0644, NULL,
NULL, &graph_depth_fops);
return 0;
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 17873e5d0353..c0df9b97f147 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -368,7 +368,7 @@ static int start_kthread(struct trace_array *tr)
struct task_struct *kthread;
int next_cpu;
- if (WARN_ON(hwlat_kthread))
+ if (hwlat_kthread)
return 0;
/* Just pick the first CPU on first iteration */
@@ -485,11 +485,11 @@ hwlat_width_write(struct file *filp, const char __user *ubuf,
* @ppos: The current position in @file
*
* This function provides a write implementation for the "window" interface
- * to the hardware latency detetector. The window is the total time
+ * to the hardware latency detector. The window is the total time
* in us that will be considered one sample period. Conceptually, windows
* occur back-to-back and contain a sample width period during which
* actual sampling occurs. Can be used to write a new total window size. It
- * is enfoced that any value written must be greater than the sample width
+ * is enforced that any value written must be greater than the sample width
* size, or an error results.
*/
static ssize_t
@@ -538,14 +538,14 @@ static const struct file_operations window_fops = {
*/
static int init_tracefs(void)
{
- struct dentry *d_tracer;
+ int ret;
struct dentry *top_dir;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return -ENOMEM;
- top_dir = tracefs_create_dir("hwlat_detector", d_tracer);
+ top_dir = tracefs_create_dir("hwlat_detector", NULL);
if (!top_dir)
return -ENOMEM;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 10bbb0f381d5..6756379b661f 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -138,7 +138,7 @@ static int func_prolog_dec(struct trace_array *tr,
*/
static void
irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
@@ -562,6 +562,8 @@ static int __irqsoff_tracer_init(struct trace_array *tr)
/* non overwrite screws up the latency tracers */
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
+ /* without pause, we will produce garbage if another latency occurs */
+ set_tracer_flag(tr, TRACE_ITER_PAUSE_ON_TRACE, 1);
tr->max_latency = 0;
irqsoff_trace = tr;
@@ -583,11 +585,13 @@ static void __irqsoff_tracer_reset(struct trace_array *tr)
{
int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
+ int pause_flag = save_flags & TRACE_ITER_PAUSE_ON_TRACE;
stop_irqsoff_tracer(tr, is_graph(tr));
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+ set_tracer_flag(tr, TRACE_ITER_PAUSE_ON_TRACE, pause_flag);
ftrace_reset_array_ops(tr);
irqsoff_busy = false;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aefb6065b508..56c7fbff7bd7 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -25,11 +25,12 @@
/* Kprobe early definition from command line */
static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
-static bool kprobe_boot_events_enabled __initdata;
static int __init set_kprobe_boot_events(char *str)
{
strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
+ disable_tracing_selftest("running kprobe events");
+
return 0;
}
__setup("kprobe_event=", set_kprobe_boot_events);
@@ -106,9 +107,10 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
struct module *mod)
{
- int len = strlen(mod->name);
+ int len = strlen(module_name(mod));
const char *name = trace_kprobe_symbol(tk);
- return strncmp(mod->name, name, len) == 0 && name[len] == ':';
+
+ return strncmp(module_name(mod), name, len) == 0 && name[len] == ':';
}
static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
@@ -219,9 +221,9 @@ bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
- return tk ? kprobe_on_func_entry(tk->rp.kp.addr,
+ return tk ? (kprobe_on_func_entry(tk->rp.kp.addr,
tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
- tk->rp.kp.addr ? 0 : tk->rp.kp.offset) : false;
+ tk->rp.kp.addr ? 0 : tk->rp.kp.offset) == 0) : false;
}
bool trace_kprobe_error_injectable(struct trace_event_call *call)
@@ -432,7 +434,7 @@ static int disable_trace_kprobe(struct trace_event_call *call,
return 0;
}
-#if defined(CONFIG_KPROBES_ON_FTRACE) && \
+#if defined(CONFIG_DYNAMIC_FTRACE) && \
!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
static bool __within_notrace_func(unsigned long addr)
{
@@ -688,7 +690,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
if (ret)
pr_warn("Failed to re-register probe %s on %s: %d\n",
trace_probe_name(&tk->tp),
- mod->name, ret);
+ module_name(mod), ret);
}
}
mutex_unlock(&event_mutex);
@@ -717,6 +719,9 @@ static int trace_kprobe_create(int argc, const char *argv[])
* p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe:
* r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
+ * Or
+ * p:[GRP/]EVENT] [MOD:]KSYM[+0]%return [FETCHARGS]
+ *
* Fetch args:
* $retval : fetch return value
* $stack : fetch stack address
@@ -746,7 +751,6 @@ static int trace_kprobe_create(int argc, const char *argv[])
switch (argv[0][0]) {
case 'r':
is_return = true;
- flags |= TPARG_FL_RETURN;
break;
case 'p':
break;
@@ -804,15 +808,31 @@ static int trace_kprobe_create(int argc, const char *argv[])
symbol = kstrdup(argv[1], GFP_KERNEL);
if (!symbol)
return -ENOMEM;
+
+ tmp = strchr(symbol, '%');
+ if (tmp) {
+ if (!strcmp(tmp, "%return")) {
+ *tmp = '\0';
+ is_return = true;
+ } else {
+ trace_probe_log_err(tmp - symbol, BAD_ADDR_SUFFIX);
+ goto parse_error;
+ }
+ }
+
/* TODO: support .init module functions */
ret = traceprobe_split_symbol_offset(symbol, &offset);
if (ret || offset < 0 || offset > UINT_MAX) {
trace_probe_log_err(0, BAD_PROBE_ADDR);
goto parse_error;
}
- if (kprobe_on_func_entry(NULL, symbol, offset))
+ if (is_return)
+ flags |= TPARG_FL_RETURN;
+ ret = kprobe_on_func_entry(NULL, symbol, offset);
+ if (ret == 0)
flags |= TPARG_FL_FENTRY;
- if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
+ /* Defer the ENOENT case until register kprobe */
+ if (ret == -EINVAL && is_return) {
trace_probe_log_err(0, BAD_RETPROBE);
goto parse_error;
}
@@ -1714,7 +1734,8 @@ NOKPROBE_SYMBOL(kprobe_dispatcher);
static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
- struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
+ struct kretprobe *rp = get_kretprobe(ri);
+ struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp);
raw_cpu_inc(*tk->nhit);
@@ -1870,8 +1891,6 @@ static __init void setup_boot_kprobe_events(void)
ret = trace_run_command(cmd, create_or_delete_trace_kprobe);
if (ret)
pr_warn("Failed to add event(%d): %s\n", ret, cmd);
- else
- kprobe_boot_events_enabled = true;
cmd = p;
}
@@ -1880,8 +1899,8 @@ static __init void setup_boot_kprobe_events(void)
}
/*
- * Register dynevent at subsys_initcall. This allows kernel to setup kprobe
- * events in fs_initcall without tracefs.
+ * Register dynevent at core_initcall. This allows kernel to setup kprobe
+ * events in postcore_initcall without tracefs.
*/
static __init int init_kprobe_trace_early(void)
{
@@ -1896,19 +1915,19 @@ static __init int init_kprobe_trace_early(void)
return 0;
}
-subsys_initcall(init_kprobe_trace_early);
+core_initcall(init_kprobe_trace_early);
/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
- struct dentry *d_tracer;
+ int ret;
struct dentry *entry;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return 0;
- entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
+ entry = tracefs_create_file("kprobe_events", 0644, NULL,
NULL, &kprobe_events_ops);
/* Event list interface */
@@ -1916,7 +1935,7 @@ static __init int init_kprobe_trace(void)
pr_warn("Could not create tracefs 'kprobe_events' entry\n");
/* Profile interface */
- entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
+ entry = tracefs_create_file("kprobe_profile", 0444, NULL,
NULL, &kprobe_profile_ops);
if (!entry)
@@ -1956,10 +1975,8 @@ static __init int kprobe_trace_self_tests_init(void)
if (tracing_is_disabled())
return -ENODEV;
- if (kprobe_boot_events_enabled) {
- pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n");
+ if (tracing_selftest_disabled)
return 0;
- }
target = kprobe_trace_selftest_target;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 000e9dc224c6..92b1575ae0ca 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,8 +353,8 @@ static inline const char *kretprobed(const char *name)
}
#endif /* CONFIG_KRETPROBES */
-static void
-seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
+void
+trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
@@ -420,7 +420,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
goto out;
}
- seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
+ trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
if (sym_flags & TRACE_ITER_SYM_ADDR)
trace_seq_printf(s, " <" IP_FMT ">", ip);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 2f742b74e7e6..4c954636caf0 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -16,6 +16,7 @@ extern int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
unsigned long sym_flags);
+extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset);
extern int trace_print_context(struct trace_iterator *iter);
extern int trace_print_lat_context(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index d4e31e969206..ff32476df072 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -96,7 +96,7 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
if (val == MODULE_STATE_COMING)
hold_module_trace_bprintk_format(start, end);
}
- return 0;
+ return NOTIFY_OK;
}
/*
@@ -174,7 +174,7 @@ __init static int
module_trace_bprintk_format_notify(struct notifier_block *self,
unsigned long val, void *data)
{
- return 0;
+ return NOTIFY_OK;
}
static inline const char **
find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
@@ -367,13 +367,13 @@ static const struct file_operations ftrace_formats_fops = {
static __init int init_trace_printk_function_export(void)
{
- struct dentry *d_tracer;
+ int ret;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return 0;
- trace_create_file("printk_formats", 0444, d_tracer,
+ trace_create_file("printk_formats", 0444, NULL,
NULL, &ftrace_formats_fops);
return 0;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index a22b62813f8c..2f703a20c724 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -16,7 +16,6 @@
#include <linux/tracefs.h>
#include <linux/types.h>
#include <linux/string.h>
-#include <linux/ctype.h>
#include <linux/ptrace.h>
#include <linux/perf_event.h>
#include <linux/kprobes.h>
@@ -348,18 +347,6 @@ bool trace_probe_match_command_args(struct trace_probe *tp,
#define trace_probe_for_each_link_rcu(pos, tp) \
list_for_each_entry_rcu(pos, &(tp)->event->files, list)
-/* Check the name is good for event/group/fields */
-static inline bool is_good_name(const char *name)
-{
- if (!isalpha(*name) && *name != '_')
- return false;
- while (*++name != '\0') {
- if (!isalpha(*name) && !isdigit(*name) && *name != '_')
- return false;
- }
- return true;
-}
-
#define TPARG_FL_RETURN BIT(0)
#define TPARG_FL_KERNEL BIT(1)
#define TPARG_FL_FENTRY BIT(2)
@@ -404,6 +391,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(MAXACT_TOO_BIG, "Maxactive is too big"), \
C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \
C(BAD_RETPROBE, "Retprobe address must be an function entry"), \
+ C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \
C(NO_GROUP_NAME, "Group name is not specified"), \
C(GROUP_TOO_LONG, "Group name is too long"), \
C(BAD_GROUP_NAME, "Group name must follow the same rules as C identifiers"), \
diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c
new file mode 100644
index 000000000000..b2edac1fe156
--- /dev/null
+++ b/kernel/trace/trace_recursion_record.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+
+struct recursed_functions {
+ unsigned long ip;
+ unsigned long parent_ip;
+};
+
+static struct recursed_functions recursed_functions[CONFIG_FTRACE_RECORD_RECURSION_SIZE];
+static atomic_t nr_records;
+
+/*
+ * Cache the last found function. Yes, updates to this is racey, but
+ * so is memory cache ;-)
+ */
+static unsigned long cached_function;
+
+void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip)
+{
+ int index = 0;
+ int i;
+ unsigned long old;
+
+ again:
+ /* First check the last one recorded */
+ if (ip == cached_function)
+ return;
+
+ i = atomic_read(&nr_records);
+ /* nr_records is -1 when clearing records */
+ smp_mb__after_atomic();
+ if (i < 0)
+ return;
+
+ /*
+ * If there's two writers and this writer comes in second,
+ * the cmpxchg() below to update the ip will fail. Then this
+ * writer will try again. It is possible that index will now
+ * be greater than nr_records. This is because the writer
+ * that succeeded has not updated the nr_records yet.
+ * This writer could keep trying again until the other writer
+ * updates nr_records. But if the other writer takes an
+ * interrupt, and that interrupt locks up that CPU, we do
+ * not want this CPU to lock up due to the recursion protection,
+ * and have a bug report showing this CPU as the cause of
+ * locking up the computer. To not lose this record, this
+ * writer will simply use the next position to update the
+ * recursed_functions, and it will update the nr_records
+ * accordingly.
+ */
+ if (index < i)
+ index = i;
+ if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE)
+ return;
+
+ for (i = index - 1; i >= 0; i--) {
+ if (recursed_functions[i].ip == ip) {
+ cached_function = ip;
+ return;
+ }
+ }
+
+ cached_function = ip;
+
+ /*
+ * We only want to add a function if it hasn't been added before.
+ * Add to the current location before incrementing the count.
+ * If it fails to add, then increment the index (save in i)
+ * and try again.
+ */
+ old = cmpxchg(&recursed_functions[index].ip, 0, ip);
+ if (old != 0) {
+ /* Did something else already added this for us? */
+ if (old == ip)
+ return;
+ /* Try the next location (use i for the next index) */
+ index++;
+ goto again;
+ }
+
+ recursed_functions[index].parent_ip = parent_ip;
+
+ /*
+ * It's still possible that we could race with the clearing
+ * CPU0 CPU1
+ * ---- ----
+ * ip = func
+ * nr_records = -1;
+ * recursed_functions[0] = 0;
+ * i = -1
+ * if (i < 0)
+ * nr_records = 0;
+ * (new recursion detected)
+ * recursed_functions[0] = func
+ * cmpxchg(recursed_functions[0],
+ * func, 0)
+ *
+ * But the worse that could happen is that we get a zero in
+ * the recursed_functions array, and it's likely that "func" will
+ * be recorded again.
+ */
+ i = atomic_read(&nr_records);
+ smp_mb__after_atomic();
+ if (i < 0)
+ cmpxchg(&recursed_functions[index].ip, ip, 0);
+ else if (i <= index)
+ atomic_cmpxchg(&nr_records, i, index + 1);
+}
+EXPORT_SYMBOL_GPL(ftrace_record_recursion);
+
+static DEFINE_MUTEX(recursed_function_lock);
+static struct trace_seq *tseq;
+
+static void *recursed_function_seq_start(struct seq_file *m, loff_t *pos)
+{
+ void *ret = NULL;
+ int index;
+
+ mutex_lock(&recursed_function_lock);
+ index = atomic_read(&nr_records);
+ if (*pos < index) {
+ ret = &recursed_functions[*pos];
+ }
+
+ tseq = kzalloc(sizeof(*tseq), GFP_KERNEL);
+ if (!tseq)
+ return ERR_PTR(-ENOMEM);
+
+ trace_seq_init(tseq);
+
+ return ret;
+}
+
+static void *recursed_function_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ int index;
+ int p;
+
+ index = atomic_read(&nr_records);
+ p = ++(*pos);
+
+ return p < index ? &recursed_functions[p] : NULL;
+}
+
+static void recursed_function_seq_stop(struct seq_file *m, void *v)
+{
+ kfree(tseq);
+ mutex_unlock(&recursed_function_lock);
+}
+
+static int recursed_function_seq_show(struct seq_file *m, void *v)
+{
+ struct recursed_functions *record = v;
+ int ret = 0;
+
+ if (record) {
+ trace_seq_print_sym(tseq, record->parent_ip, true);
+ trace_seq_puts(tseq, ":\t");
+ trace_seq_print_sym(tseq, record->ip, true);
+ trace_seq_putc(tseq, '\n');
+ ret = trace_print_seq(m, tseq);
+ }
+
+ return ret;
+}
+
+static const struct seq_operations recursed_function_seq_ops = {
+ .start = recursed_function_seq_start,
+ .next = recursed_function_seq_next,
+ .stop = recursed_function_seq_stop,
+ .show = recursed_function_seq_show
+};
+
+static int recursed_function_open(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+
+ mutex_lock(&recursed_function_lock);
+ /* If this file was opened for write, then erase contents */
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ /* disable updating records */
+ atomic_set(&nr_records, -1);
+ smp_mb__after_atomic();
+ memset(recursed_functions, 0, sizeof(recursed_functions));
+ smp_wmb();
+ /* enable them again */
+ atomic_set(&nr_records, 0);
+ }
+ if (file->f_mode & FMODE_READ)
+ ret = seq_open(file, &recursed_function_seq_ops);
+ mutex_unlock(&recursed_function_lock);
+
+ return ret;
+}
+
+static ssize_t recursed_function_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
+static int recursed_function_release(struct inode *inode, struct file *file)
+{
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+ return 0;
+}
+
+static const struct file_operations recursed_functions_fops = {
+ .open = recursed_function_open,
+ .write = recursed_function_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = recursed_function_release,
+};
+
+__init static int create_recursed_functions(void)
+{
+ struct dentry *dentry;
+
+ dentry = trace_create_file("recursed_functions", 0644, NULL, NULL,
+ &recursed_functions_fops);
+ if (!dentry)
+ pr_warn("WARNING: Failed to create recursed_functions\n");
+ return 0;
+}
+
+fs_initcall(create_recursed_functions);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 97b10bb31a1f..c0181066dbe9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -212,7 +212,7 @@ static void wakeup_print_header(struct seq_file *s)
*/
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index b5e3496cf803..73ef12092250 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -107,7 +107,7 @@ static int trace_selftest_test_probe1_cnt;
static void trace_selftest_test_probe1_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
trace_selftest_test_probe1_cnt++;
}
@@ -116,7 +116,7 @@ static int trace_selftest_test_probe2_cnt;
static void trace_selftest_test_probe2_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
trace_selftest_test_probe2_cnt++;
}
@@ -125,7 +125,7 @@ static int trace_selftest_test_probe3_cnt;
static void trace_selftest_test_probe3_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
trace_selftest_test_probe3_cnt++;
}
@@ -134,7 +134,7 @@ static int trace_selftest_test_global_cnt;
static void trace_selftest_test_global_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
trace_selftest_test_global_cnt++;
}
@@ -143,24 +143,21 @@ static int trace_selftest_test_dyn_cnt;
static void trace_selftest_test_dyn_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
trace_selftest_test_dyn_cnt++;
}
static struct ftrace_ops test_probe1 = {
.func = trace_selftest_test_probe1_func,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static struct ftrace_ops test_probe2 = {
.func = trace_selftest_test_probe2_func,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static struct ftrace_ops test_probe3 = {
.func = trace_selftest_test_probe3_func,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static void print_counts(void)
@@ -417,7 +414,7 @@ static int trace_selftest_recursion_cnt;
static void trace_selftest_test_recursion_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
/*
* This function is registered without the recursion safe flag.
@@ -432,7 +429,7 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
static void trace_selftest_test_recursion_safe_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
/*
* We said we would provide our own recursion. By calling
@@ -448,11 +445,11 @@ static void trace_selftest_test_recursion_safe_func(unsigned long ip,
static struct ftrace_ops test_rec_probe = {
.func = trace_selftest_test_recursion_func,
+ .flags = FTRACE_OPS_FL_RECURSION,
};
static struct ftrace_ops test_recsafe_probe = {
.func = trace_selftest_test_recursion_safe_func,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static int
@@ -492,8 +489,13 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_rec_probe);
ret = -1;
- if (trace_selftest_recursion_cnt != 1) {
- pr_cont("*callback not called once (%d)* ",
+ /*
+ * Recursion allows for transitions between context,
+ * and may call the callback twice.
+ */
+ if (trace_selftest_recursion_cnt != 1 &&
+ trace_selftest_recursion_cnt != 2) {
+ pr_cont("*callback not called once (or twice) (%d)* ",
trace_selftest_recursion_cnt);
goto out;
}
@@ -546,9 +548,11 @@ static enum {
static void trace_selftest_test_regs_func(unsigned long ip,
unsigned long pip,
struct ftrace_ops *op,
- struct pt_regs *pt_regs)
+ struct ftrace_regs *fregs)
{
- if (pt_regs)
+ struct pt_regs *regs = ftrace_get_regs(fregs);
+
+ if (regs)
trace_selftest_regs_stat = TRACE_SELFTEST_REGS_FOUND;
else
trace_selftest_regs_stat = TRACE_SELFTEST_REGS_NOT_FOUND;
@@ -556,7 +560,7 @@ static void trace_selftest_test_regs_func(unsigned long ip,
static struct ftrace_ops test_regs_probe = {
.func = trace_selftest_test_regs_func,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_SAVE_REGS,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
};
static int
@@ -782,7 +786,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
/* Have we just recovered from a hang? */
if (graph_hang_thresh > GRAPH_MAX_FUNC_TEST) {
- tracing_selftest_disabled = true;
+ disable_tracing_selftest("recovering from a hang");
ret = -1;
goto out;
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 98bba4764c52..63c285042051 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -290,7 +290,7 @@ static void check_stack(unsigned long ip, unsigned long *stack)
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
unsigned long stack;
@@ -318,7 +318,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
static struct ftrace_ops trace_ops __read_mostly =
{
.func = stack_trace_call,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static ssize_t
@@ -554,20 +553,20 @@ __setup("stacktrace", enable_stacktrace);
static __init int stack_trace_init(void)
{
- struct dentry *d_tracer;
+ int ret;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return 0;
- trace_create_file("stack_max_size", 0644, d_tracer,
+ trace_create_file("stack_max_size", 0644, NULL,
&stack_trace_max_size, &stack_max_size_fops);
- trace_create_file("stack_trace", 0444, d_tracer,
+ trace_create_file("stack_trace", 0444, NULL,
NULL, &stack_trace_fops);
#ifdef CONFIG_DYNAMIC_FTRACE
- trace_create_file("stack_trace_filter", 0644, d_tracer,
+ trace_create_file("stack_trace_filter", 0644, NULL,
&trace_ops, &stack_trace_filter_fops);
#endif
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index d1fa19773cc8..8d141c3825a9 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -276,13 +276,13 @@ static const struct file_operations tracing_stat_fops = {
static int tracing_stat_init(void)
{
- struct dentry *d_tracing;
+ int ret;
- d_tracing = tracing_init_dentry();
- if (IS_ERR(d_tracing))
+ ret = tracing_init_dentry();
+ if (ret)
return -ENODEV;
- stat_dir = tracefs_create_dir("trace_stat", d_tracing);
+ stat_dir = tracefs_create_dir("trace_stat", NULL);
if (!stat_dir) {
pr_warn("Could not create tracefs 'trace_stat' entry\n");
return -ENOMEM;
diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
index ac35c45207c4..6e146b959dcd 100644
--- a/kernel/trace/trace_synth.h
+++ b/kernel/trace/trace_synth.h
@@ -7,7 +7,7 @@
#define SYNTH_SYSTEM "synthetic"
#define SYNTH_FIELDS_MAX 32
-#define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */
+#define STR_VAR_LEN_MAX MAX_FILTER_STR_VAL /* must be multiple of sizeof(u64) */
struct synth_field {
char *type;
@@ -16,6 +16,8 @@ struct synth_field {
unsigned int offset;
bool is_signed;
bool is_string;
+ bool is_dynamic;
+ bool field_pos;
};
struct synth_event {
@@ -24,6 +26,8 @@ struct synth_event {
char *name;
struct synth_field **fields;
unsigned int n_fields;
+ struct synth_field **dynamic_fields;
+ unsigned int n_dynamic_fields;
unsigned int n_u64;
struct trace_event_class class;
struct trace_event_call call;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index f4286c9bdeb4..3cf7128e1ad3 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -528,7 +528,7 @@ end:
/*
* Argument syntax:
- * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
+ * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS]
*/
static int trace_uprobe_create(int argc, const char **argv)
{
@@ -617,6 +617,19 @@ static int trace_uprobe_create(int argc, const char **argv)
}
}
+ /* Check if there is %return suffix */
+ tmp = strchr(arg, '%');
+ if (tmp) {
+ if (!strcmp(tmp, "%return")) {
+ *tmp = '\0';
+ is_return = true;
+ } else {
+ trace_probe_log_err(tmp - filename, BAD_ADDR_SUFFIX);
+ ret = -EINVAL;
+ goto fail_address_parse;
+ }
+ }
+
/* Parse uprobe offset. */
ret = kstrtoul(arg, 0, &offset);
if (ret) {
@@ -1625,21 +1638,20 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call)
/* Make a trace interface for controling probe points */
static __init int init_uprobe_trace(void)
{
- struct dentry *d_tracer;
int ret;
ret = dyn_event_register(&trace_uprobe_ops);
if (ret)
return ret;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
+ ret = tracing_init_dentry();
+ if (ret)
return 0;
- trace_create_file("uprobe_events", 0644, d_tracer,
+ trace_create_file("uprobe_events", 0644, NULL,
NULL, &uprobe_events_ops);
/* Profile interface */
- trace_create_file("uprobe_profile", 0444, d_tracer,
+ trace_create_file("uprobe_profile", 0444, NULL,
NULL, &uprobe_profile_ops);
return 0;
}
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 74738c9856f1..d6bddb157ef2 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -260,7 +260,7 @@ int tracing_map_add_var(struct tracing_map *map)
* to use cmp_fn.
*
* A key can be a subset of a compound key; for that purpose, the
- * offset param is used to describe where within the the compound key
+ * offset param is used to describe where within the compound key
* the key referenced by this key field resides.
*
* Return: The index identifying the field in the map and associated
@@ -609,7 +609,7 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
* signal that state. There are two user-visible tracing_map
* variables, 'hits' and 'drops', which are updated by this function.
* Every time an element is either successfully inserted or retrieved,
- * the 'hits' value is incrememented. Every time an element insertion
+ * the 'hits' value is incremented. Every time an element insertion
* fails, the 'drops' value is incremented.
*
* This is a lock-free tracing map insertion function implementing a
@@ -642,9 +642,9 @@ struct tracing_map_elt *tracing_map_insert(struct tracing_map *map, void *key)
* tracing_map_elt. This is a lock-free lookup; see
* tracing_map_insert() for details on tracing_map and how it works.
* Every time an element is retrieved, the 'hits' value is
- * incrememented. There is one user-visible tracing_map variable,
+ * incremented. There is one user-visible tracing_map variable,
* 'hits', which is updated by this function. Every time an element
- * is successfully retrieved, the 'hits' value is incrememented. The
+ * is successfully retrieved, the 'hits' value is incremented. The
* 'drops' value is never updated by this function.
*
* Return: the tracing_map_elt pointer val associated with the key.
diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h
index a6de61fc22de..2c765ee2a4d4 100644
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -50,7 +50,7 @@ typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b);
* an instance of tracing_map_elt, where 'elt' in the latter part of
* that variable name is short for 'element'. The purpose of a
* tracing_map_elt is to hold values specific to the particular
- * 32-bit hashed key it's assocated with. Things such as the unique
+ * 32-bit hashed key it's associated with. Things such as the unique
* set of aggregated sums associated with the 32-bit hashed key, along
* with a copy of the full key associated with the entry, and which
* was used to produce the 32-bit hashed key.