Diffstat (limited to 'kernel')
28 files changed, 1375 insertions, 1075 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 31cfdb6b4bc3..159900736f25 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -47,12 +47,6 @@ config ARCH_HAS_DMA_SET_MASK config ARCH_HAS_DMA_WRITE_COMBINE bool -# -# Select if the architectures provides the arch_dma_mark_clean hook -# -config ARCH_HAS_DMA_MARK_CLEAN - bool - config DMA_DECLARE_COHERENT bool diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 138ede653de4..43d6a996d7a7 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -63,6 +63,7 @@ enum map_err_types { * @sg_mapped_ents: 'mapped_ents' from dma_map_sg * @paddr: physical start address of the mapping * @map_err_type: track whether dma_mapping_error() was checked + * @is_cache_clean: driver promises not to write to buffer while mapped * @stack_len: number of backtrace entries in @stack_entries * @stack_entries: stack of backtrace history */ @@ -76,7 +77,8 @@ struct dma_debug_entry { int sg_call_ents; int sg_mapped_ents; phys_addr_t paddr; - enum map_err_types map_err_type; + enum map_err_types map_err_type; + bool is_cache_clean; #ifdef CONFIG_STACKTRACE unsigned int stack_len; unsigned long stack_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; @@ -472,12 +474,15 @@ static int active_cacheline_dec_overlap(phys_addr_t cln) return active_cacheline_set_overlap(cln, --overlap); } -static int active_cacheline_insert(struct dma_debug_entry *entry) +static int active_cacheline_insert(struct dma_debug_entry *entry, + bool *overlap_cache_clean) { phys_addr_t cln = to_cacheline_number(entry); unsigned long flags; int rc; + *overlap_cache_clean = false; + /* If the device is not writing memory then we don't have any * concerns about the cpu consuming stale data. This mitigates * legitimate usages of overlapping mappings. @@ -487,8 +492,16 @@ static int active_cacheline_insert(struct dma_debug_entry *entry) spin_lock_irqsave(&radix_lock, flags); rc = radix_tree_insert(&dma_active_cacheline, cln, entry); - if (rc == -EEXIST) + if (rc == -EEXIST) { + struct dma_debug_entry *existing; + active_cacheline_inc_overlap(cln); + existing = radix_tree_lookup(&dma_active_cacheline, cln); + /* A lookup failure here after we got -EEXIST is unexpected. 
*/ + WARN_ON(!existing); + if (existing) + *overlap_cache_clean = existing->is_cache_clean; + } spin_unlock_irqrestore(&radix_lock, flags); return rc; @@ -583,19 +596,24 @@ DEFINE_SHOW_ATTRIBUTE(dump); */ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) { + bool overlap_cache_clean; struct hash_bucket *bucket; unsigned long flags; int rc; + entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN); + bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry); put_hash_bucket(bucket, flags); - rc = active_cacheline_insert(entry); + rc = active_cacheline_insert(entry, &overlap_cache_clean); if (rc == -ENOMEM) { pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; - } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + } else if (rc == -EEXIST && + !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !(entry->is_cache_clean && overlap_cache_clean) && !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && is_swiotlb_active(entry->dev))) { err_printk(entry->dev, entry, diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 50c3fe2a1d55..c9fa983990cd 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -425,9 +425,6 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, arch_sync_dma_for_cpu(paddr, sg->length, dir); swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); - - if (dir == DMA_FROM_DEVICE) - arch_dma_mark_clean(paddr, sg->length); } if (!dev_is_dma_coherent(dev)) diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index da2fadf45bcd..f476c63b668c 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -75,9 +75,6 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, } swiotlb_sync_single_for_cpu(dev, paddr, size, dir); - - if (dir == DMA_FROM_DEVICE) - arch_dma_mark_clean(paddr, size); } static inline dma_addr_t dma_direct_map_phys(struct device *dev, diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c469c708fdd6..66ba6a2f83d3 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -789,7 +789,8 @@ void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) struct srcu_data *sdp; /* NMI-unsafe use in NMI is a bad sign, as is multi-bit read_flavor values. 
*/ - WARN_ON_ONCE((read_flavor != SRCU_READ_FLAVOR_NMI) && in_nmi()); + WARN_ON_ONCE(read_flavor != SRCU_READ_FLAVOR_NMI && + read_flavor != SRCU_READ_FLAVOR_FAST && in_nmi()); WARN_ON_ONCE(read_flavor & (read_flavor - 1)); sdp = raw_cpu_ptr(ssp->sda); diff --git a/kernel/sys.c b/kernel/sys.c index 35ea9d79a42e..c86eba9aa7e9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2388,6 +2388,21 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st return -EINVAL; } +int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) +{ + return -EINVAL; +} + +int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + +int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) static int prctl_set_vma(unsigned long opt, unsigned long addr, @@ -2873,6 +2888,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = rseq_slice_extension_prctl(arg2, arg3); break; + case PR_GET_INDIR_BR_LP_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_get_indir_br_lp_status(me, (unsigned long __user *)arg2); + break; + case PR_SET_INDIR_BR_LP_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_set_indir_br_lp_status(me, arg2); + break; + case PR_LOCK_INDIR_BR_LP_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_lock_indir_br_lp_status(me, arg2); + break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d7042a09fe46..49de13cae428 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -136,6 +136,7 @@ config BUILDTIME_MCOUNT_SORT config TRACER_MAX_TRACE bool + select TRACER_SNAPSHOT config TRACE_CLOCK bool @@ -425,7 +426,6 @@ config IRQSOFF_TRACER select GENERIC_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP - select TRACER_SNAPSHOT select TRACER_SNAPSHOT_PER_CPU_SWAP help This option measures the time spent in irqs-off critical @@ -448,7 +448,6 @@ config PREEMPT_TRACER select GENERIC_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP - select TRACER_SNAPSHOT select TRACER_SNAPSHOT_PER_CPU_SWAP select TRACE_PREEMPT_TOGGLE help @@ -470,7 +469,6 @@ config SCHED_TRACER select GENERIC_TRACER select CONTEXT_SWITCH_TRACER select TRACER_MAX_TRACE - select TRACER_SNAPSHOT help This tracer tracks the latency of the highest priority task to be scheduled in, starting from the point it has woken up. @@ -620,7 +618,6 @@ config TRACE_SYSCALL_BUF_SIZE_DEFAULT config TRACER_SNAPSHOT bool "Create a snapshot trace buffer" - select TRACER_MAX_TRACE help Allow tracing users to take snapshot of the current buffer using the ftrace interface, e.g.: @@ -628,6 +625,9 @@ config TRACER_SNAPSHOT echo 1 > /sys/kernel/tracing/snapshot cat snapshot + Note, the latency tracers select this option. To disable it, + all the latency tracers need to be disabled. 
+ config TRACER_SNAPSHOT_PER_CPU_SWAP bool "Allow snapshot to swap per CPU" depends on TRACER_SNAPSHOT diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index fc5dcc888e13..04096c21d06b 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o +obj-$(CONFIG_TRACING) += trace_pid.o obj-$(CONFIG_TRACING) += pid_list.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c4db5c2e7103..f2de9cf15d0e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1832,7 +1832,9 @@ static struct trace_event trace_blk_event = { .funcs = &trace_blk_event_funcs, }; -static int __init init_blk_tracer(void) +static struct work_struct blktrace_works __initdata; + +static int __init __init_blk_tracer(void) { if (!register_trace_event(&trace_blk_event)) { pr_warn("Warning: could not register block events\n"); @@ -1852,6 +1854,25 @@ static int __init init_blk_tracer(void) return 0; } +static void __init blktrace_works_func(struct work_struct *work) +{ + __init_blk_tracer(); +} + +static int __init init_blk_tracer(void) +{ + int ret = 0; + + if (trace_init_wq) { + INIT_WORK(&blktrace_works, blktrace_works_func); + queue_work(trace_init_wq, &blktrace_works); + } else { + ret = __init_blk_tracer(); + } + + return ret; +} + device_initcall(init_blk_tracer); static int blk_trace_remove_queue(struct request_queue *q) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f7baeb8278ca..eadaef8592a3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2076,7 +2076,7 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; - cant_sleep(); + rcu_read_lock_dont_migrate(); if (unlikely(!bpf_prog_get_recursion_context(prog))) { bpf_prog_inc_misses_counter(prog); goto out; @@ -2085,13 +2085,12 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) run_ctx.bpf_cookie = link->cookie; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - rcu_read_lock(); (void) bpf_prog_run(prog, args); - rcu_read_unlock(); bpf_reset_run_ctx(old_run_ctx); out: bpf_prog_put_recursion_context(prog); + rcu_read_unlock_migrate(); } #define UNPACK(...) 
__VA_ARGS__ diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index cc48d16be43e..4df766c690f9 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1303,7 +1303,7 @@ static void ftrace_graph_enable_direct(bool enable_branch, struct fgraph_ops *go static_call_update(fgraph_func, func); static_call_update(fgraph_retfunc, retfunc); if (enable_branch) - static_branch_disable(&fgraph_do_direct); + static_branch_enable(&fgraph_do_direct); } static void ftrace_graph_disable_direct(bool disable_branch) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8fb38722fd5c..1ce17c8af409 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1147,6 +1147,7 @@ struct ftrace_page { }; #define ENTRY_SIZE sizeof(struct dyn_ftrace) +#define ENTRIES_PER_PAGE_GROUP(order) ((PAGE_SIZE << (order)) / ENTRY_SIZE) static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; @@ -3873,7 +3874,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count, *num_pages += 1 << order; ftrace_number_of_groups++; - cnt = (PAGE_SIZE << order) / ENTRY_SIZE; + cnt = ENTRIES_PER_PAGE_GROUP(order); pg->order = order; if (cnt > count) @@ -7668,7 +7669,7 @@ static int ftrace_process_locs(struct module *mod, long skip; /* Count the number of entries unused and compare it to skipped. */ - pg_remaining = (PAGE_SIZE << pg->order) / ENTRY_SIZE - pg->index; + pg_remaining = ENTRIES_PER_PAGE_GROUP(pg->order) - pg->index; if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) { @@ -7676,7 +7677,7 @@ static int ftrace_process_locs(struct module *mod, for (pg = pg_unuse; pg && skip > 0; pg = pg->next) { remaining += 1 << pg->order; - skip -= (PAGE_SIZE << pg->order) / ENTRY_SIZE; + skip -= ENTRIES_PER_PAGE_GROUP(pg->order); } pages -= remaining; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 630221b00838..d33103408955 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4,6 +4,7 @@ * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ +#include <linux/sched/isolation.h> #include <linux/trace_recursion.h> #include <linux/trace_events.h> #include <linux/ring_buffer.h> @@ -4013,19 +4014,36 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer) rb_end_commit(cpu_buffer); } +static bool +rb_irq_work_queue(struct rb_irq_work *irq_work) +{ + int cpu; + + /* irq_work_queue_on() is not NMI-safe */ + if (unlikely(in_nmi())) + return irq_work_queue(&irq_work->work); + + /* + * If CPU isolation is not active, cpu is always the current + * CPU, and the following is equivallent to irq_work_queue(). 
+ */ + cpu = housekeeping_any_cpu(HK_TYPE_KERNEL_NOISE); + return irq_work_queue_on(&irq_work->work, cpu); +} + static __always_inline void rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { if (buffer->irq_work.waiters_pending) { buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&buffer->irq_work.work); + rb_irq_work_queue(&buffer->irq_work); } if (cpu_buffer->irq_work.waiters_pending) { cpu_buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); + rb_irq_work_queue(&cpu_buffer->irq_work); } if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched)) @@ -4045,7 +4063,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->irq_work.wakeup_full = true; cpu_buffer->irq_work.full_waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); + rb_irq_work_queue(&cpu_buffer->irq_work); } #ifdef CONFIG_RING_BUFFER_RECORD_RECURSION diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b1cb30a7b83d..2f6fbf9e7caf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -67,7 +67,7 @@ * insertions into the ring-buffer such as trace_printk could occurred * at the same time, giving false positive or negative results. */ -static bool __read_mostly tracing_selftest_running; +bool __read_mostly tracing_selftest_running; /* * If boot-time tracing including tracers/events via kernel cmdline @@ -83,7 +83,6 @@ void __init disable_tracing_selftest(const char *reason) } } #else -#define tracing_selftest_running 0 #define tracing_selftest_disabled 0 #endif @@ -114,7 +113,7 @@ DEFINE_PER_CPU(bool, trace_taskinfo_save); * of the tracer is successful. But that is the only place that sets * this back to zero. */ -static int tracing_disabled = 1; +int tracing_disabled = 1; cpumask_var_t __read_mostly tracing_buffer_mask; @@ -535,22 +534,11 @@ static struct trace_array global_trace = { .trace_flags = TRACE_DEFAULT_FLAGS, }; -static struct trace_array *printk_trace = &global_trace; +struct trace_array *printk_trace = &global_trace; /* List of trace_arrays interested in the top level trace_marker */ static LIST_HEAD(marker_copies); -static __always_inline bool printk_binsafe(struct trace_array *tr) -{ - /* - * The binary format of traceprintk can cause a crash if used - * by a buffer from another boot. Force the use of the - * non binary version of trace_printk if the trace_printk - * buffer is a boot mapped ring buffer. - */ - return !(tr->flags & TRACE_ARRAY_FL_BOOT); -} - static void update_printk_trace(struct trace_array *tr) { if (printk_trace == tr) @@ -649,248 +637,6 @@ int tracing_check_open_get_tr(struct trace_array *tr) return 0; } -/** - * trace_find_filtered_pid - check if a pid exists in a filtered_pid list - * @filtered_pids: The list of pids to check - * @search_pid: The PID to find in @filtered_pids - * - * Returns true if @search_pid is found in @filtered_pids, and false otherwise. 
- */ -bool -trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) -{ - return trace_pid_list_is_set(filtered_pids, search_pid); -} - -/** - * trace_ignore_this_task - should a task be ignored for tracing - * @filtered_pids: The list of pids to check - * @filtered_no_pids: The list of pids not to be traced - * @task: The task that should be ignored if not filtered - * - * Checks if @task should be traced or not from @filtered_pids. - * Returns true if @task should *NOT* be traced. - * Returns false if @task should be traced. - */ -bool -trace_ignore_this_task(struct trace_pid_list *filtered_pids, - struct trace_pid_list *filtered_no_pids, - struct task_struct *task) -{ - /* - * If filtered_no_pids is not empty, and the task's pid is listed - * in filtered_no_pids, then return true. - * Otherwise, if filtered_pids is empty, that means we can - * trace all tasks. If it has content, then only trace pids - * within filtered_pids. - */ - - return (filtered_pids && - !trace_find_filtered_pid(filtered_pids, task->pid)) || - (filtered_no_pids && - trace_find_filtered_pid(filtered_no_pids, task->pid)); -} - -/** - * trace_filter_add_remove_task - Add or remove a task from a pid_list - * @pid_list: The list to modify - * @self: The current task for fork or NULL for exit - * @task: The task to add or remove - * - * If adding a task, if @self is defined, the task is only added if @self - * is also included in @pid_list. This happens on fork and tasks should - * only be added when the parent is listed. If @self is NULL, then the - * @task pid will be removed from the list, which would happen on exit - * of a task. - */ -void trace_filter_add_remove_task(struct trace_pid_list *pid_list, - struct task_struct *self, - struct task_struct *task) -{ - if (!pid_list) - return; - - /* For forks, we only add if the forking task is listed */ - if (self) { - if (!trace_find_filtered_pid(pid_list, self->pid)) - return; - } - - /* "self" is set for forks, and NULL for exits */ - if (self) - trace_pid_list_set(pid_list, task->pid); - else - trace_pid_list_clear(pid_list, task->pid); -} - -/** - * trace_pid_next - Used for seq_file to get to the next pid of a pid_list - * @pid_list: The pid list to show - * @v: The last pid that was shown (+1 the actual pid to let zero be displayed) - * @pos: The position of the file - * - * This is used by the seq_file "next" operation to iterate the pids - * listed in a trace_pid_list structure. - * - * Returns the pid+1 as we want to display pid of zero, but NULL would - * stop the iteration. - */ -void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) -{ - long pid = (unsigned long)v; - unsigned int next; - - (*pos)++; - - /* pid already is +1 of the actual previous bit */ - if (trace_pid_list_next(pid_list, pid, &next) < 0) - return NULL; - - pid = next; - - /* Return pid + 1 to allow zero to be represented */ - return (void *)(pid + 1); -} - -/** - * trace_pid_start - Used for seq_file to start reading pid lists - * @pid_list: The pid list to show - * @pos: The position of the file - * - * This is used by seq_file "start" operation to start the iteration - * of listing pids. - * - * Returns the pid+1 as we want to display pid of zero, but NULL would - * stop the iteration. 
- */ -void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) -{ - unsigned long pid; - unsigned int first; - loff_t l = 0; - - if (trace_pid_list_first(pid_list, &first) < 0) - return NULL; - - pid = first; - - /* Return pid + 1 so that zero can be the exit value */ - for (pid++; pid && l < *pos; - pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) - ; - return (void *)pid; -} - -/** - * trace_pid_show - show the current pid in seq_file processing - * @m: The seq_file structure to write into - * @v: A void pointer of the pid (+1) value to display - * - * Can be directly used by seq_file operations to display the current - * pid value. - */ -int trace_pid_show(struct seq_file *m, void *v) -{ - unsigned long pid = (unsigned long)v - 1; - - seq_printf(m, "%lu\n", pid); - return 0; -} - -/* 128 should be much more than enough */ -#define PID_BUF_SIZE 127 - -int trace_pid_write(struct trace_pid_list *filtered_pids, - struct trace_pid_list **new_pid_list, - const char __user *ubuf, size_t cnt) -{ - struct trace_pid_list *pid_list; - struct trace_parser parser; - unsigned long val; - int nr_pids = 0; - ssize_t read = 0; - ssize_t ret; - loff_t pos; - pid_t pid; - - if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) - return -ENOMEM; - - /* - * Always recreate a new array. The write is an all or nothing - * operation. Always create a new array when adding new pids by - * the user. If the operation fails, then the current list is - * not modified. - */ - pid_list = trace_pid_list_alloc(); - if (!pid_list) { - trace_parser_put(&parser); - return -ENOMEM; - } - - if (filtered_pids) { - /* copy the current bits to the new max */ - ret = trace_pid_list_first(filtered_pids, &pid); - while (!ret) { - ret = trace_pid_list_set(pid_list, pid); - if (ret < 0) - goto out; - - ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); - nr_pids++; - } - } - - ret = 0; - while (cnt > 0) { - - pos = 0; - - ret = trace_get_user(&parser, ubuf, cnt, &pos); - if (ret < 0) - break; - - read += ret; - ubuf += ret; - cnt -= ret; - - if (!trace_parser_loaded(&parser)) - break; - - ret = -EINVAL; - if (kstrtoul(parser.buffer, 0, &val)) - break; - - pid = (pid_t)val; - - if (trace_pid_list_set(pid_list, pid) < 0) { - ret = -1; - break; - } - nr_pids++; - - trace_parser_clear(&parser); - ret = 0; - } - out: - trace_parser_put(&parser); - - if (ret < 0) { - trace_pid_list_free(pid_list); - return ret; - } - - if (!nr_pids) { - /* Cleared the list of pids */ - trace_pid_list_free(pid_list); - pid_list = NULL; - } - - *new_pid_list = pid_list; - - return read; -} - static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) { u64 ts; @@ -1033,56 +779,6 @@ static inline void trace_access_lock_init(void) #endif -#ifdef CONFIG_STACKTRACE -static void __ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs); -static inline void ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs); - -#else -static inline void __ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs) -{ -} -static inline void ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned long trace_ctx, - int skip, struct pt_regs *regs) -{ -} - -#endif - -static __always_inline void -trace_event_setup(struct ring_buffer_event *event, - int type, unsigned int trace_ctx) -{ - 
struct trace_entry *ent = ring_buffer_event_data(event); - - tracing_generic_entry_update(ent, type, trace_ctx); -} - -static __always_inline struct ring_buffer_event * -__trace_buffer_lock_reserve(struct trace_buffer *buffer, - int type, - unsigned long len, - unsigned int trace_ctx) -{ - struct ring_buffer_event *event; - - event = ring_buffer_lock_reserve(buffer, len); - if (event != NULL) - trace_event_setup(event, type, trace_ctx); - - return event; -} - void tracer_tracing_on(struct trace_array *tr) { if (tr->array_buffer.buffer) @@ -1110,129 +806,10 @@ void tracing_on(void) } EXPORT_SYMBOL_GPL(tracing_on); - -static __always_inline void -__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) -{ - __this_cpu_write(trace_taskinfo_save, true); - - /* If this is the temp buffer, we need to commit fully */ - if (this_cpu_read(trace_buffered_event) == event) { - /* Length is in event->array[0] */ - ring_buffer_write(buffer, event->array[0], &event->array[1]); - /* Release the temp buffer */ - this_cpu_dec(trace_buffered_event_cnt); - /* ring_buffer_unlock_commit() enables preemption */ - preempt_enable_notrace(); - } else - ring_buffer_unlock_commit(buffer); -} - -int __trace_array_puts(struct trace_array *tr, unsigned long ip, - const char *str, int size) -{ - struct ring_buffer_event *event; - struct trace_buffer *buffer; - struct print_entry *entry; - unsigned int trace_ctx; - int alloc; - - if (!(tr->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - if (unlikely(tracing_selftest_running && tr == &global_trace)) - return 0; - - if (unlikely(tracing_disabled)) - return 0; - - alloc = sizeof(*entry) + size + 2; /* possible \n added */ - - trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - guard(ring_buffer_nest)(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - trace_ctx); - if (!event) - return 0; - - entry = ring_buffer_event_data(event); - entry->ip = ip; - - memcpy(&entry->buf, str, size); - - /* Add a newline if necessary */ - if (entry->buf[size - 1] != '\n') { - entry->buf[size] = '\n'; - entry->buf[size + 1] = '\0'; - } else - entry->buf[size] = '\0'; - - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - return size; -} -EXPORT_SYMBOL_GPL(__trace_array_puts); - -/** - * __trace_puts - write a constant string into the trace buffer. 
- * @ip: The address of the caller - * @str: The constant string to write - */ -int __trace_puts(unsigned long ip, const char *str) -{ - return __trace_array_puts(printk_trace, ip, str, strlen(str)); -} -EXPORT_SYMBOL_GPL(__trace_puts); - -/** - * __trace_bputs - write the pointer to a constant string into trace buffer - * @ip: The address of the caller - * @str: The constant string to write to the buffer to - */ -int __trace_bputs(unsigned long ip, const char *str) -{ - struct trace_array *tr = READ_ONCE(printk_trace); - struct ring_buffer_event *event; - struct trace_buffer *buffer; - struct bputs_entry *entry; - unsigned int trace_ctx; - int size = sizeof(struct bputs_entry); - - if (!printk_binsafe(tr)) - return __trace_puts(ip, str); - - if (!(tr->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - if (unlikely(tracing_selftest_running || tracing_disabled)) - return 0; - - trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - - guard(ring_buffer_nest)(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - trace_ctx); - if (!event) - return 0; - - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->str = str; - - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - - return 1; -} -EXPORT_SYMBOL_GPL(__trace_bputs); - #ifdef CONFIG_TRACER_SNAPSHOT static void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) { - struct tracer *tracer = tr->current_trace; unsigned long flags; if (in_nmi()) { @@ -1248,15 +825,15 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, return; } - /* Note, snapshot can not be used when the tracer uses it */ - if (tracer->use_max_tr) { - trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); + if (tr->mapped) { + trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } - if (tr->mapped) { - trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); + /* Note, snapshot can not be used when the tracer uses it */ + if (tracer_uses_snapshot(tr->current_trace)) { + trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } @@ -1356,12 +933,12 @@ int tracing_alloc_snapshot_instance(struct trace_array *tr) /* Make the snapshot buffer have the same order as main buffer */ order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); - ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order); if (ret < 0) return ret; /* allocate spare buffer */ - ret = resize_buffer_duplicate_size(&tr->max_buffer, + ret = resize_buffer_duplicate_size(&tr->snapshot_buffer, &tr->array_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) return ret; @@ -1379,10 +956,10 @@ static void free_snapshot(struct trace_array *tr) * The max_tr ring buffer has some state (e.g. ring->clock) and * we want preserve it. 
*/ - ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0); - ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); - set_buffer_entries(&tr->max_buffer, 1); - tracing_reset_online_cpus(&tr->max_buffer); + ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0); + ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); + set_buffer_entries(&tr->snapshot_buffer, 1); + tracing_reset_online_cpus(&tr->snapshot_buffer); tr->allocated_snapshot = false; } @@ -1498,7 +1075,7 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, guard(mutex)(&trace_types_lock); - if (tr->current_trace->use_max_tr) + if (tracer_uses_snapshot(tr->current_trace)) return -EBUSY; /* @@ -1665,9 +1242,18 @@ EXPORT_SYMBOL_GPL(tracing_off); void disable_trace_on_warning(void) { if (__disable_trace_on_warning) { + struct trace_array *tr = READ_ONCE(printk_trace); + trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_, "Disabling tracing due to warning\n"); tracing_off(); + + /* Disable trace_printk() buffer too */ + if (tr != &global_trace) { + trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, + "Disabling tracing due to warning\n"); + tracer_tracing_off(tr); + } } } @@ -1902,10 +1488,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) unsigned long __read_mostly tracing_thresh; #ifdef CONFIG_TRACER_MAX_TRACE -static const struct file_operations tracing_max_lat_fops; - #ifdef LATENCY_FS_NOTIFY - static struct workqueue_struct *fsnotify_wq; static void latency_fsnotify_workfn(struct work_struct *work) @@ -1922,17 +1505,6 @@ static void latency_fsnotify_workfn_irq(struct irq_work *iwork) queue_work(fsnotify_wq, &tr->fsnotify_work); } -static void trace_create_maxlat_file(struct trace_array *tr, - struct dentry *d_tracer) -{ - INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); - init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); - tr->d_max_latency = trace_create_file("tracing_max_latency", - TRACE_MODE_WRITE, - d_tracer, tr, - &tracing_max_lat_fops); -} - __init static int latency_fsnotify_init(void) { fsnotify_wq = alloc_workqueue("tr_max_lat_wq", @@ -1957,14 +1529,22 @@ void latency_fsnotify(struct trace_array *tr) */ irq_work_queue(&tr->fsnotify_irqwork); } +#endif /* !LATENCY_FS_NOTIFY */ -#else /* !LATENCY_FS_NOTIFY */ - -#define trace_create_maxlat_file(tr, d_tracer) \ - trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ - d_tracer, tr, &tracing_max_lat_fops) +static const struct file_operations tracing_max_lat_fops; +static void trace_create_maxlat_file(struct trace_array *tr, + struct dentry *d_tracer) +{ +#ifdef LATENCY_FS_NOTIFY + INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); + init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); #endif + tr->d_max_latency = trace_create_file("tracing_max_latency", + TRACE_MODE_WRITE, + d_tracer, tr, + &tracing_max_lat_fops); +} /* * Copy the new maximum trace into the separate maximum-trace @@ -1975,8 +1555,8 @@ static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct array_buffer *trace_buf = &tr->array_buffer; - struct array_buffer *max_buf = &tr->max_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); + struct array_buffer *max_buf = &tr->snapshot_buffer; struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); max_buf->cpu = cpu; @@ -2005,7 +1585,14 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 
tracing_record_cmdline(tsk); latency_fsnotify(tr); } +#else +static inline void trace_create_maxlat_file(struct trace_array *tr, + struct dentry *d_tracer) { } +static inline void __update_max_tr(struct trace_array *tr, + struct task_struct *tsk, int cpu) { } +#endif /* CONFIG_TRACER_MAX_TRACE */ +#ifdef CONFIG_TRACER_SNAPSHOT /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer @@ -2035,17 +1622,16 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, /* Inherit the recordable setting from array_buffer */ if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) - ring_buffer_record_on(tr->max_buffer.buffer); + ring_buffer_record_on(tr->snapshot_buffer.buffer); else - ring_buffer_record_off(tr->max_buffer.buffer); + ring_buffer_record_off(tr->snapshot_buffer.buffer); -#ifdef CONFIG_TRACER_SNAPSHOT if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { arch_spin_unlock(&tr->max_lock); return; } -#endif - swap(tr->array_buffer.buffer, tr->max_buffer.buffer); + + swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer); __update_max_tr(tr, tsk, cpu); @@ -2080,7 +1666,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); - ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); + ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { /* @@ -2090,7 +1676,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) * and flag that it failed. * Another reason is resize is in progress. */ - trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, + trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_, "Failed to swap buffers due to commit or resize in progress\n"); } @@ -2099,8 +1685,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } - -#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ struct pipe_wait { struct trace_iterator *iter; @@ -2133,13 +1718,13 @@ static int wait_on_pipe(struct trace_iterator *iter, int full) ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, wait_pipe_cond, &pwait); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* * Make sure this is still the snapshot buffer, as if a snapshot were * to happen, this would now be the main buffer. */ if (iter->snapshot) - iter->array_buffer = &iter->tr->max_buffer; + iter->array_buffer = &iter->tr->snapshot_buffer; #endif return ret; } @@ -2204,10 +1789,10 @@ static int run_tracer_selftest(struct tracer *type) tr->current_trace_flags = type->flags ? 
: type->default_flags; #ifdef CONFIG_TRACER_MAX_TRACE - if (type->use_max_tr) { + if (tracer_uses_snapshot(type)) { /* If we expanded the buffers, make sure the max is expanded too */ if (tr->ring_buffer_expanded) - ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, + ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size, RING_BUFFER_ALL_CPUS); tr->allocated_snapshot = true; } @@ -2229,12 +1814,12 @@ static int run_tracer_selftest(struct tracer *type) tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (type->use_max_tr) { + if (tracer_uses_snapshot(type)) { tr->allocated_snapshot = false; /* Shrink the max buffer again */ if (tr->ring_buffer_expanded) - ring_buffer_resize(tr->max_buffer.buffer, 1, + ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); } #endif @@ -2476,8 +2061,8 @@ void tracing_reset_all_online_cpus_unlocked(void) continue; tr->clear_trace = false; tracing_reset_online_cpus(&tr->array_buffer); -#ifdef CONFIG_TRACER_MAX_TRACE - tracing_reset_online_cpus(&tr->max_buffer); +#ifdef CONFIG_TRACER_SNAPSHOT + tracing_reset_online_cpus(&tr->snapshot_buffer); #endif } } @@ -2516,8 +2101,8 @@ static void tracing_start_tr(struct trace_array *tr) if (buffer) ring_buffer_record_enable(buffer); -#ifdef CONFIG_TRACER_MAX_TRACE - buffer = tr->max_buffer.buffer; +#ifdef CONFIG_TRACER_SNAPSHOT + buffer = tr->snapshot_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #endif @@ -2552,8 +2137,8 @@ static void tracing_stop_tr(struct trace_array *tr) if (buffer) ring_buffer_record_disable(buffer); -#ifdef CONFIG_TRACER_MAX_TRACE - buffer = tr->max_buffer.buffer; +#ifdef CONFIG_TRACER_SNAPSHOT + buffer = tr->snapshot_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #endif @@ -3001,10 +2586,10 @@ struct ftrace_stacks { static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); -static void __ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs) +void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { struct ring_buffer_event *event; unsigned int size, nr_entries; @@ -3087,17 +2672,6 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace_clear_recursion(bit); } -static inline void ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs) -{ - if (!(tr->trace_flags & TRACE_ITER(STACKTRACE))) - return; - - __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); -} - void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip) { @@ -3232,324 +2806,6 @@ void trace_last_func_repeats(struct trace_array *tr, __buffer_unlock_commit(buffer, event); } -/* created for use with alloc_percpu */ -struct trace_buffer_struct { - int nesting; - char buffer[4][TRACE_BUF_SIZE]; -}; - -static struct trace_buffer_struct __percpu *trace_percpu_buffer; - -/* - * This allows for lockless recording. If we're nested too deeply, then - * this returns NULL. 
- */ -static char *get_trace_buf(void) -{ - struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); - - if (!trace_percpu_buffer || buffer->nesting >= 4) - return NULL; - - buffer->nesting++; - - /* Interrupts must see nesting incremented before we use the buffer */ - barrier(); - return &buffer->buffer[buffer->nesting - 1][0]; -} - -static void put_trace_buf(void) -{ - /* Don't let the decrement of nesting leak before this */ - barrier(); - this_cpu_dec(trace_percpu_buffer->nesting); -} - -static int alloc_percpu_trace_buffer(void) -{ - struct trace_buffer_struct __percpu *buffers; - - if (trace_percpu_buffer) - return 0; - - buffers = alloc_percpu(struct trace_buffer_struct); - if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) - return -ENOMEM; - - trace_percpu_buffer = buffers; - return 0; -} - -static int buffers_allocated; - -void trace_printk_init_buffers(void) -{ - if (buffers_allocated) - return; - - if (alloc_percpu_trace_buffer()) - return; - - /* trace_printk() is for debug use only. Don't use it in production. */ - - pr_warn("\n"); - pr_warn("**********************************************************\n"); - pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); - pr_warn("** **\n"); - pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); - pr_warn("** **\n"); - pr_warn("** This means that this is a DEBUG kernel and it is **\n"); - pr_warn("** unsafe for production use. **\n"); - pr_warn("** **\n"); - pr_warn("** If you see this message and you are not debugging **\n"); - pr_warn("** the kernel, report this immediately to your vendor! **\n"); - pr_warn("** **\n"); - pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); - pr_warn("**********************************************************\n"); - - /* Expand the buffers to set size */ - tracing_update_buffers(&global_trace); - - buffers_allocated = 1; - - /* - * trace_printk_init_buffers() can be called by modules. - * If that happens, then we need to start cmdline recording - * directly here. If the global_trace.buffer is already - * allocated here, then this was called by module code. 
- */ - if (global_trace.array_buffer.buffer) - tracing_start_cmdline_record(); -} -EXPORT_SYMBOL_GPL(trace_printk_init_buffers); - -void trace_printk_start_comm(void) -{ - /* Start tracing comms if trace printk is set */ - if (!buffers_allocated) - return; - tracing_start_cmdline_record(); -} - -static void trace_printk_start_stop_comm(int enabled) -{ - if (!buffers_allocated) - return; - - if (enabled) - tracing_start_cmdline_record(); - else - tracing_stop_cmdline_record(); -} - -/** - * trace_vbprintk - write binary msg to tracing buffer - * @ip: The address of the caller - * @fmt: The string format to write to the buffer - * @args: Arguments for @fmt - */ -int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) -{ - struct ring_buffer_event *event; - struct trace_buffer *buffer; - struct trace_array *tr = READ_ONCE(printk_trace); - struct bprint_entry *entry; - unsigned int trace_ctx; - char *tbuffer; - int len = 0, size; - - if (!printk_binsafe(tr)) - return trace_vprintk(ip, fmt, args); - - if (unlikely(tracing_selftest_running || tracing_disabled)) - return 0; - - /* Don't pollute graph traces with trace_vprintk internals */ - pause_graph_tracing(); - - trace_ctx = tracing_gen_ctx(); - guard(preempt_notrace)(); - - tbuffer = get_trace_buf(); - if (!tbuffer) { - len = 0; - goto out_nobuffer; - } - - len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); - - if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) - goto out_put; - - size = sizeof(*entry) + sizeof(u32) * len; - buffer = tr->array_buffer.buffer; - scoped_guard(ring_buffer_nest, buffer) { - event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - trace_ctx); - if (!event) - goto out_put; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; - - memcpy(entry->buf, tbuffer, sizeof(u32) * len); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); - } -out_put: - put_trace_buf(); - -out_nobuffer: - unpause_graph_tracing(); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vbprintk); - -static __printf(3, 0) -int __trace_array_vprintk(struct trace_buffer *buffer, - unsigned long ip, const char *fmt, va_list args) -{ - struct ring_buffer_event *event; - int len = 0, size; - struct print_entry *entry; - unsigned int trace_ctx; - char *tbuffer; - - if (tracing_disabled) - return 0; - - /* Don't pollute graph traces with trace_vprintk internals */ - pause_graph_tracing(); - - trace_ctx = tracing_gen_ctx(); - guard(preempt_notrace)(); - - - tbuffer = get_trace_buf(); - if (!tbuffer) { - len = 0; - goto out_nobuffer; - } - - len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - - size = sizeof(*entry) + len + 1; - scoped_guard(ring_buffer_nest, buffer) { - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - trace_ctx); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - - memcpy(&entry->buf, tbuffer, len + 1); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); - } -out: - put_trace_buf(); - -out_nobuffer: - unpause_graph_tracing(); - - return len; -} - -int trace_array_vprintk(struct trace_array *tr, - unsigned long ip, const char *fmt, va_list args) -{ - if (tracing_selftest_running && tr == &global_trace) - return 0; - - return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); -} - -/** - * trace_array_printk - Print a message to a specific instance - * @tr: The instance trace_array descriptor - * @ip: The instruction 
pointer that this is called from. - * @fmt: The format to print (printf format) - * - * If a subsystem sets up its own instance, they have the right to - * printk strings into their tracing instance buffer using this - * function. Note, this function will not write into the top level - * buffer (use trace_printk() for that), as writing into the top level - * buffer should only have events that can be individually disabled. - * trace_printk() is only used for debugging a kernel, and should not - * be ever incorporated in normal use. - * - * trace_array_printk() can be used, as it will not add noise to the - * top level tracing buffer. - * - * Note, trace_array_init_printk() must be called on @tr before this - * can be used. - */ -int trace_array_printk(struct trace_array *tr, - unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!tr) - return -ENOENT; - - /* This is only allowed for created instances */ - if (tr == &global_trace) - return 0; - - if (!(tr->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - va_start(ap, fmt); - ret = trace_array_vprintk(tr, ip, fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(trace_array_printk); - -/** - * trace_array_init_printk - Initialize buffers for trace_array_printk() - * @tr: The trace array to initialize the buffers for - * - * As trace_array_printk() only writes into instances, they are OK to - * have in the kernel (unlike trace_printk()). This needs to be called - * before trace_array_printk() can be used on a trace_array. - */ -int trace_array_init_printk(struct trace_array *tr) -{ - if (!tr) - return -ENOENT; - - /* This is only allowed for created instances */ - if (tr == &global_trace) - return -EINVAL; - - return alloc_percpu_trace_buffer(); -} -EXPORT_SYMBOL_GPL(trace_array_init_printk); - -int trace_array_printk_buf(struct trace_buffer *buffer, - unsigned long ip, const char *fmt, ...) 
-{ - int ret; - va_list ap; - - if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - va_start(ap, fmt); - ret = __trace_array_vprintk(buffer, ip, fmt, ap); - va_end(ap); - return ret; -} - -int trace_vprintk(unsigned long ip, const char *fmt, va_list args) -{ - return trace_array_vprintk(printk_trace, ip, fmt, args); -} -EXPORT_SYMBOL_GPL(trace_vprintk); - static void trace_iterator_increment(struct trace_iterator *iter) { struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); @@ -3986,10 +3242,8 @@ static void *s_start(struct seq_file *m, loff_t *pos) } mutex_unlock(&trace_types_lock); -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->trace)) return ERR_PTR(-EBUSY); -#endif if (*pos != iter->pos) { iter->ent = NULL; @@ -4028,10 +3282,8 @@ static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->trace)) return; -#endif trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); @@ -4285,7 +3537,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) /* ftrace and system call events are still OK */ if ((event->type > __TRACE_LAST_TYPE) && !is_syscall_event(event)) - return print_event_fields(iter, event); + return print_event_fields(iter, event); } return event->funcs->trace(iter, sym_flags, event); } @@ -4508,7 +3760,7 @@ static void test_ftrace_alive(struct seq_file *m) "# MAY BE MISSING FUNCTION EVENTS\n"); } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT static void show_snapshot_main_help(struct seq_file *m) { seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" @@ -4686,10 +3938,10 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->tr = tr; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) - iter->array_buffer = &tr->max_buffer; + iter->array_buffer = &tr->snapshot_buffer; else #endif iter->array_buffer = &tr->array_buffer; @@ -4758,11 +4010,6 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } -bool tracing_is_disabled(void) -{ - return (tracing_disabled) ? true: false; -} - /* * Open and update trace_array ref count. * Must have the current trace_array passed to it. 
@@ -4880,6 +4127,8 @@ static int tracing_single_release_tr(struct inode *inode, struct file *file) return single_release(inode, file); } +static bool update_last_data_if_empty(struct trace_array *tr); + static int tracing_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -4897,13 +4146,15 @@ static int tracing_open(struct inode *inode, struct file *file) #ifdef CONFIG_TRACER_MAX_TRACE if (tr->current_trace->print_max) - trace_buf = &tr->max_buffer; + trace_buf = &tr->snapshot_buffer; #endif if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(trace_buf); else tracing_reset_cpu(trace_buf, cpu); + + update_last_data_if_empty(tr); } if (file->f_mode & FMODE_READ) { @@ -4928,11 +4179,9 @@ static int tracing_open(struct inode *inode, struct file *file) static bool trace_ok_for_array(struct tracer *t, struct trace_array *tr) { -#ifdef CONFIG_TRACER_SNAPSHOT /* arrays with mapped buffer range do not have snapshots */ - if (tr->range_addr_start && t->use_max_tr) + if (tr->range_addr_start && tracer_uses_snapshot(t)) return false; -#endif return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; } @@ -5109,15 +4358,15 @@ int tracing_set_cpumask(struct trace_array *tr, if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); -#ifdef CONFIG_TRACER_MAX_TRACE - ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); +#ifdef CONFIG_TRACER_SNAPSHOT + ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu); #endif } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); -#ifdef CONFIG_TRACER_MAX_TRACE - ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); +#ifdef CONFIG_TRACER_SNAPSHOT + ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu); #endif } } @@ -5326,8 +4575,8 @@ int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) case TRACE_ITER(OVERWRITE): ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); -#ifdef CONFIG_TRACER_MAX_TRACE - ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); +#ifdef CONFIG_TRACER_SNAPSHOT + ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled); #endif break; @@ -5970,6 +5219,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int tracer_init(struct tracer *t, struct trace_array *tr) { tracing_reset_online_cpus(&tr->array_buffer); + update_last_data_if_empty(tr); return t->init(tr); } @@ -5990,7 +5240,7 @@ static void update_buffer_entries(struct array_buffer *buf, int cpu) } } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* resize @tr's buffer to the size of @size_tr's entries */ static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, struct array_buffer *size_buf, int cpu_id) @@ -6016,7 +5266,7 @@ static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, return ret; } -#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ static int __tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu) @@ -6041,11 +5291,11 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (ret < 0) goto out_start; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT if (!tr->allocated_snapshot) goto out; - ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); + ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu); if (ret 
< 0) { int r = resize_buffer_duplicate_size(&tr->array_buffer, &tr->array_buffer, cpu); @@ -6070,10 +5320,10 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, goto out_start; } - update_buffer_entries(&tr->max_buffer, cpu); + update_buffer_entries(&tr->snapshot_buffer, cpu); out: -#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ update_buffer_entries(&tr->array_buffer, cpu); out_start: @@ -6264,6 +5514,9 @@ int tracing_update_buffers(struct trace_array *tr) { int ret = 0; + if (!tr) + tr = &global_trace; + guard(mutex)(&trace_types_lock); update_last_data(tr); @@ -6298,9 +5551,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) { struct tracer *trace = NULL; struct tracers *t; -#ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; -#endif int ret; guard(mutex)(&trace_types_lock); @@ -6328,7 +5579,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) return 0; #ifdef CONFIG_TRACER_SNAPSHOT - if (trace->use_max_tr) { + if (tracer_uses_snapshot(trace)) { local_irq_disable(); arch_spin_lock(&tr->max_lock); ret = tr->cond_snapshot ? -EBUSY : 0; @@ -6360,14 +5611,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); -#ifdef CONFIG_TRACER_MAX_TRACE - had_max_tr = tr->current_trace->use_max_tr; + had_max_tr = tracer_uses_snapshot(tr->current_trace); /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; tr->current_trace_flags = nop_trace.flags; - if (had_max_tr && !trace->use_max_tr) { + if (had_max_tr && !tracer_uses_snapshot(trace)) { /* * We need to make sure that the update_max_tr sees that * current_trace changed to nop_trace to keep it from @@ -6380,24 +5630,19 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) tracing_disarm_snapshot(tr); } - if (!had_max_tr && trace->use_max_tr) { + if (!had_max_tr && tracer_uses_snapshot(trace)) { ret = tracing_arm_snapshot_locked(tr); if (ret) return ret; } -#else - tr->current_trace = &nop_trace; -#endif tr->current_trace_flags = t->flags ? 
: t->tracer->flags; if (trace->init) { ret = tracer_init(trace, tr); if (ret) { -#ifdef CONFIG_TRACER_MAX_TRACE - if (trace->use_max_tr) + if (tracer_uses_snapshot(trace)) tracing_disarm_snapshot(tr); -#endif tr->current_trace_flags = nop_trace.flags; return ret; } @@ -7602,7 +6847,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, unsigned long ip; char *buf; - if (tracing_disabled) + if (unlikely(tracing_disabled)) return -EINVAL; if (!(tr->trace_flags & TRACE_ITER(MARKERS))) @@ -7682,7 +6927,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, ssize_t written = -ENODEV; char *buf; - if (tracing_disabled) + if (unlikely(tracing_disabled)) return -EINVAL; if (!(tr->trace_flags & TRACE_ITER(MARKERS))) @@ -7783,11 +7028,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) */ tracing_reset_online_cpus(&tr->array_buffer); -#ifdef CONFIG_TRACER_MAX_TRACE - if (tr->max_buffer.buffer) - ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); - tracing_reset_online_cpus(&tr->max_buffer); +#ifdef CONFIG_TRACER_SNAPSHOT + if (tr->snapshot_buffer.buffer) + ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func); + tracing_reset_online_cpus(&tr->snapshot_buffer); #endif + update_last_data_if_empty(tr); if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { struct trace_scratch *tscratch = tr->scratch; @@ -7880,26 +7126,6 @@ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_eve return ring_buffer_event_time_stamp(buffer, rbe); } -/* - * Set or disable using the per CPU trace_buffer_event when possible. - */ -int tracing_set_filter_buffering(struct trace_array *tr, bool set) -{ - guard(mutex)(&trace_types_lock); - - if (set && tr->no_filter_buffering_ref++) - return 0; - - if (!set) { - if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) - return -EINVAL; - - --tr->no_filter_buffering_ref; - } - - return 0; -} - struct ftrace_buffer_info { struct trace_iterator iter; void *spare; @@ -7938,7 +7164,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) ret = 0; iter->tr = tr; - iter->array_buffer = &tr->max_buffer; + iter->array_buffer = &tr->snapshot_buffer; iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; @@ -7975,7 +7201,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, guard(mutex)(&trace_types_lock); - if (tr->current_trace->use_max_tr) + if (tracer_uses_snapshot(tr->current_trace)) return -EBUSY; local_irq_disable(); @@ -8001,7 +7227,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, return -EINVAL; #endif if (tr->allocated_snapshot) - ret = resize_buffer_duplicate_size(&tr->max_buffer, + ret = resize_buffer_duplicate_size(&tr->snapshot_buffer, &tr->array_buffer, iter->cpu_file); ret = tracing_arm_snapshot_locked(tr); @@ -8022,9 +7248,9 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, default: if (tr->allocated_snapshot) { if (iter->cpu_file == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(&tr->max_buffer); + tracing_reset_online_cpus(&tr->snapshot_buffer); else - tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); + tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file); } break; } @@ -8074,13 +7300,13 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) info = filp->private_data; - if (info->iter.trace->use_max_tr) { + if (tracer_uses_snapshot(info->iter.trace)) { tracing_buffers_release(inode, filp); 
return -EBUSY; } info->iter.snapshot = true; - info->iter.array_buffer = &info->iter.tr->max_buffer; + info->iter.array_buffer = &info->iter.tr->snapshot_buffer; return ret; } @@ -8630,10 +7856,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (!count) return 0; -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->tr->current_trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) return -EBUSY; -#endif page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); @@ -8817,10 +8041,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, int entries, i; ssize_t ret = 0; -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->tr->current_trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) return -EBUSY; -#endif page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); if (*ppos & (page_size - 1)) @@ -8954,7 +8176,7 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned return 0; } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT static int get_snapshot_map(struct trace_array *tr) { int err = 0; @@ -9397,7 +8619,7 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_stats_fops); - trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, + trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu, tr, cpu, &tracing_entries_fops); if (tr->range_addr_start) @@ -9958,12 +9180,12 @@ buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, if (ret) goto out; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT if (!tr->allocated_snapshot) goto out_max; - ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order); if (ret) { /* Put back the old order */ cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); @@ -10179,12 +9401,12 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) if (ret) return ret; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* Fix mapped buffer trace arrays do not have snapshot buffers */ if (tr->range_addr_start) return 0; - ret = allocate_trace_buffer(tr, &tr->max_buffer, + ret = allocate_trace_buffer(tr, &tr->snapshot_buffer, allocate_snapshot ? 
size : 1); if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { free_trace_buffer(&tr->array_buffer); @@ -10206,8 +9428,8 @@ static void free_trace_buffers(struct trace_array *tr) free_trace_buffer(&tr->array_buffer); kfree(tr->module_delta); -#ifdef CONFIG_TRACER_MAX_TRACE - free_trace_buffer(&tr->max_buffer); +#ifdef CONFIG_TRACER_SNAPSHOT + free_trace_buffer(&tr->snapshot_buffer); #endif } @@ -10348,7 +9570,7 @@ trace_array_create_systems(const char *name, const char *systems, tr->syscall_buf_sz = global_trace.syscall_buf_sz; tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT spin_lock_init(&tr->snapshot_trigger_lock); #endif tr->current_trace = &nop_trace; @@ -10673,9 +9895,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) create_trace_options_dir(tr); -#ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); -#endif if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); @@ -10774,7 +9994,7 @@ int tracing_init_dentry(void) extern struct trace_eval_map *__start_ftrace_eval_maps[]; extern struct trace_eval_map *__stop_ftrace_eval_maps[]; -static struct workqueue_struct *eval_map_wq __initdata; +struct workqueue_struct *trace_init_wq __initdata; static struct work_struct eval_map_work __initdata; static struct work_struct tracerfs_init_work __initdata; @@ -10790,15 +10010,15 @@ static int __init trace_eval_init(void) { INIT_WORK(&eval_map_work, eval_map_work_func); - eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0); - if (!eval_map_wq) { - pr_err("Unable to allocate eval_map_wq\n"); + trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0); + if (!trace_init_wq) { + pr_err("Unable to allocate trace_init_wq\n"); /* Do work here */ eval_map_work_func(&eval_map_work); return -ENOMEM; } - queue_work(eval_map_wq, &eval_map_work); + queue_work(trace_init_wq, &eval_map_work); return 0; } @@ -10807,8 +10027,8 @@ subsys_initcall(trace_eval_init); static int __init trace_eval_sync(void) { /* Make sure the eval map updates are finished */ - if (eval_map_wq) - destroy_workqueue(eval_map_wq); + if (trace_init_wq) + destroy_workqueue(trace_init_wq); return 0; } @@ -10969,9 +10189,9 @@ static __init int tracer_init_tracefs(void) if (ret) return 0; - if (eval_map_wq) { + if (trace_init_wq) { INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); - queue_work(eval_map_wq, &tracerfs_init_work); + queue_work(trace_init_wq, &tracerfs_init_work); } else { tracer_init_tracefs_work_func(NULL); } @@ -11304,7 +10524,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, return done; } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT __init static bool tr_needs_alloc_snapshot(const char *name) { char *test; @@ -11494,7 +10714,7 @@ __init static void enable_instances(void) } } else { /* Only non mapped buffers have snapshot buffers */ - if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) + if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT)) do_allocate_snapshot(name); } @@ -11621,7 +10841,7 @@ __init static int tracer_alloc_buffers(void) global_trace.current_trace_flags = nop_trace.flags; global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT spin_lock_init(&global_trace.snapshot_trigger_lock); #endif ftrace_init_global_array_ops(&global_trace); @@ -11689,7 +10909,7 @@ struct trace_array *trace_get_global_array(void) void __init 
ftrace_boot_snapshot(void) { -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT struct trace_array *tr; if (!snapshot_at_boot) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8428c437cb9d..b8f3804586a0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -131,7 +131,7 @@ enum trace_type { #define FAULT_STRING "(fault)" -#define HIST_STACKTRACE_DEPTH 16 +#define HIST_STACKTRACE_DEPTH 31 #define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long)) #define HIST_STACKTRACE_SKIP 5 @@ -332,29 +332,33 @@ struct trace_array { struct list_head list; char *name; struct array_buffer array_buffer; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* - * The max_buffer is used to snapshot the trace when a maximum + * The snapshot_buffer is used to snapshot the trace when a maximum * latency is reached, or when the user initiates a snapshot. * Some tracers will use this to store a maximum trace while * it continues examining live traces. * - * The buffers for the max_buffer are set up the same as the array_buffer - * When a snapshot is taken, the buffer of the max_buffer is swapped - * with the buffer of the array_buffer and the buffers are reset for - * the array_buffer so the tracing can continue. + * The buffers for the snapshot_buffer are set up the same as the + * array_buffer. When a snapshot is taken, the buffer of the + * snapshot_buffer is swapped with the buffer of the array_buffer + * and the buffers are reset for the array_buffer so the tracing can + * continue. */ - struct array_buffer max_buffer; + struct array_buffer snapshot_buffer; bool allocated_snapshot; spinlock_t snapshot_trigger_lock; unsigned int snapshot; +#ifdef CONFIG_TRACER_MAX_TRACE unsigned long max_latency; -#ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; +#ifdef CONFIG_FSNOTIFY struct work_struct fsnotify_work; struct irq_work fsnotify_irqwork; -#endif -#endif +#endif /* CONFIG_FSNOTIFY */ +#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ + /* The below is for memory mapped ring buffer */ unsigned int mapped; unsigned long range_addr_start; @@ -380,7 +384,7 @@ struct trace_array { * * It is also used in other places outside the update_max_tr * so it needs to be defined outside of the - * CONFIG_TRACER_MAX_TRACE. + * CONFIG_TRACER_SNAPSHOT. */ arch_spinlock_t max_lock; #ifdef CONFIG_FTRACE_SYSCALLS @@ -479,13 +483,14 @@ extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe); -extern int tracing_set_filter_buffering(struct trace_array *tr, bool set); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr); +extern struct trace_array *printk_trace; + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. @@ -661,6 +666,8 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) return iter->buffer_iter ? 
iter->buffer_iter[cpu] : NULL; } +extern int tracing_disabled; + int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); @@ -672,7 +679,6 @@ int tracing_release_generic_tr(struct inode *inode, struct file *file); int tracing_open_file_tr(struct inode *inode, struct file *filp); int tracing_release_file_tr(struct inode *inode, struct file *filp); int tracing_single_release_file_tr(struct inode *inode, struct file *filp); -bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); void tracer_tracing_off(struct trace_array *tr); @@ -772,6 +778,7 @@ extern cpumask_var_t __read_mostly tracing_buffer_mask; extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; +extern struct workqueue_struct *trace_init_wq __initdata; /* PID filtering */ @@ -790,22 +797,22 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -#ifdef CONFIG_FSNOTIFY -#define LATENCY_FS_NOTIFY +#if defined(CONFIG_TRACER_MAX_TRACE) && defined(CONFIG_FSNOTIFY) +# define LATENCY_FS_NOTIFY #endif -#endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef LATENCY_FS_NOTIFY void latency_fsnotify(struct trace_array *tr); #else static inline void latency_fsnotify(struct trace_array *tr) { } #endif +#endif /* CONFIG_TRACER_SNAPSHOT */ #ifdef CONFIG_STACKTRACE void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); @@ -816,6 +823,18 @@ static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, } #endif /* CONFIG_STACKTRACE */ +#ifdef CONFIG_TRACER_MAX_TRACE +static inline bool tracer_uses_snapshot(struct tracer *tracer) +{ + return tracer->use_max_tr; +} +#else +static inline bool tracer_uses_snapshot(struct tracer *tracer) +{ + return false; +} +#endif + void trace_last_func_repeats(struct trace_array *tr, struct trace_func_repeats *last_info, unsigned int trace_ctx); @@ -865,6 +884,7 @@ extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); +extern bool __read_mostly tracing_selftest_running; /* * Tracer data references selftest functions that only occur * on boot up. These can be __init functions. Thus, when selftests @@ -877,6 +897,7 @@ static inline void __init disable_tracing_selftest(const char *reason) } /* Tracers are seldom changed. Optimize when selftests are disabled. 
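
The tracer_uses_snapshot() helpers introduced above exist so callers can test a tracer without sprinkling #ifdef CONFIG_TRACER_MAX_TRACE around every use of use_max_tr: the field is only touched inside the one helper, and the stub returns false when the option is off. A minimal standalone sketch of the same config-gated accessor pattern (the HAVE_MAX_TRACE macro and struct layout here are illustrative, not the kernel's):

/* Build with or without -DHAVE_MAX_TRACE; callers compile unchanged either way. */
#include <stdbool.h>
#include <stdio.h>

struct tracer {
	const char *name;
#ifdef HAVE_MAX_TRACE
	bool use_max_tr;	/* field only exists when the feature is built in */
#endif
};

#ifdef HAVE_MAX_TRACE
static inline bool tracer_uses_snapshot(struct tracer *t)
{
	return t->use_max_tr;
}
#else
static inline bool tracer_uses_snapshot(struct tracer *t)
{
	return false;		/* constant-folds away; callers stay #ifdef-free */
}
#endif

int main(void)
{
	struct tracer t = { .name = "irqsoff" };

	if (tracer_uses_snapshot(&t))
		printf("%s swaps into the snapshot buffer\n", t.name);
	else
		printf("%s does not use the snapshot buffer\n", t.name);
	return 0;
}
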
*/ #define __tracer_data __read_mostly +#define tracing_selftest_running 0 #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); @@ -1414,6 +1435,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(COPY_MARKER, "copy_trace_marker"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ + C(BITMASK_LIST, "bitmask-list"), \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ @@ -1567,6 +1589,47 @@ char *trace_user_fault_read(struct trace_user_buf_info *tinfo, const char __user *ptr, size_t size, trace_user_buf_copy copy_func, void *data); +static __always_inline void +trace_event_setup(struct ring_buffer_event *event, + int type, unsigned int trace_ctx) +{ + struct trace_entry *ent = ring_buffer_event_data(event); + + tracing_generic_entry_update(ent, type, trace_ctx); +} + +static __always_inline struct ring_buffer_event * +__trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, + unsigned int trace_ctx) +{ + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(buffer, len); + if (event != NULL) + trace_event_setup(event, type, trace_ctx); + + return event; +} + +static __always_inline void +__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) +{ + __this_cpu_write(trace_taskinfo_save, true); + + /* If this is the temp buffer, we need to commit fully */ + if (this_cpu_read(trace_buffered_event) == event) { + /* Length is in event->array[0] */ + ring_buffer_write(buffer, event->array[0], &event->array[1]); + /* Release the temp buffer */ + this_cpu_dec(trace_buffered_event_cnt); + /* ring_buffer_unlock_commit() enables preemption */ + preempt_enable_notrace(); + } else + ring_buffer_unlock_commit(buffer); +} + static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) @@ -2087,6 +2150,7 @@ extern const char *__stop___tracepoint_str[]; void trace_printk_control(bool enabled); void trace_printk_start_comm(void); +void trace_printk_start_stop_comm(int enabled); int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set); int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled); @@ -2237,6 +2301,37 @@ static inline void sanitize_event_name(char *name) *name = '_'; } +#ifdef CONFIG_STACKTRACE +void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs); + +static __always_inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) +{ + if (!(tr->trace_flags & TRACE_ITER(STACKTRACE))) + return; + + __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); +} +#else +static inline void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) +{ +} +static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned long trace_ctx, + int skip, struct pt_regs *regs) +{ +} +#endif + /* * This is a generic way to read and write a u64 value from a file in tracefs. 
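
__trace_buffer_lock_reserve() and __buffer_unlock_commit() above follow the ring buffer's reserve/fill/commit discipline: space for the event is reserved first, the entry is written in place, and only the commit makes it visible. The toy single-writer sketch below shows just that shape on a flat array; it is not the kernel ring-buffer API and ignores the temp-buffer and preemption handling in the real helper:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

static char buf[256];
static size_t reserved;		/* bytes handed out to writers */
static size_t committed;	/* bytes visible to readers */

/* Reserve len bytes; the caller fills them in place before committing. */
static void *buffer_reserve(size_t len)
{
	if (reserved + len > sizeof(buf))
		return NULL;		/* event too big, caller must cope */
	void *p = buf + reserved;
	reserved += len;
	return p;
}

/* Make everything reserved so far visible to readers. */
static void buffer_commit(void)
{
	committed = reserved;
}

int main(void)
{
	char *slot = buffer_reserve(6);

	if (slot) {
		memcpy(slot, "hello", 6);	/* fill in place, no extra copy */
		buffer_commit();
	}
	printf("%zu bytes committed: %s\n", committed, buf);
	return 0;
}
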
* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 137b4d9bb116..61fe01dce7a6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -649,6 +649,22 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) } EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); +/** + * trace_event_buffer_reserve - reserve space on the ring buffer for an event + * @fbuffer: information about how to save the event + * @trace_file: the instance file descriptor for the event + * @len: The length of the event + * + * The @fbuffer has information about the ring buffer and data will + * be added to it to be used by the call to trace_event_buffer_commit(). + * The @trace_file is the desrciptor with information about the status + * of the given event for a specific trace_array instance. + * The @len is the length of data to save for the event. + * + * Returns a pointer to the data on the ring buffer or NULL if the + * event was not reserved (event was filtered, too big, or the buffer + * simply was disabled for write). + */ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len) @@ -1662,6 +1678,82 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&event_mutex); } +static int get_call_len(struct trace_event_call *call) +{ + int len; + + /* Get the length of "<system>:<event>" */ + len = strlen(call->class->system) + 1; + len += strlen(trace_event_name(call)); + + /* Set the index to 32 bytes to separate event from data */ + return len >= 32 ? 1 : 32 - len; +} + +/** + * t_show_filters - seq_file callback to display active event filters + * @m: The seq_file interface for formatted output + * @v: The current trace_event_file being iterated + * + * Identifies and prints active filters for the current event file in the + * iteration. If a filter is applied to the current event and, if so, + * prints the system name, event name, and the filter string. + */ +static int t_show_filters(struct seq_file *m, void *v) +{ + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; + struct event_filter *filter; + int len; + + guard(rcu)(); + filter = rcu_dereference(file->filter); + if (!filter || !filter->filter_string) + return 0; + + len = get_call_len(call); + + seq_printf(m, "%s:%s%*.s%s\n", call->class->system, + trace_event_name(call), len, "", filter->filter_string); + + return 0; +} + +/** + * t_show_triggers - seq_file callback to display active event triggers + * @m: The seq_file interface for formatted output + * @v: The current trace_event_file being iterated + * + * Iterates through the trigger list of the current event file and prints + * each active trigger's configuration using its associated print + * operation. + */ +static int t_show_triggers(struct seq_file *m, void *v) +{ + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; + struct event_trigger_data *data; + int len; + + /* + * The event_mutex is held by t_start(), protecting the + * file->triggers list traversal. 
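
get_call_len() above sizes the padding so that each "<system>:<event>" label lines up at column 32 before the filter or trigger text, falling back to a single space once the label itself reaches 32 characters. A standalone check of that arithmetic and of the "%*.s" padding it feeds (the event names below are made up):

#include <stdio.h>
#include <string.h>

/* Same formula as get_call_len(): pad "<system>:<event>" out to column 32. */
static int pad_len(const char *system, const char *event)
{
	int len = strlen(system) + 1 + strlen(event);

	return len >= 32 ? 1 : 32 - len;
}

int main(void)
{
	const char *rows[][3] = {
		{ "sched", "sched_switch",      "prev_pid == 0" },
		{ "irq",   "irq_handler_entry", "irq > 30" },
	};

	for (size_t i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
		printf("%s:%s%*.s%s\n", rows[i][0], rows[i][1],
		       pad_len(rows[i][0], rows[i][1]), "", rows[i][2]);
	return 0;
}
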
+ */ + if (list_empty(&file->triggers)) + return 0; + + len = get_call_len(call); + + list_for_each_entry_rcu(data, &file->triggers, list) { + seq_printf(m, "%s:%s%*.s", call->class->system, + trace_event_name(call), len, ""); + + data->cmd_ops->print(m, data); + } + + return 0; +} + #ifdef CONFIG_MODULES static int s_show(struct seq_file *m, void *v) { @@ -2176,7 +2268,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) struct event_subsystem *system = NULL; int ret; - if (tracing_is_disabled()) + if (unlikely(tracing_disabled)) return -ENODEV; /* Make sure the system still exists */ @@ -2489,6 +2581,8 @@ ftrace_event_npid_write(struct file *filp, const char __user *ubuf, static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_show_filters_open(struct inode *inode, struct file *file); +static int ftrace_event_show_triggers_open(struct inode *inode, struct file *file); static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); static int ftrace_event_release(struct inode *inode, struct file *file); @@ -2507,6 +2601,20 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = s_stop, }; +static const struct seq_operations show_show_event_filters_seq_ops = { + .start = t_start, + .next = t_next, + .show = t_show_filters, + .stop = t_stop, +}; + +static const struct seq_operations show_show_event_triggers_seq_ops = { + .start = t_start, + .next = t_next, + .show = t_show_triggers, + .stop = t_stop, +}; + static const struct seq_operations show_set_pid_seq_ops = { .start = p_start, .next = p_next, @@ -2536,6 +2644,20 @@ static const struct file_operations ftrace_set_event_fops = { .release = ftrace_event_release, }; +static const struct file_operations ftrace_show_event_filters_fops = { + .open = ftrace_event_show_filters_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations ftrace_show_event_triggers_fops = { + .open = ftrace_event_show_triggers_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static const struct file_operations ftrace_set_event_pid_fops = { .open = ftrace_event_set_pid_open, .read = seq_read, @@ -2680,6 +2802,34 @@ ftrace_event_set_open(struct inode *inode, struct file *file) return ret; } +/** + * ftrace_event_show_filters_open - open interface for set_event_filters + * @inode: The inode of the file + * @file: The file being opened + * + * Connects the set_event_filters file to the sequence operations + * required to iterate over and display active event filters. + */ +static int +ftrace_event_show_filters_open(struct inode *inode, struct file *file) +{ + return ftrace_event_open(inode, file, &show_show_event_filters_seq_ops); +} + +/** + * ftrace_event_show_triggers_open - open interface for show_event_triggers + * @inode: The inode of the file + * @file: The file being opened + * + * Connects the show_event_triggers file to the sequence operations + * required to iterate over and display active event triggers. 
+ */ +static int +ftrace_event_show_triggers_open(struct inode *inode, struct file *file) +{ + return ftrace_event_open(inode, file, &show_show_event_triggers_seq_ops); +} + static int ftrace_event_set_pid_open(struct inode *inode, struct file *file) { @@ -3963,11 +4113,6 @@ void trace_put_event_file(struct trace_event_file *file) EXPORT_SYMBOL_GPL(trace_put_event_file); #ifdef CONFIG_DYNAMIC_FTRACE - -/* Avoid typos */ -#define ENABLE_EVENT_STR "enable_event" -#define DISABLE_EVENT_STR "disable_event" - struct event_probe_data { struct trace_event_file *file; unsigned long count; @@ -4400,6 +4545,12 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) if (!entry) return -ENOMEM; + trace_create_file("show_event_filters", TRACE_MODE_READ, parent, tr, + &ftrace_show_event_filters_fops); + + trace_create_file("show_event_triggers", TRACE_MODE_READ, parent, tr, + &ftrace_show_event_triggers_fops); + nr_entries = ARRAY_SIZE(events_entries); e_events = eventfs_create_events_dir("events", parent, events_entries, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 385af8405392..7001e34476ee 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1375,7 +1375,7 @@ static void free_filter_list_tasks(struct rcu_head *rhp) struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu); INIT_RCU_WORK(&filter_list->rwork, free_filter_list_work); - queue_rcu_work(system_wq, &filter_list->rwork); + queue_rcu_work(system_dfl_wq, &filter_list->rwork); } /* diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index c97bb2fda5c0..e6f449f53afc 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -105,38 +105,44 @@ enum field_op_id { FIELD_OP_MULT, }; +#define FIELD_FUNCS \ + C(NOP, "nop"), \ + C(VAR_REF, "var_ref"), \ + C(COUNTER, "counter"), \ + C(CONST, "const"), \ + C(LOG2, "log2"), \ + C(BUCKET, "bucket"), \ + C(TIMESTAMP, "timestamp"), \ + C(CPU, "cpu"), \ + C(COMM, "comm"), \ + C(STRING, "string"), \ + C(DYNSTRING, "dynstring"), \ + C(RELDYNSTRING, "reldynstring"), \ + C(PSTRING, "pstring"), \ + C(S64, "s64"), \ + C(U64, "u64"), \ + C(S32, "s32"), \ + C(U32, "u32"), \ + C(S16, "s16"), \ + C(U16, "u16"), \ + C(S8, "s8"), \ + C(U8, "u8"), \ + C(UMINUS, "uminus"), \ + C(MINUS, "minus"), \ + C(PLUS, "plus"), \ + C(DIV, "div"), \ + C(MULT, "mult"), \ + C(DIV_POWER2, "div_power2"), \ + C(DIV_NOT_POWER2, "div_not_power2"), \ + C(DIV_MULT_SHIFT, "div_mult_shift"), \ + C(EXECNAME, "execname"), \ + C(STACK, "stack"), + +#undef C +#define C(a, b) HIST_FIELD_FN_##a + enum hist_field_fn { - HIST_FIELD_FN_NOP, - HIST_FIELD_FN_VAR_REF, - HIST_FIELD_FN_COUNTER, - HIST_FIELD_FN_CONST, - HIST_FIELD_FN_LOG2, - HIST_FIELD_FN_BUCKET, - HIST_FIELD_FN_TIMESTAMP, - HIST_FIELD_FN_CPU, - HIST_FIELD_FN_COMM, - HIST_FIELD_FN_STRING, - HIST_FIELD_FN_DYNSTRING, - HIST_FIELD_FN_RELDYNSTRING, - HIST_FIELD_FN_PSTRING, - HIST_FIELD_FN_S64, - HIST_FIELD_FN_U64, - HIST_FIELD_FN_S32, - HIST_FIELD_FN_U32, - HIST_FIELD_FN_S16, - HIST_FIELD_FN_U16, - HIST_FIELD_FN_S8, - HIST_FIELD_FN_U8, - HIST_FIELD_FN_UMINUS, - HIST_FIELD_FN_MINUS, - HIST_FIELD_FN_PLUS, - HIST_FIELD_FN_DIV, - HIST_FIELD_FN_MULT, - HIST_FIELD_FN_DIV_POWER2, - HIST_FIELD_FN_DIV_NOT_POWER2, - HIST_FIELD_FN_DIV_MULT_SHIFT, - HIST_FIELD_FN_EXECNAME, - HIST_FIELD_FN_STACK, + FIELD_FUNCS }; /* @@ -3157,7 +3163,7 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, u64 var_val; /* Make sure 
stacktrace can fit in the string variable length */ - BUILD_BUG_ON((HIST_STACKTRACE_DEPTH + 1) * sizeof(long) >= STR_VAR_LEN_MAX); + BUILD_BUG_ON((HIST_STACKTRACE_DEPTH + 1) * sizeof(long) > STR_VAR_LEN_MAX); for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { struct field_var *field_var = field_vars[i]; @@ -5854,6 +5860,12 @@ const struct file_operations event_hist_fops = { }; #ifdef CONFIG_HIST_TRIGGERS_DEBUG + +#undef C +#define C(a, b) b + +static const char * const field_funcs[] = { FIELD_FUNCS }; + static void hist_field_debug_show_flags(struct seq_file *m, unsigned long flags) { @@ -5918,6 +5930,7 @@ static int hist_field_debug_show(struct seq_file *m, seq_printf(m, " type: %s\n", field->type); seq_printf(m, " size: %u\n", field->size); seq_printf(m, " is_signed: %u\n", field->is_signed); + seq_printf(m, " function: hist_field_%s()\n", field_funcs[field->fn_num]); return 0; } @@ -6518,6 +6531,26 @@ static bool existing_hist_update_only(char *glob, return updated; } +/* + * Set or disable using the per CPU trace_buffer_event when possible. + */ +static int tracing_set_filter_buffering(struct trace_array *tr, bool set) +{ + guard(mutex)(&trace_types_lock); + + if (set && tr->no_filter_buffering_ref++) + return 0; + + if (!set) { + if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) + return -EINVAL; + + --tr->no_filter_buffering_ref; + } + + return 0; +} + static int hist_register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) @@ -6907,11 +6940,9 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, out_unreg: event_trigger_unregister(cmd_ops, file, glob+1, trigger_data); out_free: - event_trigger_reset_filter(cmd_ops, trigger_data); - remove_hist_vars(hist_data); - kfree(trigger_data); + trigger_data_free(trigger_data); destroy_hist_data(hist_data); goto out; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 45c187e77e21..ce42fbf16f4a 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -499,9 +499,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry, return len; } -static notrace void trace_event_raw_event_synth(void *__data, - u64 *var_ref_vals, - unsigned int *var_ref_idx) +static void trace_event_raw_event_synth(void *__data, + u64 *var_ref_vals, + unsigned int *var_ref_idx) { unsigned int i, n_u64, val_idx, len, data_size = 0; struct trace_event_file *trace_file = __data; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 06b75bcfc7b8..7fa26327c9c7 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1347,18 +1347,13 @@ traceon_trigger(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) { - if (tracer_tracing_is_on(file->tr)) - return; - - tracer_tracing_on(file->tr); + if (WARN_ON_ONCE(!file)) return; - } - if (tracing_is_on()) + if (tracer_tracing_is_on(file->tr)) return; - tracing_on(); + tracer_tracing_on(file->tr); } static bool @@ -1368,13 +1363,11 @@ traceon_count_func(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) { - if (tracer_tracing_is_on(file->tr)) - return false; - } else { - if (tracing_is_on()) - return false; - } + if (WARN_ON_ONCE(!file)) + return false; + + if (tracer_tracing_is_on(file->tr)) + return false; if (!data->count) return false; @@ -1392,18 +1385,13 @@ traceoff_trigger(struct event_trigger_data *data, { struct 
trace_event_file *file = data->private_data; - if (file) { - if (!tracer_tracing_is_on(file->tr)) - return; - - tracer_tracing_off(file->tr); + if (WARN_ON_ONCE(!file)) return; - } - if (!tracing_is_on()) + if (!tracer_tracing_is_on(file->tr)) return; - tracing_off(); + tracer_tracing_off(file->tr); } static bool @@ -1413,13 +1401,11 @@ traceoff_count_func(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) { - if (!tracer_tracing_is_on(file->tr)) - return false; - } else { - if (!tracing_is_on()) - return false; - } + if (WARN_ON_ONCE(!file)) + return false; + + if (!tracer_tracing_is_on(file->tr)) + return false; if (!data->count) return false; @@ -1481,10 +1467,10 @@ snapshot_trigger(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) - tracing_snapshot_instance(file->tr); - else - tracing_snapshot(); + if (WARN_ON_ONCE(!file)) + return; + + tracing_snapshot_instance(file->tr); } static int @@ -1570,10 +1556,10 @@ stacktrace_trigger(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) - __trace_stack(file->tr, tracing_gen_ctx_dec(), STACK_SKIP); - else - trace_dump_stack(STACK_SKIP); + if (WARN_ON_ONCE(!file)) + return; + + __trace_stack(file->tr, tracing_gen_ctx_dec(), STACK_SKIP); } static int diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 2f7b94e98317..3fe274b84f1c 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -102,9 +102,9 @@ struct hwlat_sample { /* keep the global state somewhere. */ static struct hwlat_data { - struct mutex lock; /* protect changes */ + struct mutex lock; /* protect changes */ - u64 count; /* total since reset */ + atomic64_t count; /* total since reset */ u64 sample_window; /* total sampling window (on+off) */ u64 sample_width; /* active sampling portion of window */ @@ -193,8 +193,7 @@ void trace_hwlat_callback(bool enter) * get_sample - sample the CPU TSC and look for likely hardware latencies * * Used to repeatedly capture the CPU TSC (or similar), looking for potential - * hardware-induced latency. Called with interrupts disabled and with - * hwlat_data.lock held. + * hardware-induced latency. Called with interrupts disabled. 
*/ static int get_sample(void) { @@ -204,6 +203,7 @@ static int get_sample(void) time_type start, t1, t2, last_t2; s64 diff, outer_diff, total, last_total = 0; u64 sample = 0; + u64 sample_width = READ_ONCE(hwlat_data.sample_width); u64 thresh = tracing_thresh; u64 outer_sample = 0; int ret = -1; @@ -267,7 +267,7 @@ static int get_sample(void) if (diff > sample) sample = diff; /* only want highest value */ - } while (total <= hwlat_data.sample_width); + } while (total <= sample_width); barrier(); /* finish the above in the view for NMIs */ trace_hwlat_callback_enabled = false; @@ -285,8 +285,7 @@ static int get_sample(void) if (kdata->nmi_total_ts) do_div(kdata->nmi_total_ts, NSEC_PER_USEC); - hwlat_data.count++; - s.seqnum = hwlat_data.count; + s.seqnum = atomic64_inc_return(&hwlat_data.count); s.duration = sample; s.outer_duration = outer_sample; s.nmi_total_ts = kdata->nmi_total_ts; @@ -832,7 +831,7 @@ static int hwlat_tracer_init(struct trace_array *tr) hwlat_trace = tr; - hwlat_data.count = 0; + atomic64_set(&hwlat_data.count, 0); tr->max_latency = 0; save_tracing_thresh = tracing_thresh; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9953506370a5..061658518605 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -2048,6 +2048,10 @@ static __init int init_kprobe_trace(void) trace_create_file("kprobe_profile", TRACE_MODE_READ, NULL, NULL, &kprobe_profile_ops); + /* If no 'kprobe_event=' cmd is provided, return directly. */ + if (kprobe_boot_events_buf[0] == '\0') + return 0; + setup_boot_kprobe_events(); return 0; @@ -2079,7 +2083,7 @@ static __init int kprobe_trace_self_tests_init(void) struct trace_kprobe *tk; struct trace_event_file *file; - if (tracing_is_disabled()) + if (unlikely(tracing_disabled)) return -ENODEV; if (tracing_selftest_disabled) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cc2d3306bb60..1996d7aba038 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -194,13 +194,37 @@ trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, EXPORT_SYMBOL(trace_print_symbols_seq_u64); #endif +/** + * trace_print_bitmask_seq - print a bitmask to a sequence buffer + * @iter: The trace iterator for the current event instance + * @bitmask_ptr: The pointer to the bitmask data + * @bitmask_size: The size of the bitmask in bytes + * + * Prints a bitmask into a sequence buffer as either a hex string or a + * human-readable range list, depending on the instance's "bitmask-list" + * trace option. The bitmask is formatted into the iterator's temporary + * scratchpad rather than the primary sequence buffer. This avoids + * duplication and pointer-collision issues when the returned string is + * processed by a "%s" specifier in a TP_printk() macro. + * + * Returns a pointer to the formatted string within the temporary buffer. 
+ */ const char * -trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, +trace_print_bitmask_seq(struct trace_iterator *iter, void *bitmask_ptr, unsigned int bitmask_size) { - const char *ret = trace_seq_buffer_ptr(p); + struct trace_seq *p = &iter->tmp_seq; + const struct trace_array *tr = iter->tr; + const char *ret; + + trace_seq_init(p); + ret = trace_seq_buffer_ptr(p); + + if (tr->trace_flags & TRACE_ITER(BITMASK_LIST)) + trace_seq_bitmask_list(p, bitmask_ptr, bitmask_size * 8); + else + trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); - trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); trace_seq_putc(p, 0); return ret; diff --git a/kernel/trace/trace_pid.c b/kernel/trace/trace_pid.c new file mode 100644 index 000000000000..7127c8de4174 --- /dev/null +++ b/kernel/trace/trace_pid.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "trace.h" + +/** + * trace_find_filtered_pid - check if a pid exists in a filtered_pid list + * @filtered_pids: The list of pids to check + * @search_pid: The PID to find in @filtered_pids + * + * Returns true if @search_pid is found in @filtered_pids, and false otherwise. + */ +bool +trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) +{ + return trace_pid_list_is_set(filtered_pids, search_pid); +} + +/** + * trace_ignore_this_task - should a task be ignored for tracing + * @filtered_pids: The list of pids to check + * @filtered_no_pids: The list of pids not to be traced + * @task: The task that should be ignored if not filtered + * + * Checks if @task should be traced or not from @filtered_pids. + * Returns true if @task should *NOT* be traced. + * Returns false if @task should be traced. + */ +bool +trace_ignore_this_task(struct trace_pid_list *filtered_pids, + struct trace_pid_list *filtered_no_pids, + struct task_struct *task) +{ + /* + * If filtered_no_pids is not empty, and the task's pid is listed + * in filtered_no_pids, then return true. + * Otherwise, if filtered_pids is empty, that means we can + * trace all tasks. If it has content, then only trace pids + * within filtered_pids. + */ + + return (filtered_pids && + !trace_find_filtered_pid(filtered_pids, task->pid)) || + (filtered_no_pids && + trace_find_filtered_pid(filtered_no_pids, task->pid)); +} + +/** + * trace_filter_add_remove_task - Add or remove a task from a pid_list + * @pid_list: The list to modify + * @self: The current task for fork or NULL for exit + * @task: The task to add or remove + * + * If adding a task, if @self is defined, the task is only added if @self + * is also included in @pid_list. This happens on fork and tasks should + * only be added when the parent is listed. If @self is NULL, then the + * @task pid will be removed from the list, which would happen on exit + * of a task. 
+ */ +void trace_filter_add_remove_task(struct trace_pid_list *pid_list, + struct task_struct *self, + struct task_struct *task) +{ + if (!pid_list) + return; + + /* For forks, we only add if the forking task is listed */ + if (self) { + if (!trace_find_filtered_pid(pid_list, self->pid)) + return; + } + + /* "self" is set for forks, and NULL for exits */ + if (self) + trace_pid_list_set(pid_list, task->pid); + else + trace_pid_list_clear(pid_list, task->pid); +} + +/** + * trace_pid_next - Used for seq_file to get to the next pid of a pid_list + * @pid_list: The pid list to show + * @v: The last pid that was shown (+1 the actual pid to let zero be displayed) + * @pos: The position of the file + * + * This is used by the seq_file "next" operation to iterate the pids + * listed in a trace_pid_list structure. + * + * Returns the pid+1 as we want to display pid of zero, but NULL would + * stop the iteration. + */ +void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) +{ + long pid = (unsigned long)v; + unsigned int next; + + (*pos)++; + + /* pid already is +1 of the actual previous bit */ + if (trace_pid_list_next(pid_list, pid, &next) < 0) + return NULL; + + pid = next; + + /* Return pid + 1 to allow zero to be represented */ + return (void *)(pid + 1); +} + +/** + * trace_pid_start - Used for seq_file to start reading pid lists + * @pid_list: The pid list to show + * @pos: The position of the file + * + * This is used by seq_file "start" operation to start the iteration + * of listing pids. + * + * Returns the pid+1 as we want to display pid of zero, but NULL would + * stop the iteration. + */ +void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) +{ + unsigned long pid; + unsigned int first; + loff_t l = 0; + + if (trace_pid_list_first(pid_list, &first) < 0) + return NULL; + + pid = first; + + /* Return pid + 1 so that zero can be the exit value */ + for (pid++; pid && l < *pos; + pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) + ; + return (void *)pid; +} + +/** + * trace_pid_show - show the current pid in seq_file processing + * @m: The seq_file structure to write into + * @v: A void pointer of the pid (+1) value to display + * + * Can be directly used by seq_file operations to display the current + * pid value. + */ +int trace_pid_show(struct seq_file *m, void *v) +{ + unsigned long pid = (unsigned long)v - 1; + + seq_printf(m, "%lu\n", pid); + return 0; +} + +/* 128 should be much more than enough */ +#define PID_BUF_SIZE 127 + +int trace_pid_write(struct trace_pid_list *filtered_pids, + struct trace_pid_list **new_pid_list, + const char __user *ubuf, size_t cnt) +{ + struct trace_pid_list *pid_list; + struct trace_parser parser; + unsigned long val; + int nr_pids = 0; + ssize_t read = 0; + ssize_t ret; + loff_t pos; + pid_t pid; + + if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) + return -ENOMEM; + + /* + * Always recreate a new array. The write is an all or nothing + * operation. Always create a new array when adding new pids by + * the user. If the operation fails, then the current list is + * not modified. 
+ */ + pid_list = trace_pid_list_alloc(); + if (!pid_list) { + trace_parser_put(&parser); + return -ENOMEM; + } + + if (filtered_pids) { + /* copy the current bits to the new max */ + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { + ret = trace_pid_list_set(pid_list, pid); + if (ret < 0) + goto out; + + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); + nr_pids++; + } + } + + ret = 0; + while (cnt > 0) { + + pos = 0; + + ret = trace_get_user(&parser, ubuf, cnt, &pos); + if (ret < 0) + break; + + read += ret; + ubuf += ret; + cnt -= ret; + + if (!trace_parser_loaded(&parser)) + break; + + ret = -EINVAL; + if (kstrtoul(parser.buffer, 0, &val)) + break; + + pid = (pid_t)val; + + if (trace_pid_list_set(pid_list, pid) < 0) { + ret = -1; + break; + } + nr_pids++; + + trace_parser_clear(&parser); + ret = 0; + } + out: + trace_parser_put(&parser); + + if (ret < 0) { + trace_pid_list_free(pid_list); + return ret; + } + + if (!nr_pids) { + /* Cleared the list of pids */ + trace_pid_list_free(pid_list); + pid_list = NULL; + } + + *new_pid_list = pid_list; + + return read; +} + diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 29f6e95439b6..6a29e4350b55 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -376,6 +376,436 @@ static const struct file_operations ftrace_formats_fops = { .release = seq_release, }; +static __always_inline bool printk_binsafe(struct trace_array *tr) +{ + /* + * The binary format of traceprintk can cause a crash if used + * by a buffer from another boot. Force the use of the + * non binary version of trace_printk if the trace_printk + * buffer is a boot mapped ring buffer. + */ + return !(tr->flags & TRACE_ARRAY_FL_BOOT); +} + +int __trace_array_puts(struct trace_array *tr, unsigned long ip, + const char *str, int size) +{ + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct print_entry *entry; + unsigned int trace_ctx; + int alloc; + + if (!(tr->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + if (unlikely(tracing_selftest_running && + (tr->flags & TRACE_ARRAY_FL_GLOBAL))) + return 0; + + if (unlikely(tracing_disabled)) + return 0; + + alloc = sizeof(*entry) + size + 2; /* possible \n added */ + + trace_ctx = tracing_gen_ctx(); + buffer = tr->array_buffer.buffer; + guard(ring_buffer_nest)(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + trace_ctx); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->ip = ip; + + memcpy(&entry->buf, str, size); + + /* Add a newline if necessary */ + if (entry->buf[size - 1] != '\n') { + entry->buf[size] = '\n'; + entry->buf[size + 1] = '\0'; + } else + entry->buf[size] = '\0'; + + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); + return size; +} +EXPORT_SYMBOL_GPL(__trace_array_puts); + +/** + * __trace_puts - write a constant string into the trace buffer. 
+ * @ip: The address of the caller + * @str: The constant string to write + */ +int __trace_puts(unsigned long ip, const char *str) +{ + return __trace_array_puts(printk_trace, ip, str, strlen(str)); +} +EXPORT_SYMBOL_GPL(__trace_puts); + +/** + * __trace_bputs - write the pointer to a constant string into trace buffer + * @ip: The address of the caller + * @str: The constant string to write to the buffer to + */ +int __trace_bputs(unsigned long ip, const char *str) +{ + struct trace_array *tr = READ_ONCE(printk_trace); + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct bputs_entry *entry; + unsigned int trace_ctx; + int size = sizeof(struct bputs_entry); + + if (!printk_binsafe(tr)) + return __trace_puts(ip, str); + + if (!(tr->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + trace_ctx = tracing_gen_ctx(); + buffer = tr->array_buffer.buffer; + + guard(ring_buffer_nest)(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, + trace_ctx); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->str = str; + + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); + + return 1; +} +EXPORT_SYMBOL_GPL(__trace_bputs); + +/* created for use with alloc_percpu */ +struct trace_buffer_struct { + int nesting; + char buffer[4][TRACE_BUF_SIZE]; +}; + +static struct trace_buffer_struct __percpu *trace_percpu_buffer; + +/* + * This allows for lockless recording. If we're nested too deeply, then + * this returns NULL. + */ +static char *get_trace_buf(void) +{ + struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); + + if (!trace_percpu_buffer || buffer->nesting >= 4) + return NULL; + + buffer->nesting++; + + /* Interrupts must see nesting incremented before we use the buffer */ + barrier(); + return &buffer->buffer[buffer->nesting - 1][0]; +} + +static void put_trace_buf(void) +{ + /* Don't let the decrement of nesting leak before this */ + barrier(); + this_cpu_dec(trace_percpu_buffer->nesting); +} + +static int alloc_percpu_trace_buffer(void) +{ + struct trace_buffer_struct __percpu *buffers; + + if (trace_percpu_buffer) + return 0; + + buffers = alloc_percpu(struct trace_buffer_struct); + if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) + return -ENOMEM; + + trace_percpu_buffer = buffers; + return 0; +} + +static int buffers_allocated; + +void trace_printk_init_buffers(void) +{ + if (buffers_allocated) + return; + + if (alloc_percpu_trace_buffer()) + return; + + /* trace_printk() is for debug use only. Don't use it in production. */ + + pr_warn("\n"); + pr_warn("**********************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); + pr_warn("** **\n"); + pr_warn("** This means that this is a DEBUG kernel and it is **\n"); + pr_warn("** unsafe for production use. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging **\n"); + pr_warn("** the kernel, report this immediately to your vendor! 
**\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("**********************************************************\n"); + + /* Expand the buffers to set size */ + if (tracing_update_buffers(NULL) < 0) + pr_err("Failed to expand tracing buffers for trace_printk() calls\n"); + else + buffers_allocated = 1; + + /* + * trace_printk_init_buffers() can be called by modules. + * If that happens, then we need to start cmdline recording + * directly here. + */ + if (system_state == SYSTEM_RUNNING) + tracing_start_cmdline_record(); +} +EXPORT_SYMBOL_GPL(trace_printk_init_buffers); + +void trace_printk_start_comm(void) +{ + /* Start tracing comms if trace printk is set */ + if (!buffers_allocated) + return; + tracing_start_cmdline_record(); +} + +void trace_printk_start_stop_comm(int enabled) +{ + if (!buffers_allocated) + return; + + if (enabled) + tracing_start_cmdline_record(); + else + tracing_stop_cmdline_record(); +} + +/** + * trace_vbprintk - write binary msg to tracing buffer + * @ip: The address of the caller + * @fmt: The string format to write to the buffer + * @args: Arguments for @fmt + */ +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) +{ + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct trace_array *tr = READ_ONCE(printk_trace); + struct bprint_entry *entry; + unsigned int trace_ctx; + char *tbuffer; + int len = 0, size; + + if (!printk_binsafe(tr)) + return trace_vprintk(ip, fmt, args); + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + + trace_ctx = tracing_gen_ctx(); + guard(preempt_notrace)(); + + tbuffer = get_trace_buf(); + if (!tbuffer) { + len = 0; + goto out_nobuffer; + } + + len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); + + if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) + goto out_put; + + size = sizeof(*entry) + sizeof(u32) * len; + buffer = tr->array_buffer.buffer; + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, + trace_ctx); + if (!event) + goto out_put; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; + + memcpy(entry->buf, tbuffer, sizeof(u32) * len); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + } +out_put: + put_trace_buf(); + +out_nobuffer: + unpause_graph_tracing(); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vbprintk); + +static __printf(3, 0) +int __trace_array_vprintk(struct trace_buffer *buffer, + unsigned long ip, const char *fmt, va_list args) +{ + struct ring_buffer_event *event; + int len = 0, size; + struct print_entry *entry; + unsigned int trace_ctx; + char *tbuffer; + + if (unlikely(tracing_disabled)) + return 0; + + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + + trace_ctx = tracing_gen_ctx(); + guard(preempt_notrace)(); + + + tbuffer = get_trace_buf(); + if (!tbuffer) { + len = 0; + goto out_nobuffer; + } + + len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); + + size = sizeof(*entry) + len + 1; + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + entry->ip = ip; + + memcpy(&entry->buf, tbuffer, len + 1); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(printk_trace, buffer, 
trace_ctx, 6, NULL); + } +out: + put_trace_buf(); + +out_nobuffer: + unpause_graph_tracing(); + + return len; +} + +int trace_array_vprintk(struct trace_array *tr, + unsigned long ip, const char *fmt, va_list args) +{ + if (tracing_selftest_running && (tr->flags & TRACE_ARRAY_FL_GLOBAL)) + return 0; + + return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); +} + +/** + * trace_array_printk - Print a message to a specific instance + * @tr: The instance trace_array descriptor + * @ip: The instruction pointer that this is called from. + * @fmt: The format to print (printf format) + * + * If a subsystem sets up its own instance, they have the right to + * printk strings into their tracing instance buffer using this + * function. Note, this function will not write into the top level + * buffer (use trace_printk() for that), as writing into the top level + * buffer should only have events that can be individually disabled. + * trace_printk() is only used for debugging a kernel, and should not + * be ever incorporated in normal use. + * + * trace_array_printk() can be used, as it will not add noise to the + * top level tracing buffer. + * + * Note, trace_array_init_printk() must be called on @tr before this + * can be used. + */ +int trace_array_printk(struct trace_array *tr, + unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!tr) + return -ENOENT; + + /* This is only allowed for created instances */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return 0; + + if (!(tr->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + va_start(ap, fmt); + ret = trace_array_vprintk(tr, ip, fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(trace_array_printk); + +/** + * trace_array_init_printk - Initialize buffers for trace_array_printk() + * @tr: The trace array to initialize the buffers for + * + * As trace_array_printk() only writes into instances, they are OK to + * have in the kernel (unlike trace_printk()). This needs to be called + * before trace_array_printk() can be used on a trace_array. + */ +int trace_array_init_printk(struct trace_array *tr) +{ + if (!tr) + return -ENOENT; + + /* This is only allowed for created instances */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return -EINVAL; + + return alloc_percpu_trace_buffer(); +} +EXPORT_SYMBOL_GPL(trace_array_init_printk); + +int trace_array_printk_buf(struct trace_buffer *buffer, + unsigned long ip, const char *fmt, ...) 
+{ + int ret; + va_list ap; + + if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + va_start(ap, fmt); + ret = __trace_array_vprintk(buffer, ip, fmt, ap); + va_end(ap); + return ret; +} + +int trace_vprintk(unsigned long ip, const char *fmt, va_list args) +{ + return trace_array_vprintk(printk_trace, ip, fmt, args); +} +EXPORT_SYMBOL_GPL(trace_vprintk); + static __init int init_trace_printk_function_export(void) { int ret; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index d88c44f1dfa5..be53fe6fee6a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1225,7 +1225,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) /* check both trace buffers */ ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); trace->reset(tr); tracing_start(); @@ -1287,7 +1287,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) /* check both trace buffers */ ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); trace->reset(tr); tracing_start(); @@ -1355,7 +1355,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * if (ret) goto out; - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); if (ret) goto out; @@ -1385,7 +1385,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * if (ret) goto out; - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -1513,7 +1513,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* check both trace buffers */ ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); trace->reset(tr); diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 32684ef4fb9d..85f6f10d107f 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -106,7 +106,7 @@ EXPORT_SYMBOL_GPL(trace_seq_printf); * Writes a ASCII representation of a bitmask string into @s. */ void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, - int nmaskbits) + int nmaskbits) { unsigned int save_len = s->seq.len; @@ -125,6 +125,33 @@ void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, EXPORT_SYMBOL_GPL(trace_seq_bitmask); /** + * trace_seq_bitmask_list - write a bitmask array in its list representation + * @s: trace sequence descriptor + * @maskp: points to an array of unsigned longs that represent a bitmask + * @nmaskbits: The number of bits that are valid in @maskp + * + * Writes a list representation (e.g., 0-3,5-7) of a bitmask string into @s. 
+ */ +void trace_seq_bitmask_list(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ + unsigned int save_len = s->seq.len; + + if (s->full) + return; + + __trace_seq_init(s); + + seq_buf_printf(&s->seq, "%*pbl", nmaskbits, maskp); + + if (unlikely(seq_buf_has_overflowed(&s->seq))) { + s->seq.len = save_len; + s->full = 1; + } +} +EXPORT_SYMBOL_GPL(trace_seq_bitmask_list); + +/** * trace_seq_vprintf - sequence printing of trace information * @s: trace sequence descriptor * @fmt: printf format string diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 62719d2941c9..fd2ee879815c 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -34,9 +34,13 @@ enum tp_transition_sync { struct tp_transition_snapshot { unsigned long rcu; + unsigned long srcu_gp; bool ongoing; }; +DEFINE_SRCU_FAST(tracepoint_srcu); +EXPORT_SYMBOL_GPL(tracepoint_srcu); + /* Protected by tracepoints_mutex */ static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC]; @@ -46,6 +50,7 @@ static void tp_rcu_get_state(enum tp_transition_sync sync) /* Keep the latest get_state snapshot. */ snapshot->rcu = get_state_synchronize_rcu(); + snapshot->srcu_gp = start_poll_synchronize_srcu(&tracepoint_srcu); snapshot->ongoing = true; } @@ -56,6 +61,8 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync) if (!snapshot->ongoing) return; cond_synchronize_rcu(snapshot->rcu); + if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu_gp)) + synchronize_srcu(&tracepoint_srcu); snapshot->ongoing = false; } @@ -112,10 +119,13 @@ static inline void release_probes(struct tracepoint *tp, struct tracepoint_func struct tp_probes *tp_probes = container_of(old, struct tp_probes, probes[0]); - if (tracepoint_is_faultable(tp)) - call_rcu_tasks_trace(&tp_probes->rcu, rcu_free_old_probes); - else - call_rcu(&tp_probes->rcu, rcu_free_old_probes); + if (tracepoint_is_faultable(tp)) { + call_rcu_tasks_trace(&tp_probes->rcu, + rcu_free_old_probes); + } else { + call_srcu(&tracepoint_srcu, &tp_probes->rcu, + rcu_free_old_probes); + } } } |
