From d430d3d7e646eb1eac2bb4aa244a644312e67c76 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 16 Mar 2011 17:29:47 -0400 Subject: jump label: Introduce static_branch() interface Introduce: static __always_inline bool static_branch(struct jump_label_key *key); instead of the old JUMP_LABEL(key, label) macro. In this way, jump labels become really easy to use: Define: struct jump_label_key jump_key; Can be used as: if (static_branch(&jump_key)) do unlikely code enable/disable via: jump_label_inc(&jump_key); jump_label_dec(&jump_key); that's it! For the jump labels disabled case, the static_branch() becomes an atomic_read(), and jump_label_inc()/dec() are simply atomic_inc(), atomic_dec() operations. We show testing results for this change below. Thanks to H. Peter Anvin for suggesting the 'static_branch()' construct. Since we now require a 'struct jump_label_key *key', we can store a pointer into the jump table addresses. In this way, we can enable/disable jump labels, in basically constant time. This change allows us to completely remove the previous hashtable scheme. Thanks to Peter Zijlstra for this re-write. Testing: I ran a series of 'tbench 20' runs 5 times (with reboots) for 3 configurations, where tracepoints were disabled. jump label configured in avg: 815.6 jump label *not* configured in (using atomic reads) avg: 800.1 jump label *not* configured in (regular reads) avg: 803.4 Signed-off-by: Peter Zijlstra LKML-Reference: <20110316212947.GA8792@redhat.com> Signed-off-by: Jason Baron Suggested-by: H. Peter Anvin Tested-by: David Daney Acked-by: Ralf Baechle Acked-by: David S. Miller Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 14 ++---- include/linux/dynamic_debug.h | 2 - include/linux/jump_label.h | 89 ++++++++++++++++++++++++--------------- include/linux/jump_label_ref.h | 44 ------------------- include/linux/perf_event.h | 26 ++++++------ include/linux/tracepoint.h | 22 +++++----- 6 files changed, 83 insertions(+), 114 deletions(-) delete mode 100644 include/linux/jump_label_ref.h (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0ab..79522166d7f1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -170,6 +170,10 @@ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___verbose) = .; \ *(__verbose) \ @@ -228,8 +232,6 @@ \ BUG_TABLE \ \ - JUMP_TABLE \ - \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -589,14 +591,6 @@ #define BUG_TABLE #endif -#define JUMP_TABLE \ - . = ALIGN(8); \ - __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__start___jump_table) = .; \ - *(__jump_table) \ - VMLINUX_SYMBOL(__stop___jump_table) = .; \ - } - #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . = ALIGN(4); \ diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 0c9653f11c18..e747ecd48e1c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,8 +1,6 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H -#include - /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. 
They * use independent hash functions, to reduce the chance of false positives. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7880f18e4b86..83e745f3ead7 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,20 +1,43 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H +#include +#include + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) + +struct jump_label_key { + atomic_t enabled; + struct jump_entry *entries; +#ifdef CONFIG_MODULES + struct jump_label_mod *next; +#endif +}; + # include # define HAVE_JUMP_LABEL #endif enum jump_label_type { + JUMP_LABEL_DISABLE = 0, JUMP_LABEL_ENABLE, - JUMP_LABEL_DISABLE }; struct module; #ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_MODULES +#define JUMP_LABEL_INIT {{ 0 }, NULL, NULL} +#else +#define JUMP_LABEL_INIT {{ 0 }, NULL} +#endif + +static __always_inline bool static_branch(struct jump_label_key *key) +{ + return arch_static_branch(key); +} + extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; @@ -23,37 +46,37 @@ extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_text_poke_early(jump_label_t addr); -extern void jump_label_update(unsigned long key, enum jump_label_type type); -extern void jump_label_apply_nops(struct module *mod); extern int jump_label_text_reserved(void *start, void *end); +extern void jump_label_inc(struct jump_label_key *key); +extern void jump_label_dec(struct jump_label_key *key); +extern bool jump_label_enabled(struct jump_label_key *key); +extern void jump_label_apply_nops(struct module *mod); -#define jump_label_enable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); +#else -#define jump_label_disable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); +#include -#else +#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(*key)) \ - goto label; \ -} while (0) +struct jump_label_key { + atomic_t enabled; +}; -#define jump_label_enable(cond_var) \ -do { \ - *(cond_var) = 1; \ -} while (0) +static __always_inline bool static_branch(struct jump_label_key *key) +{ + if (unlikely(atomic_read(&key->enabled))) + return true; + return false; +} -#define jump_label_disable(cond_var) \ -do { \ - *(cond_var) = 0; \ -} while (0) +static inline void jump_label_inc(struct jump_label_key *key) +{ + atomic_inc(&key->enabled); +} -static inline int jump_label_apply_nops(struct module *mod) +static inline void jump_label_dec(struct jump_label_key *key) { - return 0; + atomic_dec(&key->enabled); } static inline int jump_label_text_reserved(void *start, void *end) @@ -64,16 +87,16 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -#endif +static inline bool jump_label_enabled(struct jump_label_key *key) +{ + return !!atomic_read(&key->enabled); +} -#define COND_STMT(key, stmt) \ -do { \ - __label__ jl_enabled; \ - JUMP_LABEL(key, jl_enabled); \ - if (0) { \ -jl_enabled: \ - stmt; \ - } \ -} while (0) +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif #endif diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h deleted file mode 100644 index e5d012ad92c6..000000000000 --- a/include/linux/jump_label_ref.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _LINUX_JUMP_LABEL_REF_H -#define 
_LINUX_JUMP_LABEL_REF_H - -#include -#include - -#ifdef HAVE_JUMP_LABEL - -static inline void jump_label_inc(atomic_t *key) -{ - if (atomic_add_return(1, key) == 1) - jump_label_enable(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - if (atomic_dec_and_test(key)) - jump_label_disable(key); -} - -#else /* !HAVE_JUMP_LABEL */ - -static inline void jump_label_inc(atomic_t *key) -{ - atomic_inc(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - atomic_dec(key); -} - -#undef JUMP_LABEL -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(__builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(key), atomic_t *), \ - atomic_read((atomic_t *)(key)), *(key)))) \ - goto label; \ -} while (0) - -#endif /* HAVE_JUMP_LABEL */ - -#endif /* _LINUX_JUMP_LABEL_REF_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 311b4dc785a1..730b7821690f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -505,7 +505,7 @@ struct perf_guest_info_callbacks { #include #include #include -#include +#include #include #include @@ -1034,7 +1034,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); @@ -1063,22 +1063,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct pt_regs hot_regs; - JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); - return; - -have_event: - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; + if (static_branch(&perf_swevent_enabled[event_id])) { + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; + } + __perf_sw_event(event_id, nr, nmi, regs, addr); } - __perf_sw_event(event_id, nr, nmi, regs, addr); } -extern atomic_t perf_sched_events; +extern struct jump_label_key perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *task) { - COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_in(task); } static inline @@ -1086,7 +1085,8 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_out(task, next); } extern void perf_event_mmap(struct vm_area_struct *vma); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 97c84a58efb8..d530a4460a0b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - int state; /* State. 
*/ + struct jump_label_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -146,9 +146,7 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ - return; \ -do_trace: \ + if (static_branch(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ @@ -176,14 +174,14 @@ do_trace: \ * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. */ -#define DEFINE_TRACE_FN(name, reg, unreg) \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) = #name; \ - struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, 0, reg, unreg, NULL }; \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ +#define DEFINE_TRACE_FN(name, reg, unreg) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"))) = \ + { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; #define DEFINE_TRACE(name) \ -- cgit v1.2.3 From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Apr 2011 08:18:31 +0200 Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate the cycles during which the CPU does nothing useful, because it is stalled on a cache-miss or some other condition. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ee9f1e782800..ac636dd20a0c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,6 +52,7 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES = 7, PERF_COUNT_HW_MAX, /* non-ABI */ }; -- cgit v1.2.3 From 8f62242246351b5a4bc0c1f00c0c7003edea128a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 13:19:47 +0200 Subject: perf events: Add generic front-end and back-end stalled cycle event definitions Add two generic hardware events: front-end and back-end stalled cycles. These events measure conditions when the CPU is executing code but its capabilities are not fully utilized. Understanding such situations and analyzing them is an important sub-task of code optimization workflows. Both events limit performance: most front-end stalls tend to be caused by branch misprediction or instruction-fetch cache misses; back-end stalls can be caused by various resource shortages or inefficient instruction scheduling. Front-end stalls are the more important ones: code cannot run fast if the instruction stream is not being kept up. An over-utilized back-end can cause front-end stalls and thus has to be watched as well. 
The exact composition depends heavily on program logic and instruction mix. We use the terms 'stall', 'front-end' and 'back-end' loosely and try to use the best available events from specific CPUs that approximate these concepts. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ac636dd20a0c..4e2d7ae71499 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,7 +52,8 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, - PERF_COUNT_HW_STALLED_CYCLES = 7, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, PERF_COUNT_HW_MAX, /* non-ABI */ }; -- cgit v1.2.3 From 45a4a2372b364107cabea79f255b333236626416 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 21 Apr 2011 23:16:46 -0400 Subject: ftrace: Remove FTRACE_FL_FAILED flag Since we disable all function tracer processing if we detect that a modification of an instruction has failed, we do not need to track that the record has failed. No more ftrace processing is allowed, and the FTRACE_FL_FAILED flag is pointless. Removing this flag simplifies some of the code, but some ftrace_disabled checks needed to be added or moved around a little. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ca29e03c1fac..2a195ffd4269 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,11 +147,10 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FAILED = (1 << 1), - FTRACE_FL_FILTER = (1 << 2), - FTRACE_FL_ENABLED = (1 << 3), - FTRACE_FL_NOTRACE = (1 << 4), - FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FILTER = (1 << 1), + FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_NOTRACE = (1 << 3), + FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { -- cgit v1.2.3 From d2c8c3eafbf715306ec891e7ca52d3d999acbe31 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Apr 2011 14:32:42 -0400 Subject: ftrace: Remove FTRACE_FL_CONVERTED flag Since we disable all function tracer processing if we detect that a modification of an instruction has failed, we do not need to track that the record has failed. No more ftrace processing is allowed, and the FTRACE_FL_CONVERTED flag is pointless. The FTRACE_FL_CONVERTED flag was used to denote records that were successfully converted from mcount calls into nops. But if a single record fails, all of ftrace is disabled. 
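(Aside: with two flag bits gone, a minimal sketch of how the surviving dyn_ftrace flag bits are typically tested; the helper below is illustrative and is not part of these patches.)

#include <linux/ftrace.h>

/* Illustrative only: test a record's surviving flag bits. */
static int record_needs_enable(struct dyn_ftrace *rec)
{
	/* A free record no longer points at a function. */
	if (rec->flags & FTRACE_FL_FREE)
		return 0;

	/* Wanted by the filter but not yet patched in. */
	return (rec->flags & FTRACE_FL_FILTER) &&
	       !(rec->flags & FTRACE_FL_ENABLED);
}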
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2a195ffd4269..32047449b309 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -150,7 +150,6 @@ enum { FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), FTRACE_FL_NOTRACE = (1 << 3), - FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { -- cgit v1.2.3 From e7e7ee2eab2080248084d71fe0a115ab745eb2aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 4 May 2011 08:42:29 +0200 Subject: perf events: Clean up definitions and initializers, update copyrights Fix a few inconsistent style bits that were added over the past few months. Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-yv4hwf9yhnzoada8pcpb3a97@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 96 +++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9eec53d97370..207c16976a17 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -2,8 +2,8 @@ * Performance events: * * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. * @@ -468,9 +468,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) -#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) +#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ #ifdef __KERNEL__ /* @@ -484,9 +484,9 @@ enum perf_callchain_context { #endif struct perf_guest_info_callbacks { - int (*is_in_guest) (void); - int (*is_user_mode) (void); - unsigned long (*get_guest_ip) (void); + int (*is_in_guest)(void); + int (*is_user_mode)(void); + unsigned long (*get_guest_ip)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -652,19 +652,19 @@ struct pmu { * Start the transaction, after this ->add() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. 
*/ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** @@ -712,15 +712,15 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct pt_regs *regs); enum perf_group_flag { - PERF_GROUP_SOFTWARE = 0x1, + PERF_GROUP_SOFTWARE = 0x1, }; -#define SWEVENT_HLIST_BITS 8 -#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) +#define SWEVENT_HLIST_BITS 8 +#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) struct swevent_hlist { - struct hlist_head heads[SWEVENT_HLIST_SIZE]; - struct rcu_head rcu_head; + struct hlist_head heads[SWEVENT_HLIST_SIZE]; + struct rcu_head rcu_head; }; #define PERF_ATTACH_CONTEXT 0x01 @@ -733,13 +733,13 @@ struct swevent_hlist { * This is a per-cpu dynamically allocated data structure. */ struct perf_cgroup_info { - u64 time; - u64 timestamp; + u64 time; + u64 timestamp; }; struct perf_cgroup { - struct cgroup_subsys_state css; - struct perf_cgroup_info *info; /* timing info, one per cpu */ + struct cgroup_subsys_state css; + struct perf_cgroup_info *info; /* timing info, one per cpu */ }; #endif @@ -923,7 +923,7 @@ struct perf_event_context { /* * Number of contexts where an event can trigger: - * task, softirq, hardirq, nmi. + * task, softirq, hardirq, nmi. */ #define PERF_NR_CONTEXTS 4 @@ -1001,8 +1001,7 @@ struct perf_sample_data { struct perf_raw_record *raw; }; -static inline -void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) { data->addr = addr; data->raw = NULL; @@ -1039,8 +1038,7 @@ extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs -static inline void -perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } +static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } #endif /* @@ -1080,8 +1078,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task) __perf_event_task_sched_in(task); } -static inline -void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); @@ -1099,14 +1096,10 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs); - +extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -static inline void -perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { if (entry->nr < PERF_MAX_STACK_DEPTH) entry->ip[entry->nr++] = ip; @@ -1142,9 +1135,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record, extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ - PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) +# define perf_misc_flags(regs) \ + (user_mode(regs) ? 
PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) +# define perf_instruction_pointer(regs) instruction_pointer(regs) #endif extern int perf_output_begin(struct perf_output_handle *handle, @@ -1179,9 +1172,9 @@ static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline int perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } @@ -1194,23 +1187,22 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline void perf_event_task_tick(void) { } #endif -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* * This has to have a higher priority than migration_notifier in sched.c. */ -#define perf_cpu_notifier(fn) \ -do { \ - static struct notifier_block fn##_nb __cpuinitdata = \ - { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ - fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_STARTING, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ - (void *)(unsigned long)smp_processor_id()); \ - register_cpu_notifier(&fn##_nb); \ +#define perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ + fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_STARTING, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ + (void *)(unsigned long)smp_processor_id()); \ + register_cpu_notifier(&fn##_nb); \ } while (0) #endif /* __KERNEL__ */ -- cgit v1.2.3 From f0201738b61b1adcf6b2c4719c5c415745014c1c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 12 Apr 2011 19:06:39 -0400 Subject: ftrace: Avoid recording mcount on .init sections directly The init and exit sections should not be traced, and adding a call to mcount to them wastes text and instruction cache. Have the macro section attributes include notrace so that these functions are excluded from tracing at build time. 
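(Aside: a hedged example of the effect. With notrace folded into the section annotations in the diff below, a function such as this hypothetical one is compiled without an mcount call site, so ftrace never records it.)

#include <linux/init.h>

/* Hypothetical hotplug helper: __cpuinit now implies notrace, so no
 * mcount call is emitted here and the function tracer skips it. */
static int __cpuinit example_cpu_prepare(unsigned int cpu)
{
	return 0;
}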
Link: http://lkml.kernel.org/r/20110421023738.953028219@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/init.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index 577671c55153..9146f39cdddf 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -79,29 +79,29 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold +#define __exit __section(.exit.text) __exitused __cold notrace /* Used for HOTPLUG */ -#define __devinit __section(.devinit.text) __cold +#define __devinit __section(.devinit.text) __cold notrace #define __devinitdata __section(.devinit.data) #define __devinitconst __section(.devinit.rodata) -#define __devexit __section(.devexit.text) __exitused __cold +#define __devexit __section(.devexit.text) __exitused __cold notrace #define __devexitdata __section(.devexit.data) #define __devexitconst __section(.devexit.rodata) /* Used for HOTPLUG_CPU */ -#define __cpuinit __section(.cpuinit.text) __cold +#define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) #define __cpuinitconst __section(.cpuinit.rodata) -#define __cpuexit __section(.cpuexit.text) __exitused __cold +#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace #define __cpuexitdata __section(.cpuexit.data) #define __cpuexitconst __section(.cpuexit.rodata) /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold +#define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) #define __meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold +#define __memexit __section(.memexit.text) __exitused __cold notrace #define __memexitdata __section(.memexit.data) #define __memexitconst __section(.memexit.rodata) -- cgit v1.2.3 From b448c4e3ae6d20108dba1d7833f2c0d3dbad87ce Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 15:12:32 -0400 Subject: ftrace: Replace FTRACE_FL_NOTRACE flag with a hash of ignored functions To prepare for the accounting system that will allow multiple users of the function tracer, having FTRACE_FL_NOTRACE as a flag in the dyn_ftrace record does not make sense. All ftrace_ops will soon have a hash of functions they should trace and not trace. Making a global hash of functions not to trace makes the transition easier. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 32047449b309..fe0a90a09243 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -149,7 +149,6 @@ enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), - FTRACE_FL_NOTRACE = (1 << 3), }; struct dyn_ftrace { -- cgit v1.2.3 From 1cf41dd79993389b012e4542ab502ce36ae7343f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 20:59:51 -0400 Subject: ftrace: Use hash instead for FTRACE_FL_FILTER When multiple users are allowed to have their own set of functions to trace, having the FTRACE_FL_FILTER flag will not be enough to handle the accounting of those users. Each user will need its own set of functions. Replace the FTRACE_FL_FILTER with a filter_hash instead. This is temporary until the rest of the function filtering accounting gets in. 
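(Aside: a conceptual sketch of the hash-based replacement, under the assumption of a simple lookup helper; the real ftrace_hash lives in kernel/trace/ftrace.c and its interface may differ. All names below are assumptions.)

#include <linux/types.h>
#include <linux/ftrace.h>

static struct ftrace_hash *global_notrace_hash;	/* hypothetical name */

/* Hypothetical lookup: true if the call-site ip is in the hash. */
static bool hash_contains_ip(struct ftrace_hash *hash, unsigned long ip);

static bool record_is_notraced(struct dyn_ftrace *rec)
{
	/* was: rec->flags & FTRACE_FL_NOTRACE */
	return hash_contains_ip(global_notrace_hash, rec->ip);
}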
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fe0a90a09243..52fc5d4e6edd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,8 +147,7 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FILTER = (1 << 1), - FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_ENABLED = (1 << 1), }; struct dyn_ftrace { -- cgit v1.2.3 From f45948e898e7bc76a73a468796d2ce80dd040058 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 May 2011 12:29:25 -0400 Subject: ftrace: Create a global_ops to hold the filter and notrace hashes Combine the filter and notrace hashes to be accessed by a single entity, the global_ops. The global_ops is an ftrace_ops structure that is passed to different functions that can read or modify the filtering of the function tracer. The ftrace_ops structure was modified to hold filter and notrace hashes so that later patches may allow each ftrace_ops to have its own set of rules for which functions may be filtered. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 52fc5d4e6edd..6658a51390fe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -29,9 +29,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_hash; + struct ftrace_ops { - ftrace_func_t func; - struct ftrace_ops *next; + ftrace_func_t func; + struct ftrace_ops *next; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; +#endif }; extern int function_trace_stop; -- cgit v1.2.3 From ed926f9b35cda0988234c356e16a7cb30f4e5338 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 May 2011 13:25:24 -0400 Subject: ftrace: Use counters to enable functions to trace Every function has its own record that stores the instruction pointer and flags for the function to be traced. There are only two flags: enabled and free. The enabled flag states that tracing for the function has been enabled (actively traced), and the free flag states that the record no longer points to a function and can be used by new functions (loaded modules). These flags are now moved to the MSB of the flags (actually just the top 2 bits). The rest of the bits (30 bits) are now used as a ref counter. Every time a tracer registers functions to trace, those functions have their counters incremented. When tracing is enabled, to determine if a function should be traced, the counter is examined, and if it is non-zero the function is set to be traced. When an ftrace_ops is registered to trace functions, its hashes are examined. If the ftrace_ops filter_hash count is zero, then all functions are set to be traced, otherwise only the functions in the hash are to be traced. The exception to this is if a function is also in the ftrace_ops notrace_hash. Then that function's counter is not incremented for this ftrace_ops. 
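(Aside: given FTRACE_FL_MASK and FTRACE_REF_MAX from the diff below, a minimal sketch of reading the two halves of dyn_ftrace.flags; the helper names are illustrative, not part of the patch.)

#include <linux/types.h>
#include <linux/ftrace.h>

/* Low 30 bits: how many ftrace_ops reference this function. */
static unsigned long record_ref_count(struct dyn_ftrace *rec)
{
	return rec->flags & ~FTRACE_FL_MASK;	/* 0 .. FTRACE_REF_MAX */
}

/* Traced when live and referenced by at least one registered ops. */
static bool record_wanted(struct dyn_ftrace *rec)
{
	return !(rec->flags & FTRACE_FL_FREE) && record_ref_count(rec) != 0;
}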
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6658a51390fe..ab1c46e70bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -37,6 +37,7 @@ struct ftrace_ops { #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; + unsigned long flags; #endif }; @@ -152,10 +153,13 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); enum { - FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_ENABLED = (1 << 1), + FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_FREE = (1 << 31), }; +#define FTRACE_FL_MASK (0x3UL << 30) +#define FTRACE_REF_MAX ((1 << 30) - 1) + struct dyn_ftrace { union { unsigned long ip; /* address of mcount call-site */ -- cgit v1.2.3 From b848914ce39589d89ee0078a6d1ef452b464729e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 May 2011 09:27:52 -0400 Subject: ftrace: Implement separate user function filtering ftrace_ops that are registered to trace functions can now be agnostic to each other with respect to which functions they trace. Each ops has its own hash of the functions it wants to trace and a hash of the functions it does not want to trace. An empty filter hash denotes that all functions not in the notrace hash should be traced. Cc: Paul E. McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ab1c46e70bb6..4609c0ece79a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,13 +31,18 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_hash; +enum { + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, +}; + struct ftrace_ops { ftrace_func_t func; struct ftrace_ops *next; + unsigned long flags; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; - unsigned long flags; #endif }; -- cgit v1.2.3 From cdbe61bfe70440939e457fb4a8d0995eaaed17de Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 21:14:55 -0400 Subject: ftrace: Allow dynamically allocated function tracers Now that functions may be selected individually, it only makes sense that we should allow dynamically allocated trace structures to be used for tracing. This will allow perf to allocate an ftrace_ops structure at runtime and use it to pick and choose which functions that structure will trace. Note, a dynamically allocated ftrace_ops will always be called indirectly instead of being called directly from the mcount in entry.S. This is because there's no safe way to prevent mcount from being preempted before calling the function, unless we modify every entry.S to do so (not likely). Thus, dynamically allocated ftrace_ops will now be called by ftrace_ops_list_func(), which loops through the allocated ops when more than one op is allocated at a time. This loop is protected with a preempt_disable. To determine if an ftrace_ops structure is dynamically allocated or not, a new utility function, core_kernel_data(), was added to kernel/extable.c; it returns 1 if the address is between _sdata and _edata. Cc: Paul E. 
McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 + include/linux/kernel.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4609c0ece79a..caba694a62b6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -34,6 +34,7 @@ struct ftrace_hash; enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, }; struct ftrace_ops { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 00cec4dc0ae2..f37ba716ef8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -283,6 +283,7 @@ extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); +extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); -- cgit v1.2.3 From 936e074b286ae779f134312178dbab139ee7ea52 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 22:54:01 -0400 Subject: ftrace: Modify ftrace_set_filter/notrace to take ops Since users of the function tracer can now pick and choose which functions they want to trace independently of other users of the function tracer, we need to pass the ops struct to the ftrace_set_filter() functions. The functions ftrace_set_global_filter() and ftrace_set_global_notrace() are added to keep the old filter functions, which are used to modify the generic function tracers. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index caba694a62b6..9d88e1cb5dbb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -179,7 +179,12 @@ struct dyn_ftrace { }; int ftrace_force_update(void); -void ftrace_set_filter(unsigned char *buf, int len, int reset); +void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_global_filter(unsigned char *buf, int len, int reset); +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); -- cgit v1.2.3
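(Aside: a hedged usage sketch of the per-ops interface this series arrives at. The callback, the ops instance and the "kmalloc*" glob are illustrative; ftrace_set_filter() matches the declaration above, and register_ftrace_function() is the long-standing registration call.)

#include <linux/ftrace.h>
#include <linux/init.h>
#include <linux/string.h>

/* Illustrative callback, run for every function this ops traces. */
static void my_trace_func(unsigned long ip, unsigned long parent_ip)
{
	/* called with the traced function's ip and its caller's ip */
}

static struct ftrace_ops my_ops = {
	.func = my_trace_func,
};

static int __init my_tracer_init(void)
{
	/* Filter this ops down to kmalloc-related functions only. */
	ftrace_set_filter(&my_ops, (unsigned char *)"kmalloc*",
			  strlen("kmalloc*"), 1);
	return register_ftrace_function(&my_ops);
}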