From 70791ce9ba68a5921c9905ef05d23f62a90bc10c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jun 2010 19:34:05 +0200 Subject: perf: Generalize callchain_store() callchain_store() is the same on every arch, inline it in perf_event.h and rename it to perf_callchain_store() to avoid any collision. This removes repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 937495c25073..358880404b42 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,6 +978,13 @@ extern void perf_event_fork(struct task_struct *tsk); extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; -- cgit v1.2.3 From 56962b4449af34070bb1994621ef4f0265eed4d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Jun 2010 23:03:51 +0200 Subject: perf: Generalize some arch callchain code - Most archs use one callchain buffer per cpu, except x86 that needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() That avoids all the user_mode(), current->mm checks and so on. - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src). 
Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- include/linux/perf_event.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 358880404b42..4db61dded388 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -976,7 +976,15 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern struct perf_callchain_entry *perf_callchain_buffer(void); + static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) -- cgit v1.2.3 From 927c7a9e92c4f69097a6e9e086d11fc2f8a5b40b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 1 Jul 2010 16:20:36 +0200 Subject: perf: Fix race in callchains Now that software events don't have interrupt disabled anymore in the event path, callchains can nest on any context. So separating nmi and other contexts into two buffers has become racy. Fix this by providing one buffer per nesting level. Given the size of the callchain entries (2040 bytes * 4), we now need to allocate them dynamically. v2: Fixed put_callchain_entry call after recursion. Fix the type of the recursion, it must be an array. v3: Use a manual per cpu allocation (temporary solution until NMIs can safely access vmalloc'ed memory). 
Do a better separation between callchain reference tracking and allocation. Make the "put" path lockless for non-release cases. v4: Protect the callchain buffers with rcu. v5: Do the cpu buffers allocations node affine. Signed-off-by: Frederic Weisbecker Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian Cc: Paul Mundt Cc: David Miller Cc: Borislav Petkov --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4db61dded388..d7e8ea690864 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -983,7 +983,6 @@ extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern struct perf_callchain_entry *perf_callchain_buffer(void); static inline void -- cgit v1.2.3 From 7ae07ea3a48d30689ee037cb136bc21f0b37d8ae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 14 Aug 2010 20:45:13 +0200 Subject: perf: Humanize the number of contexts Instead of hardcoding the number of contexts for the recursions barriers, define a cpp constant to make the code more self-explanatory. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian --- include/linux/perf_event.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d7e8ea690864..ae6fa6050925 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -808,6 +808,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. 
+ */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -821,12 +827,8 @@ struct perf_cpu_context { struct mutex hlist_mutex; int hlist_refcount; - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { -- cgit v1.2.3 From 6016ee13db518ab1cd0cbf43fc2ad5712021e338 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Aug 2010 12:47:59 +0900 Subject: perf, tracing: add missing __percpu markups ftrace_event_call->perf_events, perf_trace_buf, fgraph_data->cpu_data and some local variables are percpu pointers missing __percpu markups. Add them. Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: <1281498479-28551-1-git-send-email-namhyung@gmail.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f51..5f8ad7bec636 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 +191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; -- cgit v1.2.3 From 2bf2160d8805de64308e2e7c3cd97813cb58ed2f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 23 Aug 2010 18:42:48 +0900 Subject: irq: Add tracepoint to softirq_raise Add a tracepoint for tracing when softirq action is raised. This and the existing tracepoints complete softirq's tracepoints: softirq_raise, softirq_entry and softirq_exit. 
And when this tracepoint is used in combination with the softirq_entry tracepoint we can determine the softirq raise latency. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Acked-by: Neil Horman Cc: David Miller Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724298.4050509@jp.fujitsu.com> [ factorize softirq events with DECLARE_EVENT_CLASS ] Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/linux/interrupt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..531495db1708 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); extern void wakeup_softirqd(void); -- cgit v1.2.3 From 51b0fe39549a04858001922919ab355dee9bdfcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:57 +0200 Subject: perf: Deconstify struct pmu sed -ie 's/const struct pmu\>/struct pmu/g' `git grep -l "const struct pmu\>"` Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree 
LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000610c4de71..09d048b52115 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -578,19 +578,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need * to do schedulability tests. */ - void (*start_txn) (const struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (const struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->disable() is called for * each successfull ->enable() during the transaction. */ - void (*cancel_txn) (const struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); }; /** @@ -669,7 +669,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - const struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; @@ -849,7 +849,7 @@ struct perf_output_handle { */ extern int perf_max_events; -extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -- cgit v1.2.3 From b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:08 +0200 Subject: perf: Register PMU implementations Simple registration interface for struct pmu, this provides the infrastructure for removing all the weak functions. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 09d048b52115..ab72f56eb372 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -561,6 +561,13 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { + struct list_head entry; + + /* + * Should return -ENOENT when the @event doesn't match this pmu + */ + int (*event_init) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); int (*start) (struct perf_event *event); @@ -849,7 +856,8 @@ struct perf_output_handle { */ extern int perf_max_events; -extern struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -- cgit v1.2.3 From 24cd7f54a0d47e1d5b3de29e2456bfbd2d8447b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 17:32:03 +0200 Subject: perf: Reduce perf_disable() usage Since the current perf_disable() usage is only an optimization, remove it for now. This eases the removal of the __weak hw_perf_enable() interface. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ab72f56eb372..243286a8ded7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -564,26 +564,26 @@ struct pmu { struct list_head entry; /* - * Should return -ENOENT when the @event doesn't match this pmu + * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); + int (*start) (struct perf_event *event); void (*stop) (struct perf_event *event); void (*read) (struct perf_event *event); void (*unthrottle) (struct perf_event *event); /* - * Group events scheduling is treated as a transaction, add group - * events as a whole and perform one schedulability test. If the test - * fails, roll back the whole group + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group */ /* - * Start the transaction, after this ->enable() doesn't need - * to do schedulability tests. + * Start the transaction, after this ->enable() doesn't need to + * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* @@ -594,8 +594,8 @@ struct pmu { */ int (*commit_txn) (struct pmu *pmu); /* - * Will cancel the transaction, assumes ->disable() is called for - * each successfull ->enable() during the transaction. 
+ * Will cancel the transaction, assumes ->disable() is called + * for each successfull ->enable() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); }; -- cgit v1.2.3 From 33696fc0d141bbbcb12f75b69608ea83282e3117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Jun 2010 08:49:00 +0200 Subject: perf: Per PMU disable Changes perf_disable() into perf_pmu_disable(). Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 243286a8ded7..6abf103fb7f8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -563,6 +563,11 @@ struct perf_event; struct pmu { struct list_head entry; + int *pmu_disable_count; + + void (*pmu_enable) (struct pmu *pmu); + void (*pmu_disable) (struct pmu *pmu); + /* * Should return -ENOENT when the @event doesn't match this PMU. 
*/ @@ -868,10 +873,8 @@ extern void perf_event_free_task(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_event_update_userpage(struct perf_event *event); @@ -1056,8 +1059,6 @@ static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } -- cgit v1.2.3 From ad5133b7030d04ce7701aa7cbe98f561347c79c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jun 2010 12:22:39 +0200 Subject: perf: Default PMU ops Provide default implementations for the pmu txn methods, this allows us to remove some conditional code. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6abf103fb7f8..bf85733597ec 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,8 +565,8 @@ struct pmu { int *pmu_disable_count; - void (*pmu_enable) (struct pmu *pmu); - void (*pmu_disable) (struct pmu *pmu); + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ /* * Should return -ENOENT when the @event doesn't match this PMU. @@ -590,19 +590,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->disable() is called * for each successfull ->enable() during the transaction. */ - void (*cancel_txn) (struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** -- cgit v1.2.3 From fa407f35e0298d841e4088f95a7f9cf6e725c6d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jun 2010 12:35:12 +0200 Subject: perf: Shrink hw_perf_event Use hw_perf_event::period_left instead of hw_perf_event::remaining and win back 8 bytes. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bf85733597ec..8cafa15af60d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -529,7 +529,6 @@ struct hw_perf_event { int last_cpu; }; struct { /* software */ - s64 remaining; struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT -- cgit v1.2.3 From a4eaf7f14675cb512d69f0c928055e73d0c6d252 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jun 2010 14:37:10 +0200 Subject: perf: Rework the PMU methods Replace pmu::{enable,disable,start,stop,unthrottle} with pmu::{add,del,start,stop}, all of which take a flags argument. The new interface extends the capability to stop a counter while keeping it scheduled on the PMU. We replace the throttled state with the generic stopped state. This also allows us to efficiently stop/start counters over certain code paths (like IRQ handlers). It also allows scheduling a counter without it starting, allowing for a generic frozen state (useful for rotating stopped counters). 
The stopped state is implemented in two different ways, depending on how the architecture implemented the throttled state: 1) We disable the counter: a) the pmu has per-counter enable bits, we flip that b) we program a NOP event, preserving the counter state 2) We store the counter state and ignore all read/overflow events Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 ++-- include/linux/perf_event.h | 54 +++++++++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5f8ad7bec636..8beabb958f61 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_enable(struct perf_event *event); -extern void perf_trace_disable(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8cafa15af60d..402073c61669 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -538,6 +538,7 @@ struct hw_perf_event { }; #endif }; + int state; local64_t prev_count; u64 sample_period; u64 last_period; @@ -549,6 +550,13 @@ struct hw_perf_event { #endif }; +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 
0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + struct perf_event; /* @@ -564,42 +572,62 @@ struct pmu { int *pmu_disable_count; + /* + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. + */ void (*pmu_enable) (struct pmu *pmu); /* optional */ void (*pmu_disable) (struct pmu *pmu); /* optional */ /* + * Try and initialize the event for this PMU. * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); - void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); - void (*stop) (struct perf_event *event); +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ + + /* + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. + */ + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + + /* + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. + */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ void (*read) (struct perf_event *event); - void (*unthrottle) (struct perf_event *event); /* * Group events scheduling is treated as a transaction, add * group events as a whole and perform one schedulability test. * If the test fails, roll back the whole group - */ - - /* - * Start the transaction, after this ->enable() doesn't need to + * + * Start the transaction, after this ->add() doesn't need to * do schedulability tests. 
*/ void (*start_txn) (struct pmu *pmu); /* optional */ /* - * If ->start_txn() disabled the ->enable() schedulability test + * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ int (*commit_txn) (struct pmu *pmu); /* optional */ /* - * Will cancel the transaction, assumes ->disable() is called - * for each successfull ->enable() during the transaction. + * Will cancel the transaction, assumes ->del() is called + * for each successfull ->add() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); /* optional */ }; @@ -680,7 +708,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; -- cgit v1.2.3 From 15ac9a395a753cb28c674e7ea80386ffdff21785 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 15:51:45 +0200 Subject: perf: Remove the sysfs bits Neither the overcommit nor the reservation sysfs parameter were actually working, remove them as they'll only get in the way. 
Signed-off-by: Peter Zijlstra Cc: paulus LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 402073c61669..b22176d3ebdf 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -860,7 +860,6 @@ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int active_oncpu; - int max_pertask; int exclusive; struct swevent_hlist *swevent_hlist; struct mutex hlist_mutex; @@ -883,11 +882,6 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -/* - * Set by architecture code: - */ -extern int perf_max_events; - extern int perf_pmu_register(struct pmu *pmu); extern void perf_pmu_unregister(struct pmu *pmu); -- cgit v1.2.3 From b28ab83c595e767f2028276b7398d17f2253cec0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:48:15 +0200 Subject: perf: Remove the swevent hash-table from the cpu context Separate the swevent hash-table from the cpu_context bits in preparation for per pmu cpu contexts. This keeps the swevent hash a global entity. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b22176d3ebdf..4ab4f0ca09a1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,12 +861,6 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; - struct swevent_hlist *swevent_hlist; - struct mutex hlist_mutex; - int hlist_refcount; - - /* Recursion avoidance in each contexts */ - int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { -- cgit v1.2.3 From b5ab4cd563e7ab49b27957704112a8ecade54e1f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 16:32:21 +0200 Subject: perf: Per cpu-context rotation timer Give each cpu-context its own timer so that it is a self contained entity, this eases the way for per-pmu-per-cpu contexts as well as provides the basic infrastructure to allow different rotation times per pmu. 
Things to look at: - folding the tick and these TICK_NSEC timers - separate task context rotation Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4ab4f0ca09a1..fa04537df55b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,6 +861,8 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + u64 timer_interval; + struct hrtimer timer; }; struct perf_output_handle { @@ -881,7 +883,6 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1067,8 +1068,6 @@ perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } -- cgit v1.2.3 From 108b02cfce04ee90b0a07ee0b104baffd39f5934 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:32:03 +0200 Subject: perf: Per-pmu-per-cpu contexts Allocate per-cpu contexts per pmu. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa04537df55b..22155ef3b362 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -570,7 +570,8 @@ struct perf_event; struct pmu { struct list_head entry; - int *pmu_disable_count; + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; /* * Fully disable/enable this PMU, can be used to protect from the PMI @@ -808,6 +809,7 @@ struct perf_event { * Used as a container for task events and CPU events as well: */ struct perf_event_context { + struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: -- cgit v1.2.3 From 8dc85d547285668e509f86c177bcd4ea055bcaaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Sep 2010 16:50:03 +0200 Subject: perf: Multiple task contexts Provide the infrastructure for multiple task contexts. A more flexible approach would have resulted in more pointer chases in the scheduling hot-paths. This approach has the limitation of a static number of task contexts. Since I expect most external PMUs to be system wide, or at least node wide (as per the intel uncore unit) they won't actually need a task context. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + include/linux/sched.h | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 22155ef3b362..9ecfd856ce6e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -572,6 +572,7 @@ struct pmu { int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; /* * Fully disable/enable this PMU, can be used to protect from the PMI diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..89d6023c6f82 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1160,6 +1160,12 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1431,7 +1437,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif -- cgit v1.2.3 From 89a1e18731959e9953fae15ddc1a983eb15a4f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Sep 2010 17:34:50 +0200 Subject: perf: Provide a separate task context for swevents Since software events are always schedulable, mixing them up with hardware events (who are not) can lead to funny scheduling oddities. Giving them their own context solves this. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +-------- include/linux/sched.h | 1 + 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ecfd856ce6e..c1173520f14d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -952,14 +952,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 89d6023c6f82..eb3c1ceec06e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,7 @@ struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, + perf_sw_context, perf_nr_task_contexts, }; -- cgit v1.2.3 From 4e231c7962ce711c7d8c2a4dc23ecd1e8fc28363 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Sep 2010 21:01:59 +0200 Subject: perf: Fix up delayed_put_task_struct() I missed a perf_event_ctxp user when converting it to an array. Pull this last user into perf_event.c as well and fix it up. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c1173520f14d..93bf53aa50e5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -889,6 +889,7 @@ extern void perf_event_task_sched_out(struct task_struct *task, struct task_stru extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); +extern void perf_event_delayed_put(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); @@ -1067,6 +1068,7 @@ perf_event_task_sched_out(struct task_struct *task, static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } -- cgit v1.2.3 From 38a81da2205f94e8a2a834b51a6b99c91fc7c2e8 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 13 Sep 2010 13:01:20 -0700 Subject: perf events: Clean up pid passing The kernel perf event creation path shouldn't use find_task_by_vpid() because a vpid exists in a specific namespace. find_task_by_vpid() uses current's pid namespace which isn't always the correct namespace to use for the vpid in all the places perf_event_create_kernel_counter() (and thus find_get_context()) is called. 
The goal is to clean up pid namespace handling and prevent bugs like: https://bugzilla.kernel.org/show_bug.cgi?id=17281 Instead of using pids switch find_get_context() to use task struct pointers directly. The syscall is responsible for resolving the pid to a task struct. This moves the pid namespace resolution into the syscall much like every other syscall that takes pid parameters. Signed-off-by: Matt Helsley Signed-off-by: Peter Zijlstra Cc: Robin Green Cc: Prasad Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Will Deacon Cc: Mahesh Salgaonkar LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 93bf53aa50e5..39d8860b2684 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -902,7 +902,7 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, + struct task_struct *task, perf_overflow_handler_t callback); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); -- cgit v1.2.3 From b04243ef7006cda301819f54ee7ce0a3632489e3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Sep 2010 11:28:48 +0200 Subject: perf: Complete software pmu grouping Aside from allowing software events into a !software group, allow adding !software events to pure software groups. Once we've moved the software group and attached the first !software event, the group will no longer be a pure software group and hence no longer be eligible for movement, at which point the straight ctx comparison is correct again. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Robert Richter Cc: Paul Mackerras LKML-Reference: <20100917093009.410784731@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 39d8860b2684..165287fd2cc4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -804,12 +804,18 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; +enum perf_event_context_type { + task_context, + cpu_context, +}; + /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { + enum perf_event_context_type type; struct pmu *pmu; /* * Protect the states of the events in the list, -- cgit v1.2.3 From e9d2b064149ff7ef4acbc65a1b9374ac8b218d3e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Sep 2010 11:28:50 +0200 Subject: perf: Undo the per cpu-context timer stuff Revert the timer per cpu-context timers because of unfortunate nohz interaction. Fixing that would have been somewhat ugly, so go back to driving things from the regular tick. Provide a jiffies interval feature for people who want slower rotations. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Robert Richter Cc: Yinghai Lu LKML-Reference: <20100917093009.519845633@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 165287fd2cc4..61b1e2d760fd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -870,8 +870,8 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; - u64 timer_interval; - struct hrtimer timer; + struct list_head rotation_list; + int jiffies_interval; }; struct perf_output_handle { @@ -1065,6 +1065,7 @@ extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); +extern void perf_event_task_tick(void); #else static inline void perf_event_task_sched_in(struct task_struct *task) { } @@ -1099,6 +1100,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } +static inline void perf_event_task_tick(void) { } #endif #define perf_output_put(handle, x) \ -- cgit v1.2.3 From 8b8e2ec1eeca7f6941bc81cefc9663018d6ceb57 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Sep 2010 19:21:28 +0200 Subject: percpu: Add {get,put}_cpu_ptr These are similar to {get,put}_cpu_var() except for dynamically allocated per-cpu memory. 
Signed-off-by: Peter Zijlstra Acked-by: Tejun Heo LKML-Reference: <20100917093009.252867712@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c6..0eb50832aa00 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,6 +39,15 @@ preempt_enable(); \ } while (0) +#define get_cpu_ptr(var) ({ \ + preempt_disable(); \ + this_cpu_ptr(var); }) + +#define put_cpu_ptr(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) + #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -- cgit v1.2.3 From bf5438fca2950b03c21ad868090cc1a8fcd49536 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:00 -0400 Subject: jump label: Base patch for jump label base patch to implement 'jump labeling'. Based on a new 'asm goto' inline assembly gcc mechanism, we can now branch to labels from an 'asm goto' statement. This allows us to create a 'no-op' fastpath, which can subsequently be patched with a jump to the slowpath code. This is useful for code which might be rarely used, but which we'd like to be able to call, if needed. Tracepoints are the current usecase that these are being implemented for. Acked-by: David S.
Miller Signed-off-by: Jason Baron LKML-Reference: [ cleaned up some formatting ] Signed-off-by: Steven Rostedt --- include/linux/jump_label.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/module.h | 5 +++- 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 include/linux/jump_label.h (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h new file mode 100644 index 000000000000..de58656d28e0 --- /dev/null +++ b/include/linux/jump_label.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_JUMP_LABEL_H +#define _LINUX_JUMP_LABEL_H + +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +# include +# define HAVE_JUMP_LABEL +#endif + +enum jump_label_type { + JUMP_LABEL_ENABLE, + JUMP_LABEL_DISABLE +}; + +struct module; + +#ifdef HAVE_JUMP_LABEL + +extern struct jump_entry __start___jump_table[]; +extern struct jump_entry __stop___jump_table[]; + +extern void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type); +extern void jump_label_update(unsigned long key, enum jump_label_type type); +extern void jump_label_apply_nops(struct module *mod); +extern void arch_jump_label_text_poke_early(jump_label_t addr); + +#define enable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); + +#define disable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); + +#else + +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(*key)) \ + goto label; \ +} while (0) + +#define enable_jump_label(cond_var) \ +do { \ + *(cond_var) = 1; \ +} while (0) + +#define disable_jump_label(cond_var) \ +do { \ + *(cond_var) = 0; \ +} while (0) + +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif + +#endif diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ffa..403ac26023ce 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -350,7 +350,10 @@ struct module struct
tracepoint *tracepoints; unsigned int num_tracepoints; #endif - +#ifdef HAVE_JUMP_LABEL + struct jump_entry *jump_entries; + unsigned int num_jump_entries; +#endif #ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; -- cgit v1.2.3 From 4c3ef6d79328c0e23ade60cbfc8d496123a6855c Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:08 -0400 Subject: jump label: Add jump_label_text_reserved() to reserve jump points Add a jump_label_text_reserved(void *start, void *end), so that other pieces of code that want to modify kernel text, can first verify that jump label has not reserved the instruction. Acked-by: Masami Hiramatsu Signed-off-by: Jason Baron LKML-Reference: <06236663a3a7b1c1f13576bb9eccb6d9c17b7bfe.1284733808.git.jbaron@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/jump_label.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index de58656d28e0..b72cd9f92c2e 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -20,9 +20,10 @@ extern struct jump_entry __stop___jump_table[]; extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); +extern void arch_jump_label_text_poke_early(jump_label_t addr); extern void jump_label_update(unsigned long key, enum jump_label_type type); extern void jump_label_apply_nops(struct module *mod); -extern void arch_jump_label_text_poke_early(jump_label_t addr); +extern int jump_label_text_reserved(void *start, void *end); #define enable_jump_label(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); @@ -53,6 +54,11 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } +static inline int jump_label_text_reserved(void *start, void *end) +{ + return 0; +} + #endif #endif -- cgit v1.2.3 From 8f7b50c514206211cc282a4247f7b12f18dee674 Mon Sep 17 00:00:00 2001 From: Jason 
Baron Date: Fri, 17 Sep 2010 11:09:13 -0400 Subject: jump label: Tracepoint support for jump labels Make use of the jump label infrastructure for tracepoints. Signed-off-by: Jason Baron LKML-Reference: Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 103d1b61aacb..a4a90b6726ce 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,6 +17,7 @@ #include #include #include +#include struct module; struct tracepoint; @@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (unlikely(__tracepoint_##name.state)) \ + JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ + return; \ +do_trace: \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args)); \ -- cgit v1.2.3 From 52159d98be6f26c48f5e02c7ab3c9848a85979b5 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:17 -0400 Subject: jump label: Convert dynamic debug to use jump labels Convert the 'dynamic debug' infrastructure to use jump labels. Signed-off-by: Jason Baron LKML-Reference: Signed-off-by: Steven Rostedt --- include/linux/dynamic_debug.h | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 52c0da4bdd18..bef3cda44c4c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,6 +1,8 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H +#include + /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. 
They * use independent hash functions, to reduce the chance of false positives. @@ -22,8 +24,6 @@ struct _ddebug { const char *function; const char *filename; const char *format; - char primary_hash; - char secondary_hash; unsigned int lineno:24; /* * The flags field controls the behaviour at the callsite. @@ -33,6 +33,7 @@ struct _ddebug { #define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ #define _DPRINTK_FLAGS_DEFAULT 0 unsigned int flags:8; + char enabled; } __attribute__((aligned(8))); @@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, #if defined(CONFIG_DYNAMIC_DEBUG) extern int ddebug_remove_module(const char *mod_name); -#define __dynamic_dbg_enabled(dd) ({ \ - int __ret = 0; \ - if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \ - (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \ - if (unlikely(dd.flags)) \ - __ret = 1; \ - __ret; }) - #define dynamic_pr_debug(fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ +out: ; \ } while (0) #define dynamic_dev_dbg(dev, fmt, ...) 
do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ +out: ; \ } while (0) #else -- cgit v1.2.3 From 3bf101ba42a1c89b5afbc7492e7647dae5e18735 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 27 Sep 2010 20:22:24 +0100 Subject: perf: Add helper function to return number of counters The number of counters for the registered pmu is needed in a few places so provide a helper function that returns this number. Signed-off-by: Matt Fleming Tested-by: Will Deacon Acked-by: Paul Mundt Acked-by: Peter Zijlstra Signed-off-by: Robert Richter --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 716f99b682c1..1a0219247183 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -849,6 +849,7 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_num_counters(void); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern void perf_event_task_tick(struct task_struct *task); -- cgit v1.2.3 From 84c7991059c9c4530cc911137c5bf508a41ed129 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 3 Oct 2010 21:41:13 +0100 Subject: perf: New helper function for pmu name Introduce perf_pmu_name() helper function that returns the name of the pmu. 
This gives us a generic way to get the name of a pmu regardless of how an architecture identifies it internally. Signed-off-by: Matt Fleming Acked-by: Peter Zijlstra Acked-by: Paul Mundt Signed-off-by: Robert Richter --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a0219247183..33f08dafda2f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -850,6 +850,7 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); extern int perf_num_counters(void); +extern const char *perf_pmu_name(void); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern void perf_event_task_tick(struct task_struct *task); -- cgit v1.2.3 From 56946331b28d53232115a155ba662ab3dc598952 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 8 Oct 2010 21:42:17 +0100 Subject: oprofile: Make op_name_from_perf_id() global Make op_name_from_perf_id() global so that we have a way for each architecture to construct an oprofile name for op->cpu_type. We need to remove the argument from the function prototype so that we can hide all implementation details inside the function. 
Signed-off-by: Matt Fleming Signed-off-by: Robert Richter --- include/linux/oprofile.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 5171639ecf0f..1574d4aca721 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -185,4 +185,8 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val); int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); +#ifdef CONFIG_PERF_EVENTS +char *op_name_from_perf_id(void); +#endif /* CONFIG_PERF_EVENTS */ + #endif /* OPROFILE_H */ -- cgit v1.2.3 From 3d90a00763b51e1db344a7430c966be723b67a29 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 27 Sep 2010 20:45:08 +0100 Subject: oprofile: Abstract the perf-events backend Move the perf-events backend from arch/arm/oprofile into drivers/oprofile so that the code can be shared between architectures. This allows each architecture to maintain only a single copy of the PMU accessor functions instead of one for both perf and OProfile. It also becomes possible for other architectures to delete much of their OProfile code in favour of the common code now available in drivers/oprofile/oprofile_perf.c. 
Signed-off-by: Matt Fleming Tested-by: Will Deacon Signed-off-by: Robert Richter --- include/linux/oprofile.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1574d4aca721..d67a8330b41e 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -15,6 +15,7 @@ #include #include +#include #include /* Each escaped entry is prefixed by ESCAPE_CODE @@ -186,6 +187,8 @@ int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); #ifdef CONFIG_PERF_EVENTS +int __init oprofile_perf_init(struct oprofile_operations *ops); +void __exit oprofile_perf_exit(void); char *op_name_from_perf_id(void); #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3 From 087a4eb55971dfcc8df18312faf9393d0a479f3a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 14 Oct 2010 12:10:30 +0900 Subject: stopmachine: Define __stop_machine when CONFIG_STOP_MACHINE=n Define dummy __stop_machine() function even when CONFIG_STOP_MACHINE=n. This getcpu-required version of stop_machine() will be used from poke_text_smp(). 
Signed-off-by: Masami Hiramatsu Acked-by: Tejun Heo Cc: Rusty Russell Cc: Ananth N Mavinakayanahalli Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Peter Zijlstra LKML-Reference: <20101014031030.4100.34156.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar --- include/linux/stop_machine.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 6b524a0d02e4..1808960c5059 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int stop_machine(int (*fn)(void *), void *data, - const struct cpumask *cpus) +static inline int __stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) { int ret; local_irq_disable(); @@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } +static inline int stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) +{ + return __stop_machine(fn, data, cpus); +} + #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.2.3 From b3b3a9b63f2deacfd59137e3781211d21a568ca9 Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Thu, 14 Oct 2010 11:31:42 -0400 Subject: oprofile: fix linker errors Commit e9677b3ce (oprofile, ARM: Use oprofile_arch_exit() to cleanup on failure) caused oprofile_perf_exit to be called in the cleanup path of oprofile_perf_init. The __exit tag for oprofile_perf_exit should therefore be dropped. The same has to be done for exit_driverfs as well, as this function is called from oprofile_perf_exit. Else, we get the following two linker errors. 
LD .tmp_vmlinux1 `oprofile_perf_exit' referenced in section `.init.text' of arch/arm/oprofile/built-in.o: defined in discarded section `.exit.text' of arch/arm/oprofile/built-in.o make: *** [.tmp_vmlinux1] Error 1 LD .tmp_vmlinux1 `exit_driverfs' referenced in section `.text' of arch/arm/oprofile/built-in.o: defined in discarded section `.exit.text' of arch/arm/oprofile/built-in.o make: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Anand Gadiyar Cc: Will Deacon Signed-off-by: Robert Richter --- include/linux/oprofile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index d67a8330b41e..32fb81212fd1 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -188,7 +188,7 @@ int oprofile_write_commit(struct op_entry *entry); #ifdef CONFIG_PERF_EVENTS int __init oprofile_perf_init(struct oprofile_operations *ops); -void __exit oprofile_perf_exit(void); +void oprofile_perf_exit(void); char *op_name_from_perf_id(void); #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3 From e360adbe29241a0194e10e20595360dd7b98a2b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 14:01:34 +0800 Subject: irq_work: Add generic hardirq context callbacks Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- like wakeup a task to drain buffers. Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf so that others may also benefit. The IRQ context callback is generated through self-IPIs where possible, or on architectures like powerpc the decrementer (the built-in timer facility) is set to generate an interrupt immediately. Architectures that don't have anything like this get to do with a callback from the timer tick. 
These architectures can call irq_work_run() at the tail of any IRQ handlers that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work. Signed-off-by: Peter Zijlstra Acked-by: Kyle McMartin Acked-by: Martin Schwidefsky [ various fixes ] Signed-off-by: Huang Ying LKML-Reference: <1287036094.7768.291.camel@yhuang-dev> Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 20 ++++++++++++++++++++ include/linux/perf_event.h | 11 ++--------- 2 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 include/linux/irq_work.h (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h new file mode 100644 index 000000000000..4fa09d4d0b71 --- /dev/null +++ b/include/linux/irq_work.h @@ -0,0 +1,20 @@ +#ifndef _LINUX_IRQ_WORK_H +#define _LINUX_IRQ_WORK_H + +struct irq_work { + struct irq_work *next; + void (*func)(struct irq_work *); +}; + +static inline +void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) +{ + entry->next = NULL; + entry->func = func; +} + +bool irq_work_queue(struct irq_work *entry); +void irq_work_run(void); +void irq_work_sync(struct irq_work *entry); + +#endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a9227e985207..2ebfc9ae4755 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -486,6 +486,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include #include @@ -672,11 +673,6 @@ struct perf_buffer { void *data_pages[0]; }; -struct perf_pending_entry { - struct perf_pending_entry *next; - void (*func)(struct perf_pending_entry *); -}; - struct perf_sample_data; typedef void (*perf_overflow_handler_t)(struct perf_event *, int, @@ -784,7 +780,7 @@ struct perf_event { int pending_wakeup; int pending_kill; int pending_disable; - struct perf_pending_entry pending; + struct irq_work pending; atomic_t event_limit; @@ -898,8 +894,6 @@ extern 
int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); -extern void set_perf_event_pending(void); -extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); @@ -1078,7 +1072,6 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } -static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } -- cgit v1.2.3 From d580ff8699e8811a9af37e9de4dea375401bdeec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 17:43:23 +0200 Subject: perf, hw_breakpoint: Fix crash in hw_breakpoint creation hw_breakpoint creation needs to account stuff per-task to ensure there is always sufficient hardware resources to back these things due to ptrace. With the perf per pmu context changes the event initialization no longer has access to the event context, for the simple reason that we need to first find the pmu (result of initialization) before we can find the context. This makes hw_breakpoints unhappy, because it can no longer do per task accounting, cure this by frobbing a task pointer in the event::hw bits for now... 
Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <20101014203625.391543667@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2ebfc9ae4755..97965fac55fe 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -536,6 +536,12 @@ struct hw_perf_event { struct { /* breakpoint */ struct arch_hw_breakpoint info; struct list_head bp_list; + /* + * Crufty hack to avoid the chicken and egg + * problem hw_breakpoint has with context + * creation and event initialization. + */ + struct task_struct *bp_target; }; #endif }; @@ -693,6 +699,7 @@ struct swevent_hlist { #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 +#define PERF_ATTACH_TASK 0x04 /** * struct perf_event - performance event kernel representation: -- cgit v1.2.3 From 3b6e901f839f42afb40f614418df82c08b01320a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 21:10:38 +0200 Subject: jump_label: Use more consistent naming Now that there's still only a few users around, rename things to make them more consistent.
Signed-off-by: Peter Zijlstra LKML-Reference: <20101014203625.448565169@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b72cd9f92c2e..81be4962b7a1 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -25,10 +25,10 @@ extern void jump_label_update(unsigned long key, enum jump_label_type type); extern void jump_label_apply_nops(struct module *mod); extern int jump_label_text_reserved(void *start, void *end); -#define enable_jump_label(key) \ +#define jump_label_enable(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); -#define disable_jump_label(key) \ +#define jump_label_disable(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); #else @@ -39,12 +39,12 @@ do { \ goto label; \ } while (0) -#define enable_jump_label(cond_var) \ +#define jump_label_enable(cond_var) \ do { \ *(cond_var) = 1; \ } while (0) -#define disable_jump_label(cond_var) \ +#define jump_label_disable(cond_var) \ do { \ *(cond_var) = 0; \ } while (0) -- cgit v1.2.3 From 8b92538d84e50062560ba33adbaed7887b6e4a42 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 21:39:02 +0200 Subject: jump_label: Add atomic_t interface Add an interface to allow usage of jump_labels with atomic counters. 
Signed-off-by: Peter Zijlstra Acked-by: Frederic Weisbecker LKML-Reference: <20101014203625.501657727@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/jump_label_ref.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/jump_label_ref.h (limited to 'include/linux') diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h new file mode 100644 index 000000000000..e5d012ad92c6 --- /dev/null +++ b/include/linux/jump_label_ref.h @@ -0,0 +1,44 @@ +#ifndef _LINUX_JUMP_LABEL_REF_H +#define _LINUX_JUMP_LABEL_REF_H + +#include +#include + +#ifdef HAVE_JUMP_LABEL + +static inline void jump_label_inc(atomic_t *key) +{ + if (atomic_add_return(1, key) == 1) + jump_label_enable(key); +} + +static inline void jump_label_dec(atomic_t *key) +{ + if (atomic_dec_and_test(key)) + jump_label_disable(key); +} + +#else /* !HAVE_JUMP_LABEL */ + +static inline void jump_label_inc(atomic_t *key) +{ + atomic_inc(key); +} + +static inline void jump_label_dec(atomic_t *key) +{ + atomic_dec(key); +} + +#undef JUMP_LABEL +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(__builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(key), atomic_t *), \ + atomic_read((atomic_t *)(key)), *(key)))) \ + goto label; \ +} while (0) + +#endif /* HAVE_JUMP_LABEL */ + +#endif /* _LINUX_JUMP_LABEL_REF_H */ -- cgit v1.2.3 From 82cd6def9806dcb6a325fb6abbc1d61388a15f6a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 17:57:23 +0200 Subject: perf: Use jump_labels to optimize the scheduler hooks Trades a call + conditional + ret for an unconditional jmp. 
Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <20101014203625.501657727@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 97965fac55fe..7f0e7f52af8b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -487,6 +487,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include #include @@ -895,8 +896,30 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); extern const char *perf_pmu_name(void); -extern void perf_event_task_sched_in(struct task_struct *task); -extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); +extern void __perf_event_task_sched_in(struct task_struct *task); +extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); + +extern atomic_t perf_task_events; + +static inline void perf_event_task_sched_in(struct task_struct *task) +{ + JUMP_LABEL(&perf_task_events, have_events); + return; + +have_events: + __perf_event_task_sched_in(task); +} + +static inline +void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +{ + JUMP_LABEL(&perf_task_events, have_events); + return; + +have_events: + __perf_event_task_sched_out(task, next); +} + extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); -- cgit v1.2.3 From 7e54a5a0b655734326dc78c2b5efc1eb35497bb6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 22:32:45 +0200 Subject: perf: Optimize sw events Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 20 +++++++++++--------- 
1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7f0e7f52af8b..3b80cbf509ef 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1012,18 +1012,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } -static inline void +static __always_inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - if (atomic_read(&perf_swevent_enabled[event_id])) { - struct pt_regs hot_regs; - - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; - } - __perf_sw_event(event_id, nr, nmi, regs, addr); + struct pt_regs hot_regs; + + JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); + return; + +have_event: + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; } + __perf_sw_event(event_id, nr, nmi, regs, addr); } extern void perf_event_mmap(struct vm_area_struct *vma); -- cgit v1.2.3 From ebf31f502492527e2b6b5e5cf85a4ebc7fc8a52e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 17 Oct 2010 12:15:00 +0200 Subject: jump_label: Add COND_STMT(), reducer wrappery The use of the JUMP_LABEL() construct ends up creating endless silly wrappers, create a higher level construct to reduce this clutter. 
Signed-off-by: Peter Zijlstra Cc: Jason Baron Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 10 ++++++++++ include/linux/perf_event.h | 12 ++---------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 81be4962b7a1..b67cb180e6e9 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -61,4 +61,14 @@ static inline int jump_label_text_reserved(void *start, void *end) #endif +#define COND_STMT(key, stmt) \ +do { \ + __label__ jl_enabled; \ + JUMP_LABEL(key, jl_enabled); \ + if (0) { \ +jl_enabled: \ + stmt; \ + } \ +} while (0) + #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3b80cbf509ef..057bf22a8323 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -903,21 +903,13 @@ extern atomic_t perf_task_events; static inline void perf_event_task_sched_in(struct task_struct *task) { - JUMP_LABEL(&perf_task_events, have_events); - return; - -have_events: - __perf_event_task_sched_in(task); + COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { - JUMP_LABEL(&perf_task_events, have_events); - return; - -have_events: - __perf_event_task_sched_out(task, next); + COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); } extern int perf_event_init_task(struct task_struct *child); -- cgit v1.2.3