From 1ca1d8d54f925ad0eb6d9806ecd4309738f25301 Mon Sep 17 00:00:00 2001 From: Lance Ortiz Date: Thu, 3 Jan 2013 15:34:01 -0700 Subject: aerdrv: Trace Event for PCI Express Advanced Error Reporting This header file will define a new trace event that will be triggered when a AER event occurs. The following data will be provided to the trace event. char * dev_name - The name of the slot where the device resides ([domain:]bus:device.function). u32 status - Either the correctable or uncorrectable register indicating what error or errors have been see. u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED The trace event will also provide a trace string that may look like: "0000:05:00.0 PCIe Bus Error:severity=Uncorrected (Non-Fatal), Poisoned TLP" Signed-off-by: Lance Ortiz Acked-by: Mauro Carvalho Chehab Acked-by: Boris Petkov Signed-off-by: Tony Luck --- include/trace/events/ras.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 include/trace/events/ras.h (limited to 'include') diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h new file mode 100644 index 000000000000..88b878383797 --- /dev/null +++ b/include/trace/events/ras.h @@ -0,0 +1,77 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ras + +#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_AER_H + +#include +#include + + +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). + * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {BIT(0), "Receiver Error"}, \ + {BIT(6), "Bad TLP"}, \ + {BIT(7), "Bad DLLP"}, \ + {BIT(8), "RELAY_NUM Rollover"}, \ + {BIT(12), "Replay Timer Timeout"}, \ + {BIT(13), "Advisory Non-Fatal"} + +#define aer_uncorrectable_errors \ + {BIT(4), "Data Link Protocol"}, \ + {BIT(12), "Poisoned TLP"}, \ + {BIT(13), "Flow Control Protocol"}, \ + {BIT(14), "Completion Timeout"}, \ + {BIT(15), "Completer Abort"}, \ + {BIT(16), "Unexpected Completion"}, \ + {BIT(17), "Receiver Overflow"}, \ + {BIT(18), "Malformed TLP"}, \ + {BIT(19), "ECRC"}, \ + {BIT(20), "Unsupported Request"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) + __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : + __entry->severity == HW_EVENT_ERR_FATAL ? + "Fatal" : "Uncorrected", + __entry->severity == HW_EVENT_ERR_CORRECTED ? + __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + +#endif /* _TRACE_AER_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 1d5210008bd3a26daf4b06aed9d6c330dd4c83e2 Mon Sep 17 00:00:00 2001 From: Lance Ortiz Date: Thu, 3 Jan 2013 15:34:08 -0700 Subject: aerdrv: Enhanced AER logging This patch will provide a more reliable and easy way for user-space applications to have access to AER logs rather than reading them from the message buffer. It also provides a way to notify user-space when an AER event occurs. The aer driver is updated to generate a trace event of function 'aer_event' when a PCIe error is reported over the AER interface. The trace event was added to both the interrupt based aer path and the firmware first path. Signed-off-by: Lance Ortiz Acked-by: Mauro Carvalho Chehab Acked-by: Boris Petkov Signed-off-by: Tony Luck --- include/linux/aer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/aer.h b/include/linux/aer.h index 544abdb2238c..ec10e1b24c1c 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } #endif -extern void cper_print_aer(const char *prefix, int cper_severity, - struct aer_capability_regs *aer); +extern void cper_print_aer(const char *prefix, struct pci_dev *dev, + int cper_severity, struct aer_capability_regs *aer); extern int cper_severity_to_aer(int cper_severity); extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, int severity); -- cgit v1.2.3 From 418c59e49ddc77fcb7054f2c8d52c9d47403b43e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Dec 2012 13:15:08 -0500 Subject: tracing: Fix sparse warning with is_signed_type() macro Sparse complains when is_signed_type() is used on a pointer. This macro is needed for the format output used for ftrace and perf, to know if a binary field is a signed type or not. The is_signed_type() macro is used against all fields that are recorded by events to automate the operation. The problem sparse has is with the current way is_signed_type() works: ((type)-1 < 0) If "type" is a poiner, than sparse does not like it being compared to an integer (zero). The simple fix is to just give zero the same type. The runtime result stays the same. Reported-by: Robert Jarzmik Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index a3d489531d83..43ef8b6cce7f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -272,7 +272,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, extern int trace_add_event_call(struct ftrace_event_call *call); extern void trace_remove_event_call(struct ftrace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < 0) +#define is_signed_type(type) (((type)(-1)) < (type)0) int trace_set_clr_event(const char *system, const char *event, int set); -- cgit v1.2.3 From 0f1ac8fd254b6c3e77950a1c4ee67be5dc88f7e0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 15 Jan 2013 22:11:19 -0500 Subject: tracing/lockdep: Disable lockdep first in entering NMI When function tracing with either debug locks enabled or tracing preempt disabled, the add_preempt_count() is traced. This is an issue with lockdep and function tracing. As function tracing can disable interrupts, and lockdep records that change, lockdep may not be able to handle this recursion if it happens from an NMI context. The first thing that an NMI does is: #define nmi_enter() \ do { \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ lockdep_off(); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ } while (0) When the add_preempt_count() is traced, and the tracing callback disables interrupts, it will jump into the lockdep code. There's some places in lockdep that can't handle this re-entrance, and causes lockdep to fail. As the lockdep_off() (and lockdep_on) is a simple: void lockdep_off(void) { current->lockdep_recursion++; } and is never traced, it can be called first in nmi_enter() and lockdep_on() last in nmi_exit(). Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/hardirq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 624ef3f45c8e..57bfdce8fb90 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -180,10 +180,10 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ + lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ - lockdep_off(); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ } while (0) @@ -192,10 +192,10 @@ extern void irq_exit(void); do { \ trace_hardirq_exit(); \ rcu_nmi_exit(); \ - lockdep_on(); \ BUG_ON(!in_nmi()); \ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ + lockdep_on(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit v1.2.3 From 06aeaaeabf69da4a3e86df532425640f51b01cef Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Sep 2012 17:15:17 +0900 Subject: ftrace: Move ARCH_SUPPORTS_FTRACE_SAVE_REGS in Kconfig Move SAVE_REGS support flag into Kconfig and rename it to CONFIG_DYNAMIC_FTRACE_WITH_REGS. This also introduces CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS which indicates the architecture depending part of ftrace has a code that saves full registers. On the other hand, CONFIG_DYNAMIC_FTRACE_WITH_REGS indicates the code is enabled. Link: http://lkml.kernel.org/r/20120928081516.3560.72534.stgit@ltc138.sdl.hitachi.co.jp Cc: Ingo Molnar Cc: Ananth N Mavinakayanahalli Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 92691d85c320..e5ca8ef50e9b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * SAVE_REGS - The ftrace_ops wants regs saved at each function called * and passed to the callback. If this flag is set, but the * architecture does not support passing regs - * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the * ftrace_ops will fail to register, unless the next flag * is set. * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the @@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command); #endif #ifndef FTRACE_REGS_ADDR -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) #else # define FTRACE_REGS_ADDR FTRACE_ADDR @@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS /** * ftrace_modify_call - convert from one addr to another (no nop) * @rec: the mcount call site record -- cgit v1.2.3 From e7dbfe349d12eabb7783b117e0c115f6f3d9ef9e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Sep 2012 17:15:20 +0900 Subject: kprobes/x86: Move ftrace-based kprobe code into kprobes-ftrace.c Split ftrace-based kprobes code from kprobes, and introduce CONFIG_(HAVE_)KPROBES_ON_FTRACE Kconfig flags. For the cleanup reason, this also moves kprobe_ftrace check into skip_singlestep. Link: http://lkml.kernel.org/r/20120928081520.3560.25624.stgit@ltc138.sdl.hitachi.co.jp Cc: Ingo Molnar Cc: Ananth N Mavinakayanahalli Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/kprobes.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 23755ba42abc..4b6ef4d33cc2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -49,16 +49,6 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 -/* - * If function tracer is enabled and the arch supports full - * passing of pt_regs to function tracing, then kprobes can - * optimize on top of function tracing. - */ -#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \ - && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE) -# define KPROBES_CAN_USE_FTRACE -#endif - /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) @@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif #endif /* CONFIG_OPTPROBES */ -#ifdef KPROBES_CAN_USE_FTRACE +#ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); -- cgit v1.2.3 From b000c8065a92b0fe0e1694f41b2c8d8ba7b7b1ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Jan 2013 10:31:20 -0500 Subject: tracing: Remove the extra 4 bytes of padding in events Due to a userspace issue with PowerTop v2beta, which hardcoded the offset of event fields that it was using, it broke when we removed the Big Kernel Lock counter from the event header. (commit e6e1e2593 "tracing: Remove lock_depth from event entry") Because this broke userspace, it was determined that we must keep those 4 bytes around. (commit a3a4a5acd "Regression: partial revert "tracing: Remove lock_depth from event entry"") This unfortunately wastes space in the ring buffer. 4 bytes per event, where a lot of events are just 24 bytes. That's 16% of the buffer wasted. A million events will add 4 megs of white space into the buffer. It was later noticed that PowerTop v2beta could not work on systems where the kernel was 64 bit but the userspace was 32 bits. The reason was because the offsets are different between the two and the hard coded offset of one would not work with the other. With PowerTop v2 final, it implemented the same interface that both perf and trace-cmd use. That is, it reads the format file of the event to find the offsets of the fields it needs. This fixes the problem with running powertop on a 32 bit userspace running on a 64 bit kernel. It also no longer requires the 4 byte padding. As PowerTop v2 has been out for a while, and is included in all major distributions, it is time that we can safely remove the 4 bytes of padding. Users of PowerTop v2beta should upgrade to PowerTop v2 final. Cc: Linus Torvalds Acked-by: Arjan van de Ven Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 43ef8b6cce7f..6f8d0b77006b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -49,7 +49,6 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; - int padding; }; #define FTRACE_MAX_EVENT \ -- cgit v1.2.3 From ba6fdda46b377034c782c0b89c8f1090b31eabd8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 22 Dec 2012 16:59:51 +0100 Subject: profiling: Remove unused timer hook The last remaining user was oprofile and its use has been removed a while ago in commit bc078e4eab65f11bba ("oprofile: convert oprofile from timer_hook to hrtimer"). There doesn't seem to be any upstream user of this hook for about two years now. And I'm not even aware of any out of tree user. Let's remove it. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1356191991-2251-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/profile.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/profile.h b/include/linux/profile.h index a0fc32279fc0..21123902366d 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n); int profile_event_register(enum profile_type, struct notifier_block * n); int profile_event_unregister(enum profile_type, struct notifier_block * n); -int register_timer_hook(int (*hook)(struct pt_regs *)); -void unregister_timer_hook(int (*hook)(struct pt_regs *)); - struct pt_regs; #else @@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_ #define profile_handoff_task(a) (0) #define profile_munmap(a) do { } while (0) -static inline int register_timer_hook(int (*hook)(struct pt_regs *)) -{ - return -ENOSYS; -} - -static inline void unregister_timer_hook(int (*hook)(struct pt_regs *)) -{ - return; -} - #endif /* CONFIG_PROFILING */ #endif /* _LINUX_PROFILE_H */ -- cgit v1.2.3 From b878e7fb22ea48b0585bbbbef249f7efc6d42748 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 8 Jan 2013 14:44:25 -0500 Subject: perf: Missing field in PERF_RECORD_SAMPLE documentation While trying to write a perf_event/mmap test for my perf_event test-suite I came across a missing field description in the PERF_RECORD_SAMPLE documentation in perf_event.h Signed-off-by: Vince Weaver Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1301081439300.24507@vincent-weaver-1.um.maine.edu Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4f63c05d27c9..9fa9c622a7f4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -579,7 +579,8 @@ enum perf_event_type { * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER -- cgit v1.2.3 From ad964704ba9326d027fc10fd0099b7c880e50172 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 29 Jan 2013 17:45:49 -0500 Subject: ring-buffer: Add stats field for amount read from trace ring buffer Add a stat about the number of events read from the ring buffer: # cat /debug/tracing/per_cpu/cpu0/stats entries: 39869 overrun: 870512 commit overrun: 0 bytes: 1449912 oldest event ts: 6561.368690 now ts: 6565.246426 dropped events: 0 read events: 112 <-- Added Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 519777e3fa01..1342e69542f3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, -- cgit v1.2.3 From debdd57f5145f3c6a4b3f8d0126abd1a2def7fc6 Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 26 Dec 2012 11:53:00 +0900 Subject: tracing: Make a snapshot feature available from userspace Ftrace has a snapshot feature available from kernel space and latency tracers (e.g. irqsoff) are using it. This patch enables user applictions to take a snapshot via debugfs. Add "snapshot" debugfs file in "tracing" directory. snapshot: This is used to take a snapshot and to read the output of the snapshot. # echo 1 > snapshot This will allocate the spare buffer for snapshot (if it is not allocated), and take a snapshot. # cat snapshot This will show contents of the snapshot. # echo 0 > snapshot This will free the snapshot if it is allocated. Any other positive values will clear the snapshot contents if the snapshot is allocated, or return EINVAL if it is not allocated. Link: http://lkml.kernel.org/r/20121226025300.3252.86850.stgit@liselsia Cc: Jiri Olsa Cc: David Sharp Signed-off-by: Hiraku Toyooka [ Fixed irqsoff selftest and also a conflict with a change that fixes the update_max_tr. ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 6f8d0b77006b..13a54d0bdfa8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -83,6 +83,9 @@ struct trace_iterator { long idx; cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; }; enum trace_iter_flags { -- cgit v1.2.3 From 2663960c159f23cbfb8e196c96e9fc9f3b5f1a8d Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 22 Jan 2013 22:24:23 -0800 Subject: perf: Make EVENT_ATTR global Rename EVENT_ATTR() to PMU_EVENT_ATTR() and make it global so it is available to all architectures. Further to allow architectures flexibility, have PMU_EVENT_ATTR() pass in the variable name as a parameter. Changelog[v2] - [Jiri Olsa] No need to define PMU_EVENT_PTR() Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Anton Blanchard Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20130123062422.GC13720@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6bfb2faa0b19..42adf012145d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -817,6 +817,17 @@ do { \ } while (0) +struct perf_pmu_events_attr { + struct device_attribute attr; + u64 id; +}; + +#define PMU_EVENT_ATTR(_name, _var, _id, _show) \ +static struct perf_pmu_events_attr _var = { \ + .attr = __ATTR(_name, 0444, _show, NULL), \ + .id = _id, \ +}; + #define PMU_FORMAT_ATTR(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ -- cgit v1.2.3 From fe20d71f25400cccc8bffef865f79250be7dbc81 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 21 Nov 2012 17:32:30 +0100 Subject: uprobes: Kill uprobe_consumer->filter() uprobe_consumer->filter() is pointless in its current form, kill it. We will add it back, but with the different signature/semantics. Perhaps we will even re-introduce the callsite in handler_chain(), but not to just skip uc->handler(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4f628a6fc5b4..83742b91ff73 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -37,11 +37,6 @@ struct inode; struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); - /* - * filter is optional; If a filter exists, handler is run - * if and only if filter returns true. - */ - bool (*filter)(struct uprobe_consumer *self, struct task_struct *task); struct uprobe_consumer *next; }; -- cgit v1.2.3 From 8a7f2fa0dea3b019500961b86d765e6fdd4bffb2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 28 Dec 2012 17:58:38 +0100 Subject: uprobes: Reintroduce uprobe_consumer->filter() Finally add uprobe_consumer->filter() and change consumer_filter() to actually call this method. Note that ->filter() accepts mm_struct, not task_struct. Because: 1. We do not have for_each_mm_user(mm, task). 2. Even if we implement for_each_mm_user(), ->filter() can use it itself. 3. It is not clear who will actually need this interface to do the "nontrivial" filtering. Another argument is "enum uprobe_filter_ctx", consumer->filter() can use it to figure out why/where it was called. For example, perhaps we can add UPROBE_FILTER_PRE_REGISTER used by build_map_info() to quickly "nack" the unwanted mm's. In this case consumer should know that it is called under ->i_mmap_mutex. See the previous discussion at http://marc.info/?t=135214229700002 Perhaps we should pass more arguments, vma/vaddr? Note: this patch obviously can't help to filter out the child created by fork(), this will be addressed later. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 83742b91ff73..c2df6934fdc6 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -35,8 +35,17 @@ struct inode; # include #endif +enum uprobe_filter_ctx { + UPROBE_FILTER_REGISTER, + UPROBE_FILTER_UNREGISTER, + UPROBE_FILTER_MMAP, +}; + struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + bool (*filter)(struct uprobe_consumer *self, + enum uprobe_filter_ctx ctx, + struct mm_struct *mm); struct uprobe_consumer *next; }; -- cgit v1.2.3 From da1816b1caeccdff04531e763bb35d7caa3ed19f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 29 Dec 2012 17:49:11 +0100 Subject: uprobes: Teach handler_chain() to filter out the probed task Currrently the are 2 problems with pre-filtering: 1. It is not possible to add/remove a task (mm) after uprobe_register() 2. A forked child inherits all breakpoints and uprobe_consumer can not control this. This patch does the first step to improve the filtering. handler_chain() removes the breakpoints installed by this uprobe from current->mm if all handlers return UPROBE_HANDLER_REMOVE. Note that handler_chain() relies on ->register_rwsem to avoid the race with uprobe_register/unregister which can add/del a consumer, or even remove and then insert the new uprobe at the same address. Perhaps we will add uprobe_apply_mm(uprobe, mm, is_register) and teach copy_mm() to do filter(UPROBE_FILTER_FORK), but I think this change makes sense anyway. Note: instead of checking the retcode from uc->handler, we could add uc->filter(UPROBE_FILTER_BPHIT). But I think this is not optimal to call 2 hooks in a row. This buys nothing, and if handler/filter do something nontrivial they will probably do the same work twice. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index c2df6934fdc6..95d0002efda5 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -35,6 +35,9 @@ struct inode; # include #endif +#define UPROBE_HANDLER_REMOVE 1 +#define UPROBE_HANDLER_MASK 1 + enum uprobe_filter_ctx { UPROBE_FILTER_REGISTER, UPROBE_FILTER_UNREGISTER, -- cgit v1.2.3 From f22c1bb6b4706be3502b378cb14564449b15f983 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 2 Feb 2013 16:27:52 +0100 Subject: perf: Introduce hw_perf_event->tp_target and ->tp_list sys_perf_event_open()->perf_init_event(event) is called before find_get_context(event), this means that event->ctx == NULL when class->reg(TRACE_REG_PERF_REGISTER/OPEN) is called and thus it can't know if this event is per-task or system-wide. This patch adds hw_perf_event->tp_target for PERF_TYPE_TRACEPOINT, this is analogous to PERF_TYPE_BREAKPOINT/bp_target we already have. The patch also moves ->bp_target up so that it can overlap with the new member, this can help the compiler to generate the better code. trace_uprobe_register() will use it for prefiltering to avoid the unnecessary breakpoints in mm's we do not want to trace. ->tp_target doesn't have its own reference, but we can rely on the fact that either sys_perf_event_open() holds a reference, or it is equal to event->ctx->task. So this pointer is always valid until free_event(). Also add the "struct list_head tp_list" into this union. It is not strictly necessary, but it can simplify the next changes and we can add it for free. Signed-off-by: Oleg Nesterov --- include/linux/perf_event.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 42adf012145d..e47ee462c2f2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -135,16 +135,21 @@ struct hw_perf_event { struct { /* software */ struct hrtimer hrtimer; }; + struct { /* tracepoint */ + struct task_struct *tp_target; + /* for tp_event->class */ + struct list_head tp_list; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ - struct arch_hw_breakpoint info; - struct list_head bp_list; /* * Crufty hack to avoid the chicken and egg * problem hw_breakpoint has with context * creation and event initalization. */ struct task_struct *bp_target; + struct arch_hw_breakpoint info; + struct list_head bp_list; }; #endif }; -- cgit v1.2.3 From bdf8647c44766590ed02f9a84a450a796558b753 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 3 Feb 2013 19:21:12 +0100 Subject: uprobes: Introduce uprobe_apply() Currently it is not possible to change the filtering constraints after uprobe_register(), so a consumer can not, say, start to trace a task/mm which was previously filtered out, or remove the no longer needed bp's. Introduce uprobe_apply() which simply does register_for_each_vma() again to consult uprobe_consumer->filter() and install/remove the breakpoints. The only complication is that register_for_each_vma() can no longer assume that uprobe->consumers should be consulter if is_register == T, so we change it to accept "struct uprobe_consumer *new" instead. Unlike uprobe_register(), uprobe_apply(true) doesn't do "unregister" if register_for_each_vma() fails, it is up to caller to handle the error. Note: we probably need to cleanup the current interface, it is strange that uprobe_apply/unregister need inode/offset. We should either change uprobe_register() to return "struct uprobe *", or add a private ->uprobe member in uprobe_consumer. And in the long term uprobe_apply() should take a single argument, uprobe or consumer, even "bool add" should go away. Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 95d0002efda5..02b83db8e2c5 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -101,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -124,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { return -ENOSYS; } +static inline int +uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +{ + return -ENOSYS; +} static inline void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { -- cgit v1.2.3