From be8f274323c26ddc7e6fd6c44254b7abcdbe6389 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:16:58 +0900 Subject: kprobes: Prohibit probing on .entry.text code .entry.text is a code area which is used for interrupt/syscall entries, which includes a lot of sensitive code. Thus, it is better to prohibit probing on all of that code instead of only part of it. Since some symbols are already registered on the kprobe blacklist, this also removes them from the blacklist. Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: David S. Miller Cc: Frederic Weisbecker Cc: Jan Kiszka Cc: Jiri Kosina Cc: Jonathan Lebon Cc: Seiji Aguchi Link: http://lkml.kernel.org/r/20140417081658.26341.57354.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 925eaf28fca9..cdf9251f8249 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); +extern bool arch_within_kprobe_blacklist(unsigned long addr); struct kprobe_insn_cache { struct mutex mutex; -- cgit v1.2.3 From 376e242429bf8539ef39a080ac113c8799840b13 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:05 +0900 Subject: kprobes: Introduce NOKPROBE_SYMBOL() macro to maintain kprobes blacklist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce NOKPROBE_SYMBOL() macro which builds a kprobes blacklist at kernel build time. 
The usage of this macro is similar to EXPORT_SYMBOL(), placed after the function definition: NOKPROBE_SYMBOL(function); Since this macro will inhibit inlining of static/inline functions, this patch also introduces a nokprobe_inline macro for static/inline functions. In this case, we must use NOKPROBE_SYMBOL() for the inline function caller. When CONFIG_KPROBES=y, the macro stores the given function address in the "_kprobe_blacklist" section. Since the data structures are not fully initialized by the macro (because there is no "size" information), those are re-initialized at boot time by using kallsyms. Signed-off-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/20140417081705.26341.96719.stgit@ltc230.yrl.intra.hitachi.co.jp Cc: Alok Kataria Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: Christopher Li Cc: Chris Wright Cc: David S. Miller Cc: Jan-Simon Möller Cc: Jeremy Fitzhardinge Cc: Linus Torvalds Cc: Randy Dunlap Cc: Rusty Russell Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-sparse@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 9 +++++++++ include/linux/compiler.h | 2 ++ include/linux/kprobes.h | 20 +++++++++++++++++--- 3 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 146e4fffd710..40ceb3ceba79 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -109,6 +109,14 @@ #define BRANCH_PROFILE() #endif +#ifdef CONFIG_KPROBES +#define KPROBE_BLACKLIST() VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ + *(_kprobe_blacklist) \ + VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; +#else +#define KPROBE_BLACKLIST() +#endif + #ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() . 
= ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ @@ -507,6 +515,7 @@ *(.init.rodata) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ + KPROBE_BLACKLIST() \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ee7239ea1583..0300c0f5c88b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -374,7 +374,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ #ifdef CONFIG_KPROBES # define __kprobes __attribute__((__section__(".kprobes.text"))) +# define nokprobe_inline __always_inline #else # define __kprobes +# define nokprobe_inline inline #endif #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index cdf9251f8249..e059507c465d 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -205,10 +205,10 @@ struct kretprobe_blackpoint { void *addr; }; -struct kprobe_blackpoint { - const char *name; +struct kprobe_blacklist_entry { + struct list_head list; unsigned long start_addr; - unsigned long range; + unsigned long end_addr; }; #ifdef CONFIG_KPROBES @@ -477,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp) return enable_kprobe(&jp->kp); } +#ifdef CONFIG_KPROBES +/* + * Blacklist-generating macro. Specify functions which must not be probed + * by using this macro. 
+ */ +#define __NOKPROBE_SYMBOL(fname) \ +static unsigned long __used \ + __attribute__((section("_kprobe_blacklist"))) \ + _kbl_addr_##fname = (unsigned long)fname; +#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) +#else +#define NOKPROBE_SYMBOL(fname) +#endif + #endif /* _LINUX_KPROBES_H */ -- cgit v1.2.3 From 69902c718c0b476e94ed7fccd3cf29ca39fe433a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 May 2014 10:56:44 +0530 Subject: kprobes: Ensure blacklist data is aligned ARC Linux (not supporting native unaligned access) was failing to boot because __start_kprobe_blacklist was not aligned. This was because per generated vmlinux.lds it was emitted right next to .rodata with strings etc hence could be randomly unaligned. Fix that by ensuring a word alignment. While 4 would suffice for 32bit arches and problem at hand, it is probably better to put 8. | Path: (null) CPU: 0 PID: 1 Comm: swapper Not tainted | 3.15.0-rc3-next-20140430 #2 | task: 8f044000 ti: 8f01e000 task.ti: 8f01e000 | | [ECR ]: 0x00230400 => Misaligned r/w from 0x800fb0d3 | [EFA ]: 0x800fb0d3 | [BLINK ]: do_one_initcall+0x86/0x1bc | [ERET ]: init_kprobes+0x52/0x120 Signed-off-by: Vineet Gupta Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: anton Kolesov Link: http://lkml.kernel.org/r/5361DB14.7010406@synopsys.com Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 40ceb3ceba79..8e0204a68c74 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -110,7 +110,8 @@ #endif #ifdef CONFIG_KPROBES -#define KPROBE_BLACKLIST() VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ +#define KPROBE_BLACKLIST() . 
= ALIGN(8); \ + VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ *(_kprobe_blacklist) \ VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; #else -- cgit v1.2.3 From b02ef20a9fba08948e643d3eec0efadf1da01a44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 12 May 2014 18:24:45 +0200 Subject: uprobes/x86: Fix the wrong ->si_addr when xol triggers a trap If the probed insn triggers a trap, ->si_addr = regs->ip is technically correct, but this is not what the signal handler wants; we need to pass the address of the probed insn, not the address of the xol slot. Add the new arch-agnostic helper, uprobe_get_trap_addr(), and change fill_trap_info() and math_error() to use it. !CONFIG_UPROBES case in uprobes.h uses a macro to avoid include hell and ensure that it can be compiled even if an architecture doesn't define instruction_pointer(). Test-case: #include <stdio.h> #include <unistd.h> #include <signal.h> extern void probe_div(void); void sigh(int sig, siginfo_t *info, void *c) { int passed = (info->si_addr == probe_div); printf(passed ? "PASS\n" : "FAIL\n"); _exit(!passed); } int main(void) { struct sigaction sa = { .sa_sigaction = sigh, .sa_flags = SA_SIGINFO, }; sigaction(SIGFPE, &sa, NULL); asm ( "xor %ecx,%ecx\n" ".globl probe_div; probe_div:\n" "idiv %ecx\n" ); return 0; } it fails if probe_div() is probed. Note: show_unhandled_signals users should probably use this helper too, but we need to clean them up first. 
Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu --- include/linux/uprobes.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index edff2b97b864..88c3b7e8b384 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r #else /* !CONFIG_UPROBES */ struct uprobes_state { }; + +#define uprobe_get_trap_addr(regs) instruction_pointer(regs) + static inline int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { -- cgit v1.2.3 From 53b25335dd60981ad608da7890420898a34469a6 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 16 May 2014 17:12:12 -0400 Subject: perf: Disable sampled events if no PMU interrupt Add common code to generate -ENOTSUPP at event creation time if an architecture attempts to create a sampled event and PERF_PMU_NO_INTERRUPT is set. This adds a new pmu->capabilities flag. Initially we only support PERF_PMU_NO_INTERRUPT (to indicate a PMU has no support for generating hardware interrupts) but there are other capabilities that can be added later. 
Signed-off-by: Vince Weaver Acked-by: Will Deacon [peterz: rename to PERF_PMU_CAP_* and moved the pmu::capabilities word into a hole] Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1405161708060.11099@vincent-weaver-1.umelst.maine.edu Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index af6dcf1d9e47..267c8f37012c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -166,6 +166,11 @@ struct perf_event; */ #define PERF_EVENT_TXN 0x1 +/** + * pmu::capabilities flags + */ +#define PERF_PMU_CAP_NO_INTERRUPT 0x01 + /** * struct pmu - generic performance monitoring unit */ @@ -178,6 +183,11 @@ struct pmu { const char *name; int type; + /* + * various common per-pmu feature flags + */ + int capabilities; + int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; int task_ctx_nr; -- cgit v1.2.3 From bac52139f0b7ab31330e98fd87fc5a2664951050 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 May 2014 12:50:07 +0530 Subject: perf: Add new conditional branch filter 'PERF_SAMPLE_BRANCH_COND' This patch introduces new branch filter PERF_SAMPLE_BRANCH_COND which will extend the existing perf ABI. This will filter branches which are conditional. Various architectures can provide this functionality either with HW filtering support (if present) or with SW filtering of captured branch instructions. 
Signed-off-by: Anshuman Khandual Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: mpe@ellerman.id.au Cc: benh@kernel.crashing.org Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1400743210-32289-1-git-send-email-khandual@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e3fc8f09d110..d9cd853818ad 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -163,8 +163,9 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */ + PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */ }; #define PERF_SAMPLE_BRANCH_PLM_ALL \ -- cgit v1.2.3 From e041e328c4b41e1db79bfe5ba9992c2ed771ad19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 21 May 2014 17:32:19 +0200 Subject: perf: Fix perf_event_comm() vs. exec() assumption perf_event_comm() assumes that set_task_comm() is only called on exec(), and in particular that it is only called on current. Neither is true, as Dave reported a WARN triggered by set_task_comm() being called on !current. Separate the exec() hook from the comm hook. Reported-by: Dave Jones Signed-off-by: Peter Zijlstra Cc: Alexander Viro Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20140521153219.GH5226@laptop.programming.kicks-ass.net [ Build fix. 
] Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3ef6ea12806a..9b5cd1992a88 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -695,6 +695,7 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); +extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); @@ -772,7 +773,7 @@ extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); extern int __perf_event_disable(void *info); extern void perf_event_task_tick(void); -#else +#else /* !CONFIG_PERF_EVENTS: */ static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } @@ -802,6 +803,7 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } +static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -- cgit v1.2.3 From 82b897782d10fcc4930c9d4a15b175348fdd2871 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 28 May 2014 11:45:04 +0300 Subject: perf: Differentiate exec() and non-exec() comm events perf tools like 'perf report' can aggregate samples by comm strings, which generally works. However, there are other potential use-cases. 
For example, to pair up 'calls' with 'returns' accurately (from branch events like Intel BTS) it is necessary to identify whether the process has exec'd. Although a comm event is generated when an 'exec' happens it is also generated whenever the comm string is changed on a whim (e.g. by prctl PR_SET_NAME). This patch adds a flag to the comm event to differentiate one case from the other. In order to determine whether the kernel supports the new flag, a selection bit named 'exec' is added to struct perf_event_attr. The bit does nothing but will cause perf_event_open() to fail if the bit is set on kernels that do not have it defined. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/537D9EBE.7030806@intel.com Cc: Paul Mackerras Cc: Dave Jones Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Alexander Viro Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 ++-- include/linux/sched.h | 6 +++++- include/uapi/linux/perf_event.h | 9 +++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b4c1d4685bf0..707617a8c0f6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -707,7 +707,7 @@ extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks * extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern void perf_event_exec(void); -extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_comm(struct task_struct *tsk, bool exec); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -815,7 +815,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } -static inline void 
perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 221b2bde3723..ad86e1d7dbc2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,7 +2379,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern void set_task_comm(struct task_struct *tsk, const char *from); +extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); +static inline void set_task_comm(struct task_struct *tsk, const char *from) +{ + __set_task_comm(tsk, from, false); +} extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d9cd853818ad..5312fae47218 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -302,8 +302,8 @@ struct perf_event_attr { exclude_callchain_kernel : 1, /* exclude kernel callchains */ exclude_callchain_user : 1, /* exclude user callchains */ mmap2 : 1, /* include mmap with inode data */ - - __reserved_1 : 40; + comm_exec : 1, /* flag comm events that are due to an exec */ + __reserved_1 : 39; union { __u32 wakeup_events; /* wakeup every n events */ @@ -502,7 +502,12 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) #define PERF_RECORD_MISC_GUEST_USER (5 << 0) +/* + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on + * different events so can reuse the same bit position. 
+ */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) +#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) /* * Indicates that the content of PERF_SAMPLE_IP points to * the actual instruction that triggered the event. See also -- cgit v1.2.3