| author | Ingo Molnar <mingo@elte.hu> | 2010-05-03 08:29:35 +0200 | 
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2010-05-03 08:29:35 +0200 | 
| commit | 0806ebd974590ab24ab357d5d87db744e56bfe13 (patch) | |
| tree | baf96726e3d9f8c2316e509e0a4cbc99ba5fe67a | |
| parent | 090f7204dfdb5d7f18208ea81dfdba845897cedd (diff) | |
| parent | feef47d0cb530e8419dfa0b48141b538b89b1b1a (diff) | |
Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing into perf/core
28 files changed, 243 insertions, 256 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index f06010fb4838..acda512da2e2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -137,6 +137,17 @@ config HAVE_HW_BREAKPOINT
 	bool
 	depends on PERF_EVENTS
 
+config HAVE_MIXED_BREAKPOINTS_REGS
+	bool
+	depends on HAVE_HW_BREAKPOINT
+	help
+	  Depending on the arch implementation of hardware breakpoints,
+	  some of them have separate registers for data and instruction
+	  breakpoints addresses, others have mixed registers to store
+	  them but define the access type in a control register.
+	  Select this option if your arch implements breakpoints under the
+	  latter fashion.
+
 config HAVE_USER_RETURN_NOTIFIER
 	bool
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 8d90564c2bcf..e6d8ab5cfa9d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -44,6 +44,7 @@ config SUPERH32
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_ARCH_KGDB
 	select HAVE_HW_BREAKPOINT
+	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS if HAVE_HW_BREAKPOINT
 	select ARCH_HIBERNATION_POSSIBLE if MMU
diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h
index 965dd780d51b..e14cad96798f 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -46,10 +46,14 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM		2
 
+static inline int hw_breakpoint_slots(int type)
+{
+	return HBP_NUM;
+}
+
 /* arch/sh/kernel/hw_breakpoint.c */
-extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
-					 struct task_struct *tsk);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 					   unsigned long val, void *data);
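Editorial aside: the new hw_breakpoint_slots() hook lets the core constraint code ask how many registers an architecture has for each breakpoint type. SH and x86 keep instruction and data breakpoints in the same register file, so they select HAVE_MIXED_BREAKPOINTS_REGS and return HBP_NUM regardless of type. A minimal sketch of the other case — a hypothetical architecture with separate register files, using the bp_type_idx values this series adds to include/linux/hw_breakpoint.h (shown further down); the register counts are made up:

```c
/*
 * Hedged sketch, not part of this patch: hw_breakpoint_slots() for a
 * hypothetical arch with *separate* instruction and data breakpoint
 * registers.  Such an arch would not select HAVE_MIXED_BREAKPOINTS_REGS,
 * so TYPE_INST and TYPE_DATA get distinct slot pools.
 */
#include <linux/hw_breakpoint.h>

#define NUM_INST_BP_REGS	2	/* made-up instruction-register count */
#define NUM_DATA_BP_REGS	4	/* made-up data-register count */

static inline int hw_breakpoint_slots(int type)
{
	if (type == TYPE_INST)
		return NUM_INST_BP_REGS;

	return NUM_DATA_BP_REGS;	/* TYPE_DATA */
}
```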
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index 675eea7785d9..1f2cf6229862 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -120,25 +120,16 @@ static int get_hbp_len(u16 hbp_len)
 }
 
 /*
- * Check for virtual address in user space.
- */
-int arch_check_va_in_userspace(unsigned long va, u16 hbp_len)
-{
-	unsigned int len;
-
-	len = get_hbp_len(hbp_len);
-
-	return (va <= TASK_SIZE - len);
-}
-
-/*
  * Check for virtual address in kernel space.
  */
-static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
 {
 	unsigned int len;
+	unsigned long va;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	len = get_hbp_len(hbp_len);
+	va = info->address;
+	len = get_hbp_len(info->len);
 
 	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp,
-				  struct task_struct *tsk)
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	unsigned int align;
@@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 	if (info->address & align)
 		return -EINVAL;
 
-	/* Check that the virtual address is in the proper range */
-	if (tsk) {
-		if (!arch_check_va_in_userspace(info->address, info->len))
-			return -EFAULT;
-	} else {
-		if (!arch_check_va_in_kernelspace(info->address, info->len))
-			return -EFAULT;
-	}
-
 	return 0;
 }
@@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 		perf_bp_event(bp, args->regs);
 
 		/* Deliver the signal to userspace */
-		if (arch_check_va_in_userspace(bp->attr.bp_addr,
-					       bp->attr.bp_len)) {
+		if (!arch_check_bp_in_kernelspace(bp)) {
 			siginfo_t info;
 
 			info.si_signo = args->signr;
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 7759a9a93211..d4104ce9fe53 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 	bp = thread->ptrace_bps[0];
 	if (!bp) {
-		hw_breakpoint_init(&attr);
+		ptrace_breakpoint_init(&attr);
 
 		attr.bp_addr = addr;
 		attr.bp_len = HW_BREAKPOINT_LEN_2;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 97a95dfd1181..01177dcbe261 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -53,6 +53,7 @@ config X86
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_HW_BREAKPOINT
+	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 2a1bd8f4f23a..942255310e6a 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -41,12 +41,16 @@ struct arch_hw_breakpoint {
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
+static inline int hw_breakpoint_slots(int type)
+{
+	return HBP_NUM;
+}
+
 struct perf_event;
 struct pmu;
 
-extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
-					 struct task_struct *tsk);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 					   unsigned long val, void *data);
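Editorial aside: both the SH and x86 versions of arch_check_bp_in_kernelspace() now pull the address and length out of the event itself and apply the same range test. A standalone illustration of that predicate (plain userspace C with a made-up 3G/1G TASK_SIZE): a breakpoint that straddles the boundary is treated as a user-space breakpoint, because its start lies below TASK_SIZE.

```c
/* Standalone illustration of the arch_check_bp_in_kernelspace()
 * predicate: a breakpoint counts as a kernel breakpoint only if its
 * whole [va, va + len - 1] range sits at or above TASK_SIZE.
 * TASK_SIZE here is an assumed example value, not kernel code. */
#include <stdio.h>

#define TASK_SIZE	0xc0000000UL	/* hypothetical 3G/1G split */

static int bp_in_kernelspace(unsigned long va, unsigned int len)
{
	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}

int main(void)
{
	/* starts below TASK_SIZE -> user-space breakpoint */
	printf("%d\n", bp_in_kernelspace(TASK_SIZE - 2, 4));	/* prints 0 */
	/* fully at or above TASK_SIZE -> kernel breakpoint */
	printf("%d\n", bp_in_kernelspace(TASK_SIZE, 4));	/* prints 1 */
	return 0;
}
```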
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index d6cc065f519f..a8f1b803d2fd 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -189,25 +189,16 @@ static int get_hbp_len(u8 hbp_len)
 }
 
 /*
- * Check for virtual address in user space.
- */
-int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
-{
-	unsigned int len;
-
-	len = get_hbp_len(hbp_len);
-
-	return (va <= TASK_SIZE - len);
-}
-
-/*
  * Check for virtual address in kernel space.
  */
-static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
 {
 	unsigned int len;
+	unsigned long va;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	len = get_hbp_len(hbp_len);
+	va = info->address;
+	len = get_hbp_len(info->len);
 
 	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp,
-				  struct task_struct *tsk)
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	unsigned int align;
@@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 	ret = -EINVAL;
 
-	if (info->type == X86_BREAKPOINT_EXECUTE)
-		/*
-		 * Ptrace-refactoring code
-		 * For now, we'll allow instruction breakpoint only for user-space
-		 * addresses
-		 */
-		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
-			info->len != X86_BREAKPOINT_EXECUTE)
-			return ret;
-
 	switch (info->len) {
 	case X86_BREAKPOINT_LEN_1:
 		align = 0;
@@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 	if (info->address & align)
 		return -EINVAL;
 
-	/* Check that the virtual address is in the proper range */
-	if (tsk) {
-		if (!arch_check_va_in_userspace(info->address, info->len))
-			return -EFAULT;
-	} else {
-		if (!arch_check_va_in_kernelspace(info->address, info->len))
-			return -EFAULT;
-	}
-
 	return 0;
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 055be0afd330..70c4872cd8aa 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -688,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event_attr attr;
 
 	if (!t->ptrace_bps[nr]) {
-		hw_breakpoint_init(&attr);
+		ptrace_breakpoint_init(&attr);
 		/*
 		 * Put stub len and type to register (reserve) an inactive but
 		 * correct bp
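Editorial aside: with the user/kernel range checks hoisted into generic code, arch_validate_hwbkpt_settings() on x86 is reduced to encoding and alignment checks: a length-N breakpoint must be N-byte aligned, tested with an N-1 mask. A standalone illustration; the raw lengths below stand in for the X86_BREAKPOINT_LEN_* encodings that the real switch statement maps to align values 0, 1, 3, and 7.

```c
/* Standalone illustration of the alignment rule enforced above:
 * the address must have no bits set inside the (len - 1) mask. */
#include <stdio.h>

static int bp_aligned(unsigned long addr, unsigned int len)
{
	unsigned int align = len - 1;	/* 0, 1, 3, 7 for len 1, 2, 4, 8 */

	return (addr & align) == 0;
}

int main(void)
{
	printf("%d\n", bp_aligned(0x1004, 4));	/* 1: on a 4-byte boundary */
	printf("%d\n", bp_aligned(0x1006, 4));	/* 0: misaligned, -EINVAL case */
	return 0;
}
```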
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index c70d27af03f9..a2d6ea49ec56 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -9,9 +9,22 @@ enum {
 };
 
 enum {
-	HW_BREAKPOINT_R = 1,
-	HW_BREAKPOINT_W = 2,
-	HW_BREAKPOINT_X = 4,
+	HW_BREAKPOINT_EMPTY	= 0,
+	HW_BREAKPOINT_R		= 1,
+	HW_BREAKPOINT_W		= 2,
+	HW_BREAKPOINT_RW	= HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+	HW_BREAKPOINT_X		= 4,
+	HW_BREAKPOINT_INVALID   = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
+};
+
+enum bp_type_idx {
+	TYPE_INST 	= 0,
+#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
+	TYPE_DATA	= 0,
+#else
+	TYPE_DATA	= 1,
+#endif
+	TYPE_MAX
 };
 
 #ifdef __KERNEL__
@@ -34,6 +47,12 @@ static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 	attr->sample_period = 1;
 }
 
+static inline void ptrace_breakpoint_init(struct perf_event_attr *attr)
+{
+	hw_breakpoint_init(attr);
+	attr->exclude_kernel = 1;
+}
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
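Editorial aside: ptrace_breakpoint_init() is the behavioural core of the series — ptrace breakpoints are now created with exclude_kernel set, which validate_hw_breakpoint() (added below in kernel/hw_breakpoint.c) turns into a hard -EINVAL for kernel addresses. A hedged sketch of a caller, modelled on the ptrace_32.c hunk above; the address and length values are illustrative only.

```c
/*
 * Hedged sketch of how a ptrace-style caller fills in an attr with the
 * new helper.  setup_user_data_watch() is illustrative, not from the
 * patch; the real callers are in arch/sh/kernel/ptrace_32.c and
 * arch/x86/kernel/ptrace.c.
 */
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static void setup_user_data_watch(struct perf_event_attr *attr,
				  unsigned long addr)
{
	ptrace_breakpoint_init(attr);	/* breakpoint type + exclude_kernel */

	attr->bp_addr = addr;
	attr->bp_len  = HW_BREAKPOINT_LEN_2;
	attr->bp_type = HW_BREAKPOINT_W;	/* maps to the TYPE_DATA pool */
}
```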
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 03808ed342a6..684b710cbb91 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,23 +40,29 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
+
 /*
  * Constraints data
 
*/
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, *nr_task_bp_pinned[TYPE_MAX]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
+
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+__weak int hw_breakpoint_weight(struct perf_event *bp)
+{
+	return 1;
+}
+
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+		return TYPE_DATA;
+
+	return TYPE_INST;
+}
+
 /*
  * Report the maximum number of pinned breakpoints a task
  * have in this cpu
 */
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
-	for (i = HBP_NUM -1; i >= 0; i--) {
+	for (i = nr_slots[type] - 1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 {
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
 	 */
 	list_for_each_entry(bp, list, event_entry) {
 		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
+			if (find_slot_idx(bp) == type)
+				count += hw_breakpoint_weight(bp);
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+		    enum bp_type_idx type)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
 	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			slots->pinned += max_task_bp_pinned(cpu);
+			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(tsk);
-		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+			slots->pinned += task_bp_pinned(tsk, type);
+		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
 	}
@@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 	for_each_online_cpu(cpu) {
 		unsigned int nr;
 
-		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			nr += max_task_bp_pinned(cpu);
+			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(tsk);
+			nr += task_bp_pinned(tsk, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 
-		nr = per_cpu(nr_bp_flexible, cpu);
+		nr = per_cpu(nr_bp_flexible[type], cpu);
 
 		if (nr > slots->flexible)
 			slots->flexible = nr;
@@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 }
 
 /*
+ * For now, continue to consider flexible as pinned, until we can
+ * ensure no flexible event can ever be scheduled before a pinned event
+ * in a same cpu.
+ */
+static void
+fetch_this_slot(struct bp_busy_slots *slots, int weight)
+{
+	slots->pinned += weight;
+}
+
+/*
  * Add a pinned breakpoint for the given task in our constraint table
 */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+				enum bp_type_idx type, int weight)
 {
 	unsigned int *tsk_pinned;
-	int count = 0;
+	int old_count = 0;
+	int old_idx = 0;
+	int idx = 0;
 
-	count = task_bp_pinned(tsk);
+	old_count = task_bp_pinned(tsk, type);
+	old_idx = old_count - 1;
+	idx = old_idx + weight;
 
-	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
-		tsk_pinned[count]++;
-		if (count > 0)
-			tsk_pinned[count-1]--;
+		tsk_pinned[idx]++;
+		if (old_count > 0)
+			tsk_pinned[old_idx]--;
 	} else {
-		tsk_pinned[count]--;
-		if (count > 0)
-			tsk_pinned[count-1]++;
+		tsk_pinned[idx]--;
+		if (old_count > 0)
+			tsk_pinned[old_idx]++;
 	}
 }
 
 /*
  * Add/remove the given breakpoint in our constraint table
 */
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
+	       int weight)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 	/* Pinned counter task profiling */
 	if (tsk) {
 		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 			return;
 		}
 
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 		return;
 	}
 
 	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
 	else
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
 }
 
 /*
@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
+	enum bp_type_idx type;
+	int weight;
 
-	fetch_bp_busy_slots(&slots, bp);
+	/* We couldn't initialize breakpoint constraints on boot */
+	if (!constraints_initialized)
+		return -ENOMEM;
+
+	/* Basic checks */
+	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+		return -EINVAL;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+
+	fetch_bp_busy_slots(&slots, bp, type);
+	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
 		return -ENOSPC;
 
-	toggle_bp_slot(bp, true);
+	toggle_bp_slot(bp, true, type, weight);
 
 	return 0;
 }
@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
 
 static void __release_bp_slot(struct perf_event *bp)
 {
-	toggle_bp_slot(bp, false);
+	enum bp_type_idx type;
+	int weight;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+	toggle_bp_slot(bp, false, type, weight);
 }
 
 void release_bp_slot(struct perf_event *bp)
@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
 	return 0;
 }
 
+static int validate_hw_breakpoint(struct perf_event *bp)
+{
+	int ret;
+
+	ret = arch_validate_hwbkpt_settings(bp);
+	if (ret)
+		return ret;
+
+	if (arch_check_bp_in_kernelspace(bp)) {
+		if (bp->attr.exclude_kernel)
+			return -EINVAL;
+		/*
+		 * Don't let unprivileged users set a breakpoint in the trap
+		 * path to avoid trap recursion attacks.
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	return 0;
+}
+
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	if (ret)
 		return ret;
 
-	/*
-	 * Ptrace breakpoints can be temporary perf events only
-	 * meant to reserve a slot. In this case, it is created disabled and
-	 * we don't want to check the params right now (as we put a null addr)
-	 * But perf tools create events as disabled and we want to check
-	 * the params for them.
-	 * This is a quick hack that will be removed soon, once we remove
-	 * the tmp breakpoints from ptrace
-	 */
-	if (!bp->attr.disabled || !bp->overflow_handler)
-		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	ret = validate_hw_breakpoint(bp);
 
 	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
 	if (ret)
@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 	if (attr->disabled)
 		goto end;
 
-	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	err = validate_hw_breakpoint(bp);
 
 	if (!err)
 		perf_event_enable(bp);
@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
+	unsigned int **task_bp_pinned;
+	int cpu, err_cpu;
+	int i;
+
+	for (i = 0; i < TYPE_MAX; i++)
+		nr_slots[i] = hw_breakpoint_slots(i);
+
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < TYPE_MAX; i++) {
+			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+						  GFP_KERNEL);
+			if (!*task_bp_pinned)
+				goto err_alloc;
+		}
+	}
+
+	constraints_initialized = 1;
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+ err_alloc:
+	for_each_possible_cpu(err_cpu) {
+		if (err_cpu == cpu)
+			break;
+		for (i = 0; i < TYPE_MAX; i++)
+			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+	}
+
+	return -ENOMEM;
 }
 
 core_initcall(init_hw_breakpoint);
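Editorial aside: the slot accounting above is the subtlest part of the series. Per cpu and per type, tsk_pinned[i] counts how many tasks own exactly i+1 pinned breakpoints, and registering an event of weight w moves its task up w buckets (unregistering moves it back down). A self-contained simulation of that bucket shuffle — plain userspace C, not kernel code; NR_SLOTS is an assumed value:

```c
/* Standalone simulation of the tsk_pinned[] bookkeeping in
 * toggle_bp_task_slot(): adding an event of weight w moves the task
 * from bucket old_count-1 to bucket old_count-1+w. */
#include <stdio.h>

#define NR_SLOTS 4

static unsigned int tsk_pinned[NR_SLOTS];

static void account(int old_count, int weight, int enable)
{
	int old_idx = old_count - 1;
	int idx = old_idx + weight;

	if (enable) {
		tsk_pinned[idx]++;
		if (old_count > 0)
			tsk_pinned[old_idx]--;
	} else {
		tsk_pinned[idx]--;
		if (old_count > 0)
			tsk_pinned[old_idx]++;
	}
}

int main(void)
{
	account(0, 1, 1);	/* task gains its 1st bp: tsk_pinned[0]++ */
	account(1, 1, 1);	/* 2nd bp: task moves from bucket 0 to 1 */
	printf("%u %u\n", tsk_pinned[0], tsk_pinned[1]);	/* prints "0 1" */
	return 0;
}
```

This histogram is what max_task_bp_pinned() scans from the top: the highest non-empty bucket is the worst-case number of slots any single task pins on that cpu.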
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index d59cd6879477..8eaf00749b65 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -34,12 +34,6 @@
 
 #include <asm/atomic.h>
 
-/*
- * For now, let us restrict the no. of symbols traced simultaneously to number
- * of available hardware breakpoint registers.
- */
-#define KSYM_TRACER_MAX HBP_NUM
-
 #define KSYM_TRACER_OP_LEN 3 /* rw- */
 
 struct trace_ksym {
@@ -53,7 +47,6 @@ struct trace_ksym {
 
 static struct trace_array *ksym_trace_array;
 
-static unsigned int ksym_filter_entry_count;
 static unsigned int ksym_tracing_enabled;
 
 static HLIST_HEAD(ksym_filter_head);
@@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	struct trace_ksym *entry;
 	int ret = -ENOMEM;
 
-	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
-		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
-		" new requests for tracing can be accepted now.\n",
-			KSYM_TRACER_MAX);
-		return -ENOSPC;
-	}
-
 	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
@@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 
 	if (IS_ERR(entry->ksym_hbp)) {
 		ret = PTR_ERR(entry->ksym_hbp);
-		printk(KERN_INFO "ksym_tracer request failed. Try again"
-					" later!!\n");
+		if (ret == -ENOSPC) {
+			printk(KERN_ERR "ksym_tracer: Maximum limit reached."
+			" No new requests for tracing can be accepted now.\n");
+		} else {
+			printk(KERN_INFO "ksym_tracer request failed. Try again"
+					 " later!!\n");
+		}
 		goto err;
 	}
 
 	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
-	ksym_filter_entry_count++;
 
 	return 0;
@@ -265,7 +255,6 @@ static void __ksym_trace_reset(void)
 	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
 								ksym_hlist) {
 		unregister_wide_hw_breakpoint(entry->ksym_hbp);
-		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);
@@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 				goto out_unlock;
 		}
 		/* Error or "symbol:---" case: drop it */
-		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);
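Editorial aside: the ksym tracer no longer enforces its own KSYM_TRACER_MAX cap; it lets the breakpoint core signal exhaustion, where registration returns an ERR_PTR value and -ENOSPC specifically means "no free slot". A hedged sketch of that error-dispatch pattern — the function below is illustrative and not part of the patch, standing in for the IS_ERR()/PTR_ERR() handling around register_wide_hw_breakpoint() shown above:

```c
/*
 * Hedged sketch of the error dispatch trace_ksym.c now relies on.
 * check_bp_result() is a made-up name; only the -ENOSPC convention
 * comes from the patch.
 */
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>

static int check_bp_result(struct perf_event **hbp)
{
	int ret;

	if (!IS_ERR(hbp))
		return 0;

	ret = PTR_ERR(hbp);
	if (ret == -ENOSPC)
		printk(KERN_ERR "no free breakpoint slot\n");
	else
		printk(KERN_INFO "breakpoint registration failed: %d\n", ret);

	return ret;
}
```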
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
index d729cee8d987..ee6525ee6d69 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-trace-perl.txt
@@ -49,12 +49,10 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+ # perf record -a -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above options: -c 1 says to sample every event, -a to enable
-system-wide collection, -M to multiplex the output, and -R to collect
-raw samples.
+the above option: -a to enable system-wide collection.
 
 The format file for the sched_wakep event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-trace-python.txt
index a241aca77184..16a86500dcf1 100644
--- a/tools/perf/Documentation/perf-trace-python.txt
+++ b/tools/perf/Documentation/perf-trace-python.txt
@@ -93,7 +93,7 @@ don't care how it exited, so we'll use 'perf record' to record only
 the sys_enter events:
 
 ----
-# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+# perf record -a -e raw_syscalls:sys_enter
 ^C[ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
 
@@ -359,7 +359,7 @@ your script:
 
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+perf record -a -e raw_syscalls:sys_enter
 ----
 
 The 'report' script is also a shell script with the same base name as
@@ -449,12 +449,10 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+ # perf record -a -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above options: -c 1 says to sample every event, -a to enable
-system-wide collection, -M to multiplex the output, and -R to collect
-raw samples.
+the above option: -a to enable system-wide collection.
 
 The format file for the sched_wakep event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
index e6cb1474f8e8..423ad6aed056 100644
--- a/tools/perf/scripts/perl/bin/check-perf-trace-record
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
+perf record -a -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-record b/tools/perf/scripts/perl/bin/failed-syscalls-record
index 6ad9b8f5f009..eb5846bcb565 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-record
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
+perf record -a -e raw_syscalls:sys_exit $@
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record
index a828679837a8..5bfaae5a6cba 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-record
+++ b/tools/perf/scripts/perl/bin/rw-by-file-record
@@ -1,3 +1,3 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record
index 63976bf11e8b..6e0b2f7755ac 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-record
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/tools/perf/scripts/perl/bin/rwtop-record b/tools/perf/scripts/perl/bin/rwtop-record
index 63976bf11e8b..6e0b2f7755ac 100644
--- a/tools/perf/scripts/perl/bin/rwtop-record
+++ b/tools/perf/scripts/perl/bin/rwtop-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record
index 9c0cf588ff8c..9f2acaaae9f0 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-record
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-record
@@ -1,5 +1,5 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup $@
+perf record -a -e sched:sched_switch -e sched:sched_wakeup $@
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record
index c2a1a9421133..85301f2471ff 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-record
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
+perf record -a -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
index 6ad9b8f5f009..eb5846bcb565 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
+perf record -a -e raw_syscalls:sys_exit $@
diff --git a/tools/perf/scripts/python/bin/sctop-record b/tools/perf/scripts/python/bin/sctop-record
index 27ccffa26ab4..1fc5998b721d 100644
--- a/tools/perf/scripts/python/bin/sctop-record
+++ b/tools/perf/scripts/python/bin/sctop-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
index 27ccffa26ab4..1fc5998b721d 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@
diff --git a/tools/perf/scripts/python/bin/syscall-counts-record b/tools/perf/scripts/python/bin/syscall-counts-record
index 27ccffa26ab4..1fc5998b721d 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-record
+++ b/tools/perf/scripts/python/bin/syscall-counts-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index d6ef414075a6..069f261b225c 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -691,11 +691,6 @@ static int __read_expected(enum event_type expect, const char *str,
 	return ret;
 }
 
-static int read_expected_warn(enum event_type expect, const char *str, bool warn)
-{
-	return __read_expected(expect, str, 1, warn);
-}
-
 static int read_expected(enum event_type expect, const char *str)
 {
 	return __read_expected(expect, str, 1, true);
@@ -3104,90 +3099,6 @@ static void print_args(struct print_arg *args)
 	}
 }
 
-static void parse_header_field(const char *field,
-			       int *offset, int *size, bool warn)
-{
-	char *token;
-	int type;
-
-	if (read_expected(EVENT_ITEM, "field") < 0)
-		return;
-	if (read_expected(EVENT_OP, ":") < 0)
-		return;
-
-	/* type */
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		goto fail;
-	free_token(token);
-
-	if (read_expected_warn(EVENT_ITEM, field, warn) < 0)
-		return;
-	if (read_expected(EVENT_OP, ";") < 0)
-		return;
-	if (read_expected(EVENT_ITEM, "offset") < 0)
-		return;
-	if (read_expected(EVENT_OP, ":") < 0)
-		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		goto fail;
-	*offset = atoi(token);
-	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
-		return;
-	if (read_expected(EVENT_ITEM, "size") < 0)
-		return;
-	if (read_expected(EVENT_OP, ":") < 0)
-		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		goto fail;
-	*size = atoi(token);
-	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
-		return;
-	type = read_token(&token);
-	if (type != EVENT_NEWLINE) {
-		/* newer versions of the kernel have a "signed" type */
-		if (type != EVENT_ITEM)
-			goto fail;
-
-		if (strcmp(token, "signed") != 0)
-			goto fail;
-
-		free_token(token);
-
-		if (read_expected(EVENT_OP, ":") < 0)
-			return;
-
-		if (read_expect_type(EVENT_ITEM, &token))
-			goto fail;
-
-		free_token(token);
-		if (read_expected(EVENT_OP, ";") < 0)
-			return;
-
-		if (read_expect_type(EVENT_NEWLINE, &token))
-			goto fail;
-	}
- fail:
-	free_token(token);
-}
-
-int parse_header_page(char *buf, unsigned long size)
-{
-	init_input_buf(buf, size);
-
-	parse_header_field("timestamp", &header_page_ts_offset,
-			   &header_page_ts_size, true);
-	parse_header_field("commit", &header_page_size_offset,
-			   &header_page_size_size, true);
-	parse_header_field("overwrite", &header_page_overwrite_offset,
-			   &header_page_overwrite_size, false);
-	parse_header_field("data", &header_page_data_offset,
-			   &header_page_data_size, true);
-
-	return 0;
-}
-
 int parse_ftrace_file(char *buf, unsigned long size)
 {
 	struct format_field *field;
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 43f19c1fed3a..cb54cd002f49 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -53,6 +53,12 @@ static unsigned long	page_size;
 static ssize_t calc_data_size;
 static bool repipe;
 
+/* If it fails, the next read will report it */
+static void skip(int size)
+{
+	lseek(input_fd, size, SEEK_CUR);
+}
+
 static int do_read(int fd, void *buf, int size)
 {
 	int rsize = size;
@@ -184,7 +190,6 @@ static void read_ftrace_printk(void)
 static void read_header_files(void)
 {
 	unsigned long long size;
-	char *header_page;
 	char *header_event;
 	char buf[BUFSIZ];
 
@@ -194,10 +199,7 @@ static void read_header_files(void)
 		die("did not read header page");
 
 	size = read8();
-	header_page = malloc_or_die(size);
-	read_or_die(header_page, size);
-	parse_header_page(header_page, size);
-	free(header_page);
+	skip(size);
 
 	/*
 	 * The size field in the page is of type long,
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index ebfee80e4a07..406d452956db 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -244,7 +244,6 @@ extern int header_page_data_size;
 
 extern bool latency_format;
 
-int parse_header_page(char *buf, unsigned long size);
 int trace_parse_common_type(void *data);
 int trace_parse_common_pid(void *data);
 int parse_common_pc(void *data);
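Editorial aside: on the tooling side, perf no longer parses the header page at all — the new skip() helper just advances the file offset, and a failed lseek() is deliberately ignored because the next read will fail loudly anyway. A standalone illustration of the same pattern (the file name is arbitrary and only serves the demo):

```c
/* Standalone illustration of the skip() idiom added to
 * trace-event-read.c: seek past a payload instead of reading and
 * parsing it; errors are left for the next read() to surface. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void skip(int fd, off_t size)
{
	lseek(fd, size, SEEK_CUR);	/* errors surface on the next read */
}

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);	/* any readable file */
	char buf[8];

	if (fd < 0)
		return 1;

	skip(fd, 2);			/* drop the first two bytes unread */
	printf("read %zd bytes\n", read(fd, buf, sizeof(buf)));
	close(fd);
	return 0;
}
```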
