diff options
48 files changed, 1501 insertions, 670 deletions
| diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83c4bb1d917d..3ea3dc487047 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,6 +121,7 @@  #define MSR_AMD64_IBSDCLINAD		0xc0011038  #define MSR_AMD64_IBSDCPHYSAD		0xc0011039  #define MSR_AMD64_IBSCTL		0xc001103a +#define MSR_AMD64_IBSBRTARGET		0xc001103b  /* Fam 10h MSRs */  #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6e742cc4251b..550e26b1dbb3 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -111,17 +111,18 @@ union cpuid10_edx {  #define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)  /* IbsFetchCtl bits/masks */ -#define IBS_FETCH_RAND_EN		(1ULL<<57) -#define IBS_FETCH_VAL			(1ULL<<49) -#define IBS_FETCH_ENABLE		(1ULL<<48) -#define IBS_FETCH_CNT			0xFFFF0000ULL -#define IBS_FETCH_MAX_CNT		0x0000FFFFULL +#define IBS_FETCH_RAND_EN	(1ULL<<57) +#define IBS_FETCH_VAL		(1ULL<<49) +#define IBS_FETCH_ENABLE	(1ULL<<48) +#define IBS_FETCH_CNT		0xFFFF0000ULL +#define IBS_FETCH_MAX_CNT	0x0000FFFFULL  /* IbsOpCtl bits */ -#define IBS_OP_CNT_CTL			(1ULL<<19) -#define IBS_OP_VAL			(1ULL<<18) -#define IBS_OP_ENABLE			(1ULL<<17) -#define IBS_OP_MAX_CNT			0x0000FFFFULL +#define IBS_OP_CNT_CTL		(1ULL<<19) +#define IBS_OP_VAL		(1ULL<<18) +#define IBS_OP_ENABLE		(1ULL<<17) +#define IBS_OP_MAX_CNT		0x0000FFFFULL +#define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */  #ifdef CONFIG_PERF_EVENTS  extern void init_hw_perf_events(void); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c1e8c7a51164..ed6310183efb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -237,6 +237,7 @@ struct x86_pmu {  	 * Intel DebugStore bits  	 */  	int		bts, pebs; +	int		bts_active, pebs_active;  	int		pebs_record_size;  	void		(*drain_pebs)(struct pt_regs 
*regs);  	struct event_constraint *pebs_constraints; @@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}  #endif -static int reserve_ds_buffers(void); +static void reserve_ds_buffers(void);  static void release_ds_buffers(void);  static void hw_perf_event_destroy(struct perf_event *event) @@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)  	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&  	    (hwc->sample_period == 1)) {  		/* BTS is not supported by this architecture. */ -		if (!x86_pmu.bts) +		if (!x86_pmu.bts_active)  			return -EOPNOTSUPP;  		/* BTS is currently only allowed for user-mode. */ @@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)  		int precise = 0;  		/* Support for constant skid */ -		if (x86_pmu.pebs) +		if (x86_pmu.pebs_active) {  			precise++; -		/* Support for IP fixup */ -		if (x86_pmu.lbr_nr) -			precise++; +			/* Support for IP fixup */ +			if (x86_pmu.lbr_nr) +				precise++; +		}  		if (event->attr.precise_ip > precise)  			return -EOPNOTSUPP; @@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)  		if (atomic_read(&active_events) == 0) {  			if (!reserve_pmc_hardware())  				err = -EBUSY; -			else { -				err = reserve_ds_buffers(); -				if (err) -					release_pmc_hardware(); -			} +			else +				reserve_ds_buffers();  		}  		if (!err)  			atomic_inc(&active_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 4977f9c400e5..b7dcd9f2b8a0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)  	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);  } +static int alloc_pebs_buffer(int cpu) +{ +	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; +	int node = cpu_to_node(cpu); +	int max, thresh = 1; /* always use a single PEBS record */ +	void *buffer; + +	if (!x86_pmu.pebs) +		
return 0; + +	buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); +	if (unlikely(!buffer)) +		return -ENOMEM; + +	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + +	ds->pebs_buffer_base = (u64)(unsigned long)buffer; +	ds->pebs_index = ds->pebs_buffer_base; +	ds->pebs_absolute_maximum = ds->pebs_buffer_base + +		max * x86_pmu.pebs_record_size; + +	ds->pebs_interrupt_threshold = ds->pebs_buffer_base + +		thresh * x86_pmu.pebs_record_size; + +	return 0; +} + +static void release_pebs_buffer(int cpu) +{ +	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + +	if (!ds || !x86_pmu.pebs) +		return; + +	kfree((void *)(unsigned long)ds->pebs_buffer_base); +	ds->pebs_buffer_base = 0; +} + +static int alloc_bts_buffer(int cpu) +{ +	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; +	int node = cpu_to_node(cpu); +	int max, thresh; +	void *buffer; + +	if (!x86_pmu.bts) +		return 0; + +	buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); +	if (unlikely(!buffer)) +		return -ENOMEM; + +	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; +	thresh = max / 16; + +	ds->bts_buffer_base = (u64)(unsigned long)buffer; +	ds->bts_index = ds->bts_buffer_base; +	ds->bts_absolute_maximum = ds->bts_buffer_base + +		max * BTS_RECORD_SIZE; +	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - +		thresh * BTS_RECORD_SIZE; + +	return 0; +} + +static void release_bts_buffer(int cpu) +{ +	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + +	if (!ds || !x86_pmu.bts) +		return; + +	kfree((void *)(unsigned long)ds->bts_buffer_base); +	ds->bts_buffer_base = 0; +} + +static int alloc_ds_buffer(int cpu) +{ +	int node = cpu_to_node(cpu); +	struct debug_store *ds; + +	ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); +	if (unlikely(!ds)) +		return -ENOMEM; + +	per_cpu(cpu_hw_events, cpu).ds = ds; + +	return 0; +} + +static void release_ds_buffer(int cpu) +{ +	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + +	if (!ds) +		
return; + +	per_cpu(cpu_hw_events, cpu).ds = NULL; +	kfree(ds); +} +  static void release_ds_buffers(void)  {  	int cpu; @@ -82,93 +183,77 @@ static void release_ds_buffers(void)  		return;  	get_online_cpus(); -  	for_each_online_cpu(cpu)  		fini_debug_store_on_cpu(cpu);  	for_each_possible_cpu(cpu) { -		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - -		if (!ds) -			continue; - -		per_cpu(cpu_hw_events, cpu).ds = NULL; - -		kfree((void *)(unsigned long)ds->pebs_buffer_base); -		kfree((void *)(unsigned long)ds->bts_buffer_base); -		kfree(ds); +		release_pebs_buffer(cpu); +		release_bts_buffer(cpu); +		release_ds_buffer(cpu);  	} -  	put_online_cpus();  } -static int reserve_ds_buffers(void) +static void reserve_ds_buffers(void)  { -	int cpu, err = 0; +	int bts_err = 0, pebs_err = 0; +	int cpu; + +	x86_pmu.bts_active = 0; +	x86_pmu.pebs_active = 0;  	if (!x86_pmu.bts && !x86_pmu.pebs) -		return 0; +		return; + +	if (!x86_pmu.bts) +		bts_err = 1; + +	if (!x86_pmu.pebs) +		pebs_err = 1;  	get_online_cpus();  	for_each_possible_cpu(cpu) { -		struct debug_store *ds; -		void *buffer; -		int max, thresh; +		if (alloc_ds_buffer(cpu)) { +			bts_err = 1; +			pebs_err = 1; +		} + +		if (!bts_err && alloc_bts_buffer(cpu)) +			bts_err = 1; -		err = -ENOMEM; -		ds = kzalloc(sizeof(*ds), GFP_KERNEL); -		if (unlikely(!ds)) +		if (!pebs_err && alloc_pebs_buffer(cpu)) +			pebs_err = 1; + +		if (bts_err && pebs_err)  			break; -		per_cpu(cpu_hw_events, cpu).ds = ds; - -		if (x86_pmu.bts) { -			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); -			if (unlikely(!buffer)) -				break; - -			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; -			thresh = max / 16; - -			ds->bts_buffer_base = (u64)(unsigned long)buffer; -			ds->bts_index = ds->bts_buffer_base; -			ds->bts_absolute_maximum = ds->bts_buffer_base + -				max * BTS_RECORD_SIZE; -			ds->bts_interrupt_threshold = ds->bts_absolute_maximum - -				thresh * BTS_RECORD_SIZE; -		} +	} -		if (x86_pmu.pebs) { -			buffer = 
kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); -			if (unlikely(!buffer)) -				break; - -			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; - -			ds->pebs_buffer_base = (u64)(unsigned long)buffer; -			ds->pebs_index = ds->pebs_buffer_base; -			ds->pebs_absolute_maximum = ds->pebs_buffer_base + -				max * x86_pmu.pebs_record_size; -			/* -			 * Always use single record PEBS -			 */ -			ds->pebs_interrupt_threshold = ds->pebs_buffer_base + -				x86_pmu.pebs_record_size; -		} +	if (bts_err) { +		for_each_possible_cpu(cpu) +			release_bts_buffer(cpu); +	} -		err = 0; +	if (pebs_err) { +		for_each_possible_cpu(cpu) +			release_pebs_buffer(cpu);  	} -	if (err) -		release_ds_buffers(); -	else { +	if (bts_err && pebs_err) { +		for_each_possible_cpu(cpu) +			release_ds_buffer(cpu); +	} else { +		if (x86_pmu.bts && !bts_err) +			x86_pmu.bts_active = 1; + +		if (x86_pmu.pebs && !pebs_err) +			x86_pmu.pebs_active = 1; +  		for_each_online_cpu(cpu)  			init_debug_store_on_cpu(cpu);  	}  	put_online_cpus(); - -	return err;  }  /* @@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)  	if (!event)  		return 0; -	if (!ds) +	if (!x86_pmu.bts_active)  		return 0;  	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base; @@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)  	struct pebs_record_core *at, *top;  	int n; -	if (!ds || !x86_pmu.pebs) +	if (!x86_pmu.pebs_active)  		return;  	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; @@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)  	u64 status = 0;  	int bit, n; -	if (!ds || !x86_pmu.pebs) +	if (!x86_pmu.pebs_active)  		return;  	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; @@ -630,9 +715,8 @@ static void intel_ds_init(void)  #else /* CONFIG_CPU_SUP_INTEL */ -static int reserve_ds_buffers(void) +static void reserve_ds_buffers(void)  { -	return 0;  }  static void release_ds_buffers(void) diff --git 
a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index bd1489c3ce09..4e8baad36d37 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)  		case 0x11:  			cpu_type = "x86-64/family11h";  			break; +		case 0x12: +			cpu_type = "x86-64/family12h"; +			break; +		case 0x14: +			cpu_type = "x86-64/family14h"; +			break;  		default:  			return -ENODEV;  		} diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 42fb46f83883..a011bcc0f943 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];  static u32 ibs_caps; -struct op_ibs_config { +struct ibs_config {  	unsigned long op_enabled;  	unsigned long fetch_enabled;  	unsigned long max_cnt_fetch;  	unsigned long max_cnt_op;  	unsigned long rand_en;  	unsigned long dispatched_ops; +	unsigned long branch_target;  }; -static struct op_ibs_config ibs_config; -static u64 ibs_op_ctl; +struct ibs_state { +	u64		ibs_op_ctl; +	int		branch_target; +	unsigned long	sample_size; +}; + +static struct ibs_config ibs_config; +static struct ibs_state ibs_state;  /*   * IBS cpuid feature detection @@ -71,8 +78,16 @@ static u64 ibs_op_ctl;   * bit 0 is used to indicate the existence of IBS.   
*/  #define IBS_CAPS_AVAIL			(1U<<0) +#define IBS_CAPS_FETCHSAM		(1U<<1) +#define IBS_CAPS_OPSAM			(1U<<2)  #define IBS_CAPS_RDWROPCNT		(1U<<3)  #define IBS_CAPS_OPCNT			(1U<<4) +#define IBS_CAPS_BRNTRGT		(1U<<5) +#define IBS_CAPS_OPCNTEXT		(1U<<6) + +#define IBS_CAPS_DEFAULT		(IBS_CAPS_AVAIL		\ +					 | IBS_CAPS_FETCHSAM	\ +					 | IBS_CAPS_OPSAM)  /*   * IBS APIC setup @@ -99,12 +114,12 @@ static u32 get_ibs_caps(void)  	/* check IBS cpuid feature flags */  	max_level = cpuid_eax(0x80000000);  	if (max_level < IBS_CPUID_FEATURES) -		return IBS_CAPS_AVAIL; +		return IBS_CAPS_DEFAULT;  	ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);  	if (!(ibs_caps & IBS_CAPS_AVAIL))  		/* cpuid flags not valid */ -		return IBS_CAPS_AVAIL; +		return IBS_CAPS_DEFAULT;  	return ibs_caps;  } @@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,  		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);  		if (ctl & IBS_OP_VAL) {  			rdmsrl(MSR_AMD64_IBSOPRIP, val); -			oprofile_write_reserve(&entry, regs, val, -					       IBS_OP_CODE, IBS_OP_SIZE); +			oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, +					       ibs_state.sample_size);  			oprofile_add_data64(&entry, val);  			rdmsrl(MSR_AMD64_IBSOPDATA, val);  			oprofile_add_data64(&entry, val); @@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,  			oprofile_add_data64(&entry, val);  			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);  			oprofile_add_data64(&entry, val); +			if (ibs_state.branch_target) { +				rdmsrl(MSR_AMD64_IBSBRTARGET, val); +				oprofile_add_data(&entry, (unsigned long)val); +			}  			oprofile_write_commit(&entry);  			/* reenable the IRQ */ -			ctl = op_amd_randomize_ibs_op(ibs_op_ctl); +			ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);  			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);  		}  	} @@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void)  	if (!ibs_caps)  		return; +	memset(&ibs_state, 0, sizeof(ibs_state)); + +	/* +	 * Note: Since the max count settings may be out of range we +	 * write back the 
actual used values so that userland can read +	 * it. +	 */ +  	if (ibs_config.fetch_enabled) { -		val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; +		val = ibs_config.max_cnt_fetch >> 4; +		val = min(val, IBS_FETCH_MAX_CNT); +		ibs_config.max_cnt_fetch = val << 4;  		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;  		val |= IBS_FETCH_ENABLE;  		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);  	}  	if (ibs_config.op_enabled) { -		ibs_op_ctl = ibs_config.max_cnt_op >> 4; +		val = ibs_config.max_cnt_op >> 4;  		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {  			/*  			 * IbsOpCurCnt not supported.  See  			 * op_amd_randomize_ibs_op() for details.  			 */ -			ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); +			val = clamp(val, 0x0081ULL, 0xFF80ULL); +			ibs_config.max_cnt_op = val << 4;  		} else {  			/*  			 * The start value is randomized with a @@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void)  			 * with the half of the randomized range. Also  			 * avoid underflows.  			 */ -			ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, -					 IBS_OP_MAX_CNT); +			val += IBS_RANDOM_MAXCNT_OFFSET; +			if (ibs_caps & IBS_CAPS_OPCNTEXT) +				val = min(val, IBS_OP_MAX_CNT_EXT); +			else +				val = min(val, IBS_OP_MAX_CNT); +			ibs_config.max_cnt_op = +				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4; +		} +		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT); +		val |= ibs_config.dispatched_ops ? 
IBS_OP_CNT_CTL : 0; +		val |= IBS_OP_ENABLE; +		ibs_state.ibs_op_ctl = val; +		ibs_state.sample_size = IBS_OP_SIZE; +		if (ibs_config.branch_target) { +			ibs_state.branch_target = 1; +			ibs_state.sample_size++;  		} -		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) -			ibs_op_ctl |= IBS_OP_CNT_CTL; -		ibs_op_ctl |= IBS_OP_ENABLE; -		val = op_amd_randomize_ibs_op(ibs_op_ctl); +		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);  		wrmsrl(MSR_AMD64_IBSOPCTL, val);  	}  } @@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset)  static inline int ibs_eilvt_valid(void)  { -	u64 val;  	int offset; +	u64 val;  	rdmsrl(MSR_AMD64_IBSCTL, val); +	offset = val & IBSCTL_LVT_OFFSET_MASK; +  	if (!(val & IBSCTL_LVT_OFFSET_VALID)) { -		pr_err(FW_BUG "cpu %d, invalid IBS " -		       "interrupt offset %d (MSR%08X=0x%016llx)", -		       smp_processor_id(), offset, -		       MSR_AMD64_IBSCTL, val); +		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", +		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);  		return 0;  	} -	offset = val & IBSCTL_LVT_OFFSET_MASK; - -	if (eilvt_is_available(offset)) -		return !0; - -	pr_err(FW_BUG "cpu %d, IBS interrupt offset %d " -	       "not available (MSR%08X=0x%016llx)", -	       smp_processor_id(), offset, -	       MSR_AMD64_IBSCTL, val); +	if (!eilvt_is_available(offset)) { +		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", +		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); +		return 0; +	} -	return 0; +	return 1;  }  static inline int get_ibs_offset(void) @@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)  	/* model specific files */  	/* setup some reasonable defaults */ +	memset(&ibs_config, 0, sizeof(ibs_config));  	ibs_config.max_cnt_fetch = 250000; -	ibs_config.fetch_enabled = 0;  	ibs_config.max_cnt_op = 250000; -	ibs_config.op_enabled = 0; -	ibs_config.dispatched_ops = 0; - -	dir 
= oprofilefs_mkdir(sb, root, "ibs_fetch"); -	oprofilefs_create_ulong(sb, dir, "enable", -				&ibs_config.fetch_enabled); -	oprofilefs_create_ulong(sb, dir, "max_count", -				&ibs_config.max_cnt_fetch); -	oprofilefs_create_ulong(sb, dir, "rand_enable", -				&ibs_config.rand_en); - -	dir = oprofilefs_mkdir(sb, root, "ibs_op"); -	oprofilefs_create_ulong(sb, dir, "enable", -				&ibs_config.op_enabled); -	oprofilefs_create_ulong(sb, dir, "max_count", -				&ibs_config.max_cnt_op); -	if (ibs_caps & IBS_CAPS_OPCNT) -		oprofilefs_create_ulong(sb, dir, "dispatched_ops", -					&ibs_config.dispatched_ops); + +	if (ibs_caps & IBS_CAPS_FETCHSAM) { +		dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); +		oprofilefs_create_ulong(sb, dir, "enable", +					&ibs_config.fetch_enabled); +		oprofilefs_create_ulong(sb, dir, "max_count", +					&ibs_config.max_cnt_fetch); +		oprofilefs_create_ulong(sb, dir, "rand_enable", +					&ibs_config.rand_en); +	} + +	if (ibs_caps & IBS_CAPS_OPSAM) { +		dir = oprofilefs_mkdir(sb, root, "ibs_op"); +		oprofilefs_create_ulong(sb, dir, "enable", +					&ibs_config.op_enabled); +		oprofilefs_create_ulong(sb, dir, "max_count", +					&ibs_config.max_cnt_op); +		if (ibs_caps & IBS_CAPS_OPCNT) +			oprofilefs_create_ulong(sb, dir, "dispatched_ops", +						&ibs_config.dispatched_ops); +		if (ibs_caps & IBS_CAPS_BRNTRGT) +			oprofilefs_create_ulong(sb, dir, "branch_target", +						&ibs_config.branch_target); +	}  	return 0;  } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 01b281646251..79d0c4f6d071 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -410,7 +410,7 @@ extern void open_softirq(int nr, void (*action)(struct softirq_action *));  extern void softirq_init(void);  static inline void __raise_softirq_irqoff(unsigned int nr)  { -	trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); +	trace_softirq_raise(nr);  	or_softirq_pending(1UL << nr);  } diff --git a/include/linux/ring_buffer.h 
b/include/linux/ring_buffer.h index 25b4f686d918..8d3a2486544d 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -62,18 +62,6 @@ enum ring_buffer_type {  unsigned ring_buffer_event_length(struct ring_buffer_event *event);  void *ring_buffer_event_data(struct ring_buffer_event *event); -/** - * ring_buffer_event_time_delta - return the delta timestamp of the event - * @event: the event to get the delta timestamp of - * - * The delta timestamp is the 27 bit timestamp since the last event. - */ -static inline unsigned -ring_buffer_event_time_delta(struct ring_buffer_event *event) -{ -	return event->time_delta; -} -  /*   * ring_buffer_discard_commit will remove an event that has not   *   ben committed yet. If this is used, then ring_buffer_unlock_commit diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 6fa7cbab7d93..1c09820df585 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -86,76 +86,62 @@ TRACE_EVENT(irq_handler_exit,  DECLARE_EVENT_CLASS(softirq, -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec), +	TP_PROTO(unsigned int vec_nr), -	TP_ARGS(h, vec), +	TP_ARGS(vec_nr),  	TP_STRUCT__entry( -		__field(	int,	vec			) +		__field(	unsigned int,	vec	)  	),  	TP_fast_assign( -		if (vec) -			__entry->vec = (int)(h - vec); -		else -			__entry->vec = (int)(long)h; +		__entry->vec = vec_nr;  	), -	TP_printk("vec=%d [action=%s]", __entry->vec, +	TP_printk("vec=%u [action=%s]", __entry->vec,  		  show_softirq_name(__entry->vec))  );  /**   * softirq_entry - called immediately before the softirq handler - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array + * @vec_nr:  softirq vector number   * - * The @h parameter, contains a pointer to the struct softirq_action - * which has a pointer to the action handler that is called. By subtracting - * the @vec pointer from the @h pointer, we can determine the softirq - * number. 
Also, when used in combination with the softirq_exit tracepoint - * we can determine the softirq latency. + * When used in combination with the softirq_exit tracepoint + * we can determine the softirq handler runtime.   */  DEFINE_EVENT(softirq, softirq_entry, -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec), +	TP_PROTO(unsigned int vec_nr), -	TP_ARGS(h, vec) +	TP_ARGS(vec_nr)  );  /**   * softirq_exit - called immediately after the softirq handler returns - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array + * @vec_nr:  softirq vector number   * - * The @h parameter contains a pointer to the struct softirq_action - * that has handled the softirq. By subtracting the @vec pointer from - * the @h pointer, we can determine the softirq number. Also, when used in - * combination with the softirq_entry tracepoint we can determine the softirq - * latency. + * When used in combination with the softirq_entry tracepoint + * we can determine the softirq handler runtime.   */  DEFINE_EVENT(softirq, softirq_exit, -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec), +	TP_PROTO(unsigned int vec_nr), -	TP_ARGS(h, vec) +	TP_ARGS(vec_nr)  );  /**   * softirq_raise - called immediately when a softirq is raised - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array + * @vec_nr:  softirq vector number   * - * The @h parameter contains a pointer to the softirq vector number which is - * raised. @vec is NULL and it means @h includes vector number not - * softirq_action. When used in combination with the softirq_entry tracepoint - * we can determine the softirq raise latency. + * When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise to run latency.   
*/  DEFINE_EVENT(softirq, softirq_raise, -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec), +	TP_PROTO(unsigned int vec_nr), -	TP_ARGS(h, vec) +	TP_ARGS(vec_nr)  );  #endif /*  _TRACE_IRQ_H */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 56a891914273..99865c33a60d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];  /* NOTE: change this value only with kprobe_mutex held */  static bool kprobes_all_disarmed; -static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */ +/* This protects kprobe_table and optimizing_list */ +static DEFINE_MUTEX(kprobe_mutex);  static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;  static struct {  	spinlock_t lock ____cacheline_aligned_in_smp; @@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)  }  #ifdef CONFIG_SYSCTL +/* This should be called with kprobe_mutex locked */  static void __kprobes optimize_all_kprobes(void)  {  	struct hlist_head *head; @@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void)  		return;  	kprobes_allow_optimization = true; -	mutex_lock(&text_mutex);  	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {  		head = &kprobe_table[i];  		hlist_for_each_entry_rcu(p, node, head, hlist)  			if (!kprobe_disabled(p))  				optimize_kprobe(p);  	} -	mutex_unlock(&text_mutex);  	printk(KERN_INFO "Kprobes globally optimized\n");  } +/* This should be called with kprobe_mutex locked */  static void __kprobes unoptimize_all_kprobes(void)  {  	struct hlist_head *head; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f309e8014c78..517d827f4982 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)  	return event->cpu == -1 || event->cpu == smp_processor_id();  } -static int -__event_sched_out(struct perf_event *event, +static void +event_sched_out(struct perf_event *event,  		  struct 
perf_cpu_context *cpuctx,  		  struct perf_event_context *ctx)  { @@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,  	}  	if (event->state != PERF_EVENT_STATE_ACTIVE) -		return 0; +		return;  	event->state = PERF_EVENT_STATE_INACTIVE;  	if (event->pending_disable) {  		event->pending_disable = 0;  		event->state = PERF_EVENT_STATE_OFF;  	} +	event->tstamp_stopped = ctx->time;  	event->pmu->del(event, 0);  	event->oncpu = -1; @@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,  	ctx->nr_active--;  	if (event->attr.exclusive || !cpuctx->active_oncpu)  		cpuctx->exclusive = 0; -	return 1; -} - -static void -event_sched_out(struct perf_event *event, -		  struct perf_cpu_context *cpuctx, -		  struct perf_event_context *ctx) -{ -	int ret; - -	ret = __event_sched_out(event, cpuctx, ctx); -	if (ret) -		event->tstamp_stopped = ctx->time;  }  static void @@ -664,7 +652,7 @@ retry:  }  static int -__event_sched_in(struct perf_event *event, +event_sched_in(struct perf_event *event,  		 struct perf_cpu_context *cpuctx,  		 struct perf_event_context *ctx)  { @@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event,  		return -EAGAIN;  	} +	event->tstamp_running += ctx->time - event->tstamp_stopped; +  	if (!is_software_event(event))  		cpuctx->active_oncpu++;  	ctx->nr_active++; @@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event,  	return 0;  } -static inline int -event_sched_in(struct perf_event *event, -		 struct perf_cpu_context *cpuctx, -		 struct perf_event_context *ctx) -{ -	int ret = __event_sched_in(event, cpuctx, ctx); -	if (ret) -		return ret; -	event->tstamp_running += ctx->time - event->tstamp_stopped; -	return 0; -} - -static void -group_commit_event_sched_in(struct perf_event *group_event, -	       struct perf_cpu_context *cpuctx, -	       struct perf_event_context *ctx) -{ -	struct perf_event *event; -	u64 now = ctx->time; - -	group_event->tstamp_running += now - group_event->tstamp_stopped; -	/* -	 * Schedule in 
siblings as one group (if any): -	 */ -	list_for_each_entry(event, &group_event->sibling_list, group_entry) { -		event->tstamp_running += now - event->tstamp_stopped; -	} -} -  static int  group_sched_in(struct perf_event *group_event,  	       struct perf_cpu_context *cpuctx, @@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event,  {  	struct perf_event *event, *partial_group = NULL;  	struct pmu *pmu = group_event->pmu; +	u64 now = ctx->time; +	bool simulate = false;  	if (group_event->state == PERF_EVENT_STATE_OFF)  		return 0;  	pmu->start_txn(pmu); -	/* -	 * use __event_sched_in() to delay updating tstamp_running -	 * until the transaction is committed. In case of failure -	 * we will keep an unmodified tstamp_running which is a -	 * requirement to get correct timing information -	 */ -	if (__event_sched_in(group_event, cpuctx, ctx)) { +	if (event_sched_in(group_event, cpuctx, ctx)) {  		pmu->cancel_txn(pmu);  		return -EAGAIN;  	} @@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event,  	 * Schedule in siblings as one group (if any):  	 */  	list_for_each_entry(event, &group_event->sibling_list, group_entry) { -		if (__event_sched_in(event, cpuctx, ctx)) { +		if (event_sched_in(event, cpuctx, ctx)) {  			partial_group = event;  			goto group_error;  		}  	} -	if (!pmu->commit_txn(pmu)) { -		/* commit tstamp_running */ -		group_commit_event_sched_in(group_event, cpuctx, ctx); +	if (!pmu->commit_txn(pmu))  		return 0; -	} +  group_error:  	/*  	 * Groups can be scheduled in as one unit only, so undo any  	 * partial group before returning: +	 * The events up to the failed event are scheduled out normally, +	 * tstamp_stopped will be updated.  	 * -	 * use __event_sched_out() to avoid updating tstamp_stopped -	 * because the event never actually ran +	 * The failed events and the remaining siblings need to have +	 * their timings updated as if they had gone thru event_sched_in() +	 * and event_sched_out(). 
This is required to get consistent timings +	 * across the group. This also takes care of the case where the group +	 * could never be scheduled by ensuring tstamp_stopped is set to mark +	 * the time the event was actually stopped, such that time delta +	 * calculation in update_event_times() is correct.  	 */  	list_for_each_entry(event, &group_event->sibling_list, group_entry) {  		if (event == partial_group) -			break; -		__event_sched_out(event, cpuctx, ctx); +			simulate = true; + +		if (simulate) { +			event->tstamp_running += now - event->tstamp_stopped; +			event->tstamp_stopped = now; +		} else { +			event_sched_out(event, cpuctx, ctx); +		}  	} -	__event_sched_out(group_event, cpuctx, ctx); +	event_sched_out(group_event, cpuctx, ctx);  	pmu->cancel_txn(pmu); diff --git a/kernel/softirq.c b/kernel/softirq.c index f02a9dfa19bc..18f4be0d5fe0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -229,18 +229,20 @@ restart:  	do {  		if (pending & 1) { +			unsigned int vec_nr = h - softirq_vec;  			int prev_count = preempt_count(); -			kstat_incr_softirqs_this_cpu(h - softirq_vec); -			trace_softirq_entry(h, softirq_vec); +			kstat_incr_softirqs_this_cpu(vec_nr); + +			trace_softirq_entry(vec_nr);  			h->action(h); -			trace_softirq_exit(h, softirq_vec); +			trace_softirq_exit(vec_nr);  			if (unlikely(prev_count != preempt_count())) { -				printk(KERN_ERR "huh, entered softirq %td %s %p" +				printk(KERN_ERR "huh, entered softirq %u %s %p"  				       "with preempt_count %08x," -				       " exited with %08x?\n", h - softirq_vec, -				       softirq_to_name[h - softirq_vec], -				       h->action, prev_count, preempt_count()); +				       " exited with %08x?\n", vec_nr, +				       softirq_to_name[vec_nr], h->action, +				       prev_count, preempt_count());  				preempt_count() = prev_count;  			} diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c3dab054d18e..9ed509a015d8 100644 --- a/kernel/trace/ring_buffer.c +++ 
b/kernel/trace/ring_buffer.c @@ -224,6 +224,9 @@ enum {  	RB_LEN_TIME_STAMP = 16,  }; +#define skip_time_extend(event) \ +	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) +  static inline int rb_null_event(struct ring_buffer_event *event)  {  	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; @@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)  	return length + RB_EVNT_HDR_SIZE;  } -/* inline for ring buffer fast paths */ -static unsigned +/* + * Return the length of the given event. Will return + * the length of the time extend if the event is a + * time extend. + */ +static inline unsigned  rb_event_length(struct ring_buffer_event *event)  {  	switch (event->type_len) { @@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)  	return 0;  } +/* + * Return total length of time extend and data, + *   or just the event length for all other events. + */ +static inline unsigned +rb_event_ts_length(struct ring_buffer_event *event) +{ +	unsigned len = 0; + +	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { +		/* time extends include the data event after it */ +		len = RB_LEN_TIME_EXTEND; +		event = skip_time_extend(event); +	} +	return len + rb_event_length(event); +} +  /**   * ring_buffer_event_length - return the length of the event   * @event: the event to get the length of + * + * Returns the size of the data load of a data event. + * If the event is something other than a data event, it + * returns the size of the event itself. With the exception + * of a TIME EXTEND, where it still returns the size of the + * data load of the data event after it.   
*/  unsigned ring_buffer_event_length(struct ring_buffer_event *event)  { -	unsigned length = rb_event_length(event); +	unsigned length; + +	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) +		event = skip_time_extend(event); + +	length = rb_event_length(event);  	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)  		return length;  	length -= RB_EVNT_HDR_SIZE; @@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);  static void *  rb_event_data(struct ring_buffer_event *event)  { +	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) +		event = skip_time_extend(event);  	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);  	/* If length is in len field, then array[0] has the data */  	if (event->type_len) @@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)  /* Max payload is BUF_PAGE_SIZE - header (8bytes) */  #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) -/* Max number of timestamps that can fit on a page */ -#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND) -  int ring_buffer_print_page_header(struct trace_seq *s)  {  	struct buffer_data_page field; @@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)  	iter->head = 0;  } +/* Slow path, do not inline */ +static noinline struct ring_buffer_event * +rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) +{ +	event->type_len = RINGBUF_TYPE_TIME_EXTEND; + +	/* Not the first event on the page? */ +	if (rb_event_index(event)) { +		event->time_delta = delta & TS_MASK; +		event->array[0] = delta >> TS_SHIFT; +	} else { +		/* nope, just zero it */ +		event->time_delta = 0; +		event->array[0] = 0; +	} + +	return skip_time_extend(event); +} +  /**   * ring_buffer_update_event - update event type and data   * @event: the even to update @@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)   * data field.   
*/  static void -rb_update_event(struct ring_buffer_event *event, -			 unsigned type, unsigned length) +rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, +		struct ring_buffer_event *event, unsigned length, +		int add_timestamp, u64 delta)  { -	event->type_len = type; - -	switch (type) { - -	case RINGBUF_TYPE_PADDING: -	case RINGBUF_TYPE_TIME_EXTEND: -	case RINGBUF_TYPE_TIME_STAMP: -		break; +	/* Only a commit updates the timestamp */ +	if (unlikely(!rb_event_is_commit(cpu_buffer, event))) +		delta = 0; -	case 0: -		length -= RB_EVNT_HDR_SIZE; -		if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) -			event->array[0] = length; -		else -			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); -		break; -	default: -		BUG(); +	/* +	 * If we need to add a timestamp, then we +	 * add it to the start of the resevered space. +	 */ +	if (unlikely(add_timestamp)) { +		event = rb_add_time_stamp(event, delta); +		length -= RB_LEN_TIME_EXTEND; +		delta = 0;  	} + +	event->time_delta = delta; +	length -= RB_EVNT_HDR_SIZE; +	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { +		event->type_len = 0; +		event->array[0] = length; +	} else +		event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);  }  /* @@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,  	local_sub(length, &tail_page->write);  } -static struct ring_buffer_event * +/* + * This is the slow path, force gcc not to inline it. 
+ */ +static noinline struct ring_buffer_event *  rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,  	     unsigned long length, unsigned long tail, -	     struct buffer_page *tail_page, u64 *ts) +	     struct buffer_page *tail_page, u64 ts)  {  	struct buffer_page *commit_page = cpu_buffer->commit_page;  	struct ring_buffer *buffer = cpu_buffer->buffer; @@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,  		 * Nested commits always have zero deltas, so  		 * just reread the time stamp  		 */ -		*ts = rb_time_stamp(buffer); -		next_page->page->time_stamp = *ts; +		ts = rb_time_stamp(buffer); +		next_page->page->time_stamp = ts;  	}   out_again: @@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,  static struct ring_buffer_event *  __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, -		  unsigned type, unsigned long length, u64 *ts) +		  unsigned long length, u64 ts, +		  u64 delta, int add_timestamp)  {  	struct buffer_page *tail_page;  	struct ring_buffer_event *event;  	unsigned long tail, write; +	/* +	 * If the time delta since the last event is too big to +	 * hold in the time field of the event, then we append a +	 * TIME EXTEND event ahead of the data event. 
+	 */ +	if (unlikely(add_timestamp)) +		length += RB_LEN_TIME_EXTEND; +  	tail_page = cpu_buffer->tail_page;  	write = local_add_return(length, &tail_page->write); @@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,  	tail = write - length;  	/* See if we shot pass the end of this buffer page */ -	if (write > BUF_PAGE_SIZE) +	if (unlikely(write > BUF_PAGE_SIZE))  		return rb_move_tail(cpu_buffer, length, tail,  				    tail_page, ts); @@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,  	event = __rb_page_index(tail_page, tail);  	kmemcheck_annotate_bitfield(event, bitfield); -	rb_update_event(event, type, length); +	rb_update_event(cpu_buffer, event, length, add_timestamp, delta); -	/* The passed in type is zero for DATA */ -	if (likely(!type)) -		local_inc(&tail_page->entries); +	local_inc(&tail_page->entries);  	/*  	 * If this is the first commit on the page, then update  	 * its timestamp.  	 */  	if (!tail) -		tail_page->page->time_stamp = *ts; +		tail_page->page->time_stamp = ts;  	return event;  } @@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,  	unsigned long addr;  	new_index = rb_event_index(event); -	old_index = new_index + rb_event_length(event); +	old_index = new_index + rb_event_ts_length(event);  	addr = (unsigned long)event;  	addr &= PAGE_MASK; @@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,  	return 0;  } -static int -rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, -		  u64 *ts, u64 *delta) -{ -	struct ring_buffer_event *event; -	int ret; - -	WARN_ONCE(*delta > (1ULL << 59), -		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", -		  (unsigned long long)*delta, -		  (unsigned long long)*ts, -		  (unsigned long long)cpu_buffer->write_stamp); - -	/* -	 * The delta is too big, we to add a -	 * new timestamp. 
-	 */ -	event = __rb_reserve_next(cpu_buffer, -				  RINGBUF_TYPE_TIME_EXTEND, -				  RB_LEN_TIME_EXTEND, -				  ts); -	if (!event) -		return -EBUSY; - -	if (PTR_ERR(event) == -EAGAIN) -		return -EAGAIN; - -	/* Only a commited time event can update the write stamp */ -	if (rb_event_is_commit(cpu_buffer, event)) { -		/* -		 * If this is the first on the page, then it was -		 * updated with the page itself. Try to discard it -		 * and if we can't just make it zero. -		 */ -		if (rb_event_index(event)) { -			event->time_delta = *delta & TS_MASK; -			event->array[0] = *delta >> TS_SHIFT; -		} else { -			/* try to discard, since we do not need this */ -			if (!rb_try_to_discard(cpu_buffer, event)) { -				/* nope, just zero it */ -				event->time_delta = 0; -				event->array[0] = 0; -			} -		} -		cpu_buffer->write_stamp = *ts; -		/* let the caller know this was the commit */ -		ret = 1; -	} else { -		/* Try to discard the event */ -		if (!rb_try_to_discard(cpu_buffer, event)) { -			/* Darn, this is just wasted space */ -			event->time_delta = 0; -			event->array[0] = 0; -		} -		ret = 0; -	} - -	*delta = 0; - -	return ret; -} -  static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)  {  	local_inc(&cpu_buffer->committing);  	local_inc(&cpu_buffer->commits);  } -static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) +static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)  {  	unsigned long commits; @@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,  		      unsigned long length)  {  	struct ring_buffer_event *event; -	u64 ts, delta = 0; -	int commit = 0; +	u64 ts, delta;  	int nr_loops = 0; +	int add_timestamp; +	u64 diff;  	rb_start_commit(cpu_buffer); @@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,  	length = rb_calculate_event_length(length);   again: +	add_timestamp = 0; +	delta = 0; +  	/*  	 * We allow for interrupts to reenter here and do a trace.  	 
* If one does, it will cause this original code to loop @@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,  		goto out_fail;  	ts = rb_time_stamp(cpu_buffer->buffer); +	diff = ts - cpu_buffer->write_stamp; -	/* -	 * Only the first commit can update the timestamp. -	 * Yes there is a race here. If an interrupt comes in -	 * just after the conditional and it traces too, then it -	 * will also check the deltas. More than one timestamp may -	 * also be made. But only the entry that did the actual -	 * commit will be something other than zero. -	 */ -	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && -		   rb_page_write(cpu_buffer->tail_page) == -		   rb_commit_index(cpu_buffer))) { -		u64 diff; - -		diff = ts - cpu_buffer->write_stamp; - -		/* make sure this diff is calculated here */ -		barrier(); - -		/* Did the write stamp get updated already? */ -		if (unlikely(ts < cpu_buffer->write_stamp)) -			goto get_event; +	/* make sure this diff is calculated here */ +	barrier(); +	/* Did the write stamp get updated already? */ +	if (likely(ts >= cpu_buffer->write_stamp)) {  		delta = diff;  		if (unlikely(test_time_stamp(delta))) { - -			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); -			if (commit == -EBUSY) -				goto out_fail; - -			if (commit == -EAGAIN) -				goto again; - -			RB_WARN_ON(cpu_buffer, commit < 0); +			WARN_ONCE(delta > (1ULL << 59), +				  KERN_WARNING "Delta way too big! 
%llu ts=%llu write stamp = %llu\n", +				  (unsigned long long)delta, +				  (unsigned long long)ts, +				  (unsigned long long)cpu_buffer->write_stamp); +			add_timestamp = 1;  		}  	} - get_event: -	event = __rb_reserve_next(cpu_buffer, 0, length, &ts); +	event = __rb_reserve_next(cpu_buffer, length, ts, +				  delta, add_timestamp);  	if (unlikely(PTR_ERR(event) == -EAGAIN))  		goto again;  	if (!event)  		goto out_fail; -	if (!rb_event_is_commit(cpu_buffer, event)) -		delta = 0; - -	event->time_delta = delta; -  	return event;   out_fail: @@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,  #define TRACE_RECURSIVE_DEPTH 16 -static int trace_recursive_lock(void) +/* Keep this code out of the fast path cache */ +static noinline void trace_recursive_fail(void)  { -	current->trace_recursion++; - -	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) -		return 0; -  	/* Disable all tracing before we do anything else */  	tracing_off_permanent(); @@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)  		    in_nmi());  	WARN_ON_ONCE(1); +} + +static inline int trace_recursive_lock(void) +{ +	current->trace_recursion++; + +	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) +		return 0; + +	trace_recursive_fail(); +  	return -1;  } -static void trace_recursive_unlock(void) +static inline void trace_recursive_unlock(void)  {  	WARN_ON_ONCE(!current->trace_recursion); @@ -2308,12 +2298,28 @@ static void  rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,  		      struct ring_buffer_event *event)  { +	u64 delta; +  	/*  	 * The event first in the commit queue updates the  	 * time stamp.  	 
*/ -	if (rb_event_is_commit(cpu_buffer, event)) -		cpu_buffer->write_stamp += event->time_delta; +	if (rb_event_is_commit(cpu_buffer, event)) { +		/* +		 * A commit event that is first on a page +		 * updates the write timestamp with the page stamp +		 */ +		if (!rb_event_index(event)) +			cpu_buffer->write_stamp = +				cpu_buffer->commit_page->page->time_stamp; +		else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { +			delta = event->array[0]; +			delta <<= TS_SHIFT; +			delta += event->time_delta; +			cpu_buffer->write_stamp += delta; +		} else +			cpu_buffer->write_stamp += event->time_delta; +	}  }  static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, @@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);  static inline void rb_event_discard(struct ring_buffer_event *event)  { +	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) +		event = skip_time_extend(event); +  	/* array[0] holds the actual length for the discarded event */  	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;  	event->type_len = RINGBUF_TYPE_PADDING; @@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,   again:  	/* -	 * We repeat when a timestamp is encountered. It is possible -	 * to get multiple timestamps from an interrupt entering just -	 * as one timestamp is about to be written, or from discarded -	 * commits. The most that we can have is the number on a single page. +	 * We repeat when a time extend is encountered. +	 * Since the time extend is always attached to a data event, +	 * we should never loop more than once. +	 * (We never hit the following condition more than twice).  	 */ -	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) +	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))  		return NULL;  	reader = rb_get_reader_page(cpu_buffer); @@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)  		return NULL;  	/* -	 * We repeat when a timestamp is encountered. 
-	 * We can get multiple timestamps by nested interrupts or also -	 * if filtering is on (discarding commits). Since discarding -	 * commits can be frequent we can get a lot of timestamps. -	 * But we limit them by not adding timestamps if they begin -	 * at the start of a page. +	 * We repeat when a time extend is encountered. +	 * Since the time extend is always attached to a data event, +	 * we should never loop more than once. +	 * (We never hit the following condition more than twice).  	 */ -	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) +	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))  		return NULL;  	if (rb_per_cpu_empty(cpu_buffer)) @@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,  		if (len > (commit - read))  			len = (commit - read); -		size = rb_event_length(event); +		/* Always keep the time extend and data together */ +		size = rb_event_ts_length(event);  		if (len < size)  			goto out_unlock; @@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,  				break;  			event = rb_reader_event(cpu_buffer); -			size = rb_event_length(event); +			/* Always keep the time extend and data together */ +			size = rb_event_ts_length(event);  		} while (len > size);  		/* update bpage */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 001bcd2ccf4a..82d9b8106cd0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)  {  	struct dentry *d_percpu = tracing_dentry_percpu();  	struct dentry *d_cpu; -	/* strlen(cpu) + MAX(log10(cpu)) + '\0' */ -	char cpu_dir[7]; +	char cpu_dir[30]; /* 30 characters should be more than enough */ -	if (cpu > 999 || cpu < 0) -		return; - -	sprintf(cpu_dir, "cpu%ld", cpu); +	snprintf(cpu_dir, 30, "cpu%ld", cpu);  	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);  	if (!d_cpu) {  		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); diff --git 
a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 43e3dd284b90..399751befeed 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -15,6 +15,23 @@ DESCRIPTION  This command displays the symbolic event types which can be selected in the  various perf commands with the -e option. +EVENT MODIFIERS +--------------- + +Events can optionally have a modifer by appending a colon and one or +more modifiers.  Modifiers allow the user to restrict when events are +counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. + +The 'p' modifier can be used for specifying how precise the instruction +address should be. The 'p' modifier is currently only implemented for +Intel PEBS and can be specified multiple times: +  0 - SAMPLE_IP can have arbitrary skid +  1 - SAMPLE_IP must have constant skid +  2 - SAMPLE_IP requested to have 0 skid +  3 - SAMPLE_IP must have 0 skid + +The PEBS implementation now supports up to 2. +  RAW HARDWARE EVENT DESCRIPTOR  -----------------------------  Even when an event is not available in a symbolic form within perf right now, diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 27d52dae5a43..62de1b7f4e76 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -16,7 +16,9 @@ or  or  'perf probe' --list  or -'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' +'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' +or +'perf probe' [options] --vars='PROBEPOINT'  DESCRIPTION  ----------- @@ -31,6 +33,11 @@ OPTIONS  --vmlinux=PATH::  	Specify vmlinux path which has debuginfo (Dwarf binary). +-m:: +--module=MODNAME:: +	Specify module name in which perf-probe searches probe points +	or lines. +  -s::  --source=PATH::  	Specify path to kernel source. @@ -57,6 +64,15 @@ OPTIONS  	Show source code lines which can be probed. 
This needs an argument  	which specifies a range of the source code. (see LINE SYNTAX for detail) +-V:: +--vars=:: +	Show available local variables at given probe point. The argument +	syntax is same as PROBE SYNTAX, but NO ARGs. + +--externs:: +	(Only for --vars) Show external defined variables in addition to local +	variables. +  -f::  --force::  	Forcibly add events with existing name. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 3ee27dccfde9..a91f9f9e6e5c 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -83,6 +83,10 @@ OPTIONS  --call-graph::  	Do call-graph (stack chain/backtrace) recording. +-q:: +--quiet:: +	Don't print any message, useful for scripting. +  -v::  --verbose::  	Be more verbose (show counter open errors, etc). diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 199d5e19554f..2e000c068cc5 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -50,14 +50,17 @@ static struct {  	bool list_events;  	bool force_add;  	bool show_lines; +	bool show_vars; +	bool show_ext_vars; +	bool mod_events;  	int nevents;  	struct perf_probe_event events[MAX_PROBES];  	struct strlist *dellist;  	struct line_range line_range; +	const char *target_module;  	int max_probe_points;  } params; -  /* Parse an event definition. Note that any error must die. 
*/  static int parse_probe_event(const char *str)  { @@ -92,6 +95,7 @@ static int parse_probe_event_argv(int argc, const char **argv)  	len = 0;  	for (i = 0; i < argc; i++)  		len += sprintf(&buf[len], "%s ", argv[i]); +	params.mod_events = true;  	ret = parse_probe_event(buf);  	free(buf);  	return ret; @@ -100,9 +104,10 @@ static int parse_probe_event_argv(int argc, const char **argv)  static int opt_add_probe_event(const struct option *opt __used,  			      const char *str, int unset __used)  { -	if (str) +	if (str) { +		params.mod_events = true;  		return parse_probe_event(str); -	else +	} else  		return 0;  } @@ -110,6 +115,7 @@ static int opt_del_probe_event(const struct option *opt __used,  			       const char *str, int unset __used)  {  	if (str) { +		params.mod_events = true;  		if (!params.dellist)  			params.dellist = strlist__new(true, NULL);  		strlist__add(params.dellist, str); @@ -130,6 +136,25 @@ static int opt_show_lines(const struct option *opt __used,  	return ret;  } + +static int opt_show_vars(const struct option *opt __used, +			 const char *str, int unset __used) +{ +	struct perf_probe_event *pev = &params.events[params.nevents]; +	int ret; + +	if (!str) +		return 0; + +	ret = parse_probe_event(str); +	if (!ret && pev->nargs != 0) { +		pr_err("  Error: '--vars' doesn't accept arguments.\n"); +		return -EINVAL; +	} +	params.show_vars = true; + +	return ret; +}  #endif  static const char * const probe_usage[] = { @@ -138,7 +163,8 @@ static const char * const probe_usage[] = {  	"perf probe [<options>] --del '[GROUP:]EVENT' ...",  	"perf probe --list",  #ifdef DWARF_SUPPORT -	"perf probe --line 'LINEDESC'", +	"perf probe [<options>] --line 'LINEDESC'", +	"perf probe [<options>] --vars 'PROBEPOINT'",  #endif  	NULL  }; @@ -180,10 +206,17 @@ static const struct option options[] = {  	OPT_CALLBACK('L', "line", NULL,  		     "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",  		     "Show source code lines.", opt_show_lines), +	OPT_CALLBACK('V', "vars", 
NULL, +		     "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT", +		     "Show accessible variables on PROBEDEF", opt_show_vars), +	OPT_BOOLEAN('\0', "externs", &params.show_ext_vars, +		    "Show external variables too (with --vars only)"),  	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,  		   "file", "vmlinux pathname"),  	OPT_STRING('s', "source", &symbol_conf.source_prefix,  		   "directory", "path to kernel source"), +	OPT_STRING('m', "module", &params.target_module, +		   "modname", "target module name"),  #endif  	OPT__DRY_RUN(&probe_event_dry_run),  	OPT_INTEGER('\0', "max-probes", &params.max_probe_points, @@ -217,7 +250,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)  		usage_with_options(probe_usage, options);  	if (params.list_events) { -		if (params.nevents != 0 || params.dellist) { +		if (params.mod_events) {  			pr_err("  Error: Don't use --list with --add/--del.\n");  			usage_with_options(probe_usage, options);  		} @@ -225,6 +258,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)  			pr_err("  Error: Don't use --list with --line.\n");  			usage_with_options(probe_usage, options);  		} +		if (params.show_vars) { +			pr_err(" Error: Don't use --list with --vars.\n"); +			usage_with_options(probe_usage, options); +		}  		ret = show_perf_probe_events();  		if (ret < 0)  			pr_err("  Error: Failed to show event list. 
(%d)\n", @@ -234,17 +271,35 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)  #ifdef DWARF_SUPPORT  	if (params.show_lines) { -		if (params.nevents != 0 || params.dellist) { -			pr_warning("  Error: Don't use --line with" -				   " --add/--del.\n"); +		if (params.mod_events) { +			pr_err("  Error: Don't use --line with" +			       " --add/--del.\n"); +			usage_with_options(probe_usage, options); +		} +		if (params.show_vars) { +			pr_err(" Error: Don't use --line with --vars.\n");  			usage_with_options(probe_usage, options);  		} -		ret = show_line_range(&params.line_range); +		ret = show_line_range(&params.line_range, params.target_module);  		if (ret < 0)  			pr_err("  Error: Failed to show lines. (%d)\n", ret);  		return ret;  	} +	if (params.show_vars) { +		if (params.mod_events) { +			pr_err("  Error: Don't use --vars with" +			       " --add/--del.\n"); +			usage_with_options(probe_usage, options); +		} +		ret = show_available_vars(params.events, params.nevents, +					  params.max_probe_points, +					  params.target_module, +					  params.show_ext_vars); +		if (ret < 0) +			pr_err("  Error: Failed to show vars. (%d)\n", ret); +		return ret; +	}  #endif  	if (params.dellist) { @@ -258,8 +313,9 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)  	if (params.nevents) {  		ret = add_perf_probe_events(params.events, params.nevents, -					    params.force_add, -					    params.max_probe_points); +					    params.max_probe_points, +					    params.target_module, +					    params.force_add);  		if (ret < 0) {  			pr_err("  Error: Failed to add events. 
(%d)\n", ret);  			return ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ff77b805de71..4e75583ddd6d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -353,7 +353,7 @@ try_again:  		}  		if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { -			perror("Unable to read perf file descriptor\n"); +			perror("Unable to read perf file descriptor");  			exit(-1);  		} @@ -626,7 +626,7 @@ static int __cmd_record(int argc, const char **argv)  	nr_cpus = read_cpu_map(cpu_list);  	if (nr_cpus < 1) { -		perror("failed to collect number of CPUs\n"); +		perror("failed to collect number of CPUs");  		return -1;  	} @@ -761,6 +761,9 @@ static int __cmd_record(int argc, const char **argv)  		}  	} +	if (quiet) +		return 0; +  	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);  	/* @@ -820,6 +823,7 @@ static const struct option options[] = {  		    "do call-graph (stack chain/backtrace) recording"),  	OPT_INCR('v', "verbose", &verbose,  		    "be more verbose (show counter open errors, etc)"), +	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),  	OPT_BOOLEAN('s', "stat", &inherit_stat,  		    "per thread counts"),  	OPT_BOOLEAN('d', "data", &sample_address, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 40a6a2992d15..2f8df45c4dcb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -46,9 +46,6 @@ static struct scripting_ops	*scripting_ops;  static void setup_scripting(void)  { -	/* make sure PERF_EXEC_PATH is set for scripts */ -	perf_set_argv_exec_path(perf_exec_path()); -  	setup_perl_scripting();  	setup_python_scripting(); @@ -285,7 +282,7 @@ static int parse_scriptname(const struct option *opt __used,  		script++;  	} else {  		script = str; -		ext = strchr(script, '.'); +		ext = strrchr(script, '.');  		if (!ext) {  			fprintf(stderr, "invalid script extension");  			return -1; @@ -593,6 +590,9 
@@ int cmd_trace(int argc, const char **argv, const char *prefix __used)  		suffix = REPORT_SUFFIX;  	} +	/* make sure PERF_EXEC_PATH is set for scripts */ +	perf_set_argv_exec_path(perf_exec_path()); +  	if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) {  		char *record_script_path, *report_script_path;  		int live_pipe[2]; @@ -625,12 +625,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)  			dup2(live_pipe[1], 1);  			close(live_pipe[0]); -			__argv = malloc(5 * sizeof(const char *)); +			__argv = malloc(6 * sizeof(const char *));  			__argv[0] = "/bin/sh";  			__argv[1] = record_script_path; -			__argv[2] = "-o"; -			__argv[3] = "-"; -			__argv[4] = NULL; +			__argv[2] = "-q"; +			__argv[3] = "-o"; +			__argv[4] = "-"; +			__argv[5] = NULL;  			execvp("/bin/sh", (char **)__argv);  			exit(-1); diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report index e3a5e55d54ff..4028d92dc4ae 100644 --- a/tools/perf/scripts/perl/bin/failed-syscalls-report +++ b/tools/perf/scripts/perl/bin/failed-syscalls-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then  	shift      fi  fi -perf trace $@ -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $comm +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report index d83070b7eeb5..ba25f4d41fb0 100644 --- a/tools/perf/scripts/perl/bin/rw-by-file-report +++ b/tools/perf/scripts/perl/bin/rw-by-file-report @@ -7,7 +7,7 @@ if [ $# -lt 1 ] ; then  fi  comm=$1  shift -perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $comm +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report index 7ef46983f62f..641a3f5d085c 100644 --- a/tools/perf/scripts/perl/bin/rw-by-pid-report +++ 
b/tools/perf/scripts/perl/bin/rw-by-pid-report @@ -1,6 +1,6 @@  #!/bin/bash  # description: system-wide r/w activity -perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report index 93e698cd3f38..4918dba77021 100644 --- a/tools/perf/scripts/perl/bin/rwtop-report +++ b/tools/perf/scripts/perl/bin/rwtop-report @@ -17,7 +17,7 @@ if [ "$n_args" -gt 0 ] ; then      interval=$1      shift  fi -perf trace $@ -s ~/libexec/perf-core/scripts/perl/rwtop.pl $interval +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report index a0d898f9ca1d..49052ebcb632 100644 --- a/tools/perf/scripts/perl/bin/wakeup-latency-report +++ b/tools/perf/scripts/perl/bin/wakeup-latency-report @@ -1,6 +1,6 @@  #!/bin/bash  # description: system-wide min/max/avg wakeup latency -perf trace $@ -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report index 35081132ef97..df0c65f4ca93 100644 --- a/tools/perf/scripts/perl/bin/workqueue-stats-report +++ b/tools/perf/scripts/perl/bin/workqueue-stats-report @@ -1,6 +1,6 @@  #!/bin/bash  # description: workqueue stats (ins/exe/create/destroy) -perf trace $@ -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 9689bc0acd9f..13cc02b5893a 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ 
b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -6,6 +6,14 @@  # Public License ("GPL") version 2 as published by the Free Software  # Foundation. +import errno, os + +FUTEX_WAIT = 0 +FUTEX_WAKE = 1 +FUTEX_PRIVATE_FLAG = 128 +FUTEX_CLOCK_REALTIME = 256 +FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME) +  NSECS_PER_SEC    = 1000000000  def avg(total, n): @@ -24,5 +32,55 @@ def nsecs_str(nsecs):      str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),      return str +def add_stats(dict, key, value): +	if not dict.has_key(key): +		dict[key] = (value, value, value, 1) +	else: +		min, max, avg, count = dict[key] +		if value < min: +			min = value +		if value > max: +			max = value +		avg = (avg + value) / 2 +		dict[key] = (min, max, avg, count + 1) +  def clear_term():      print("\x1b[H\x1b[2J") + +audit_package_warned = False + +try: +	import audit +	machine_to_id = { +		'x86_64': audit.MACH_86_64, +		'alpha'	: audit.MACH_ALPHA, +		'ia64'	: audit.MACH_IA64, +		'ppc'	: audit.MACH_PPC, +		'ppc64'	: audit.MACH_PPC64, +		's390'	: audit.MACH_S390, +		's390x'	: audit.MACH_S390X, +		'i386'	: audit.MACH_X86, +		'i586'	: audit.MACH_X86, +		'i686'	: audit.MACH_X86, +	} +	try: +		machine_to_id['armeb'] = audit.MACH_ARMEB +	except: +		pass +	machine_id = machine_to_id[os.uname()[4]] +except: +	if not audit_package_warned: +		audit_package_warned = True +		print "Install the audit-libs-python package to get syscall names" + +def syscall_name(id): +	try: +		return audit.audit_syscall_to_name(id, machine_id) +	except: +		return str(id) + +def strerror(nr): +	try: +		return errno.errorcode[abs(nr)] +	except: +		return "Unknown %d errno" % nr diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report index 30293545fcc2..03587021463d 100644 --- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report +++ 
b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then  	shift      fi  fi -perf trace $@ -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $comm +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm diff --git a/tools/perf/scripts/python/bin/futex-contention-record b/tools/perf/scripts/python/bin/futex-contention-record new file mode 100644 index 000000000000..5ecbb433caf4 --- /dev/null +++ b/tools/perf/scripts/python/bin/futex-contention-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -a -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex $@ diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report new file mode 100644 index 000000000000..c8268138fb7e --- /dev/null +++ b/tools/perf/scripts/python/bin/futex-contention-report @@ -0,0 +1,4 @@ +#!/bin/bash +# description: futext contention measurement + +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report index c3d0a638123d..4ad361b31249 100644 --- a/tools/perf/scripts/python/bin/netdev-times-report +++ b/tools/perf/scripts/python/bin/netdev-times-report @@ -2,4 +2,4 @@  # description: display a process of packet and processing time  # args: [tx] [rx] [dev=] [debug] -perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@ +perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@ diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report index 61d05f72e443..df1791f07c24 100644 --- a/tools/perf/scripts/python/bin/sched-migration-report +++ b/tools/perf/scripts/python/bin/sched-migration-report @@ -1,3 +1,3 @@  #!/bin/bash  # description: sched migration overview -perf trace $@ -s ~/libexec/perf-core/scripts/python/sched-migration.py +perf trace $@ -s 
"$PERF_EXEC_PATH"/scripts/python/sched-migration.py diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report index b01c842ae7b4..36b409c05e50 100644 --- a/tools/perf/scripts/python/bin/sctop-report +++ b/tools/perf/scripts/python/bin/sctop-report @@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then      interval=$1      shift  fi -perf trace $@ -s ~/libexec/perf-core/scripts/python/sctop.py $comm $interval +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report index 9e9d8ddd72ce..4eb88c9fc83c 100644 --- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report +++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then  	shift      fi  fi -perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $comm +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report index dc076b618796..cb2f9c5cf17e 100644 --- a/tools/perf/scripts/python/bin/syscall-counts-report +++ b/tools/perf/scripts/python/bin/syscall-counts-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then  	shift      fi  fi -perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts.py $comm +perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py index 0ca02278fe69..acd7848717b3 100644 --- a/tools/perf/scripts/python/failed-syscalls-by-pid.py +++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py @@ -13,21 +13,26 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \  from perf_trace_context import *  from Core import * +from Util import * -usage = "perf trace -s 
syscall-counts-by-pid.py [comm]\n"; +usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";  for_comm = None +for_pid = None  if len(sys.argv) > 2:  	sys.exit(usage)  if len(sys.argv) > 1: -	for_comm = sys.argv[1] +	try: +		for_pid = int(sys.argv[1]) +	except: +		for_comm = sys.argv[1]  syscalls = autodict()  def trace_begin(): -	pass +	print "Press control+C to stop and show the summary"  def trace_end():  	print_error_totals() @@ -35,9 +40,9 @@ def trace_end():  def raw_syscalls__sys_exit(event_name, context, common_cpu,  	common_secs, common_nsecs, common_pid, common_comm,  	id, ret): -	if for_comm is not None: -		if common_comm != for_comm: -			return +	if (for_comm and common_comm != for_comm) or \ +	   (for_pid  and common_pid  != for_pid ): +		return  	if ret < 0:  		try: @@ -62,7 +67,7 @@ def print_error_totals():  		    print "\n%s [%d]\n" % (comm, pid),  		    id_keys = syscalls[comm][pid].keys()  		    for id in id_keys: -			    print "  syscall: %-16d\n" % (id), +			    print "  syscall: %-16s\n" % syscall_name(id),  			    ret_keys = syscalls[comm][pid][id].keys()  			    for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k),  reverse = True): -				    print "    err = %-20d  %10d\n" % (ret, val), +				    print "    err = %-20s  %10d\n" % (strerror(ret), val), diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py new file mode 100644 index 000000000000..11e70a388d41 --- /dev/null +++ b/tools/perf/scripts/python/futex-contention.py @@ -0,0 +1,50 @@ +# futex contention +# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com> +# Licensed under the terms of the GNU GPL License version 2 +# +# Translation of: +# +# http://sourceware.org/systemtap/wiki/WSFutexContention +# +# to perf python scripting. 
+# +# Measures futex contention + +import os, sys +sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') +from Util import * + +process_names = {} +thread_thislock = {} +thread_blocktime = {} + +lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time +process_names = {} # long-lived pid-to-execname mapping + +def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, +			      nr, uaddr, op, val, utime, uaddr2, val3): +	cmd = op & FUTEX_CMD_MASK +	if cmd != FUTEX_WAIT: +		return # we don't care about originators of WAKE events + +	process_names[tid] = comm +	thread_thislock[tid] = uaddr +	thread_blocktime[tid] = nsecs(s, ns) + +def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, +			     nr, ret): +	if thread_blocktime.has_key(tid): +		elapsed = nsecs(s, ns) - thread_blocktime[tid] +		add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed) +		del thread_blocktime[tid] +		del thread_thislock[tid] + +def trace_begin(): +	print "Press control+C to stop and show the summary" + +def trace_end(): +	for (tid, lock) in lock_waits: +		min, max, avg, count = lock_waits[tid, lock] +		print "%s[%d] lock %x contended %d times, %d avg ns" % \ +		      (process_names[tid], tid, lock, count, avg) + diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py index 6cafad40c296..7a6ec2c7d8ab 100644 --- a/tools/perf/scripts/python/sctop.py +++ b/tools/perf/scripts/python/sctop.py @@ -8,10 +8,7 @@  # will be refreshed every [interval] seconds.  The default interval is  # 3 seconds. 
-import thread -import time -import os -import sys +import os, sys, thread, time  sys.path.append(os.environ['PERF_EXEC_PATH'] + \  	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace') @@ -20,7 +17,7 @@ from perf_trace_context import *  from Core import *  from Util import * -usage = "perf trace -s syscall-counts.py [comm] [interval]\n"; +usage = "perf trace -s sctop.py [comm] [interval]\n";  for_comm = None  default_interval = 3 @@ -71,7 +68,7 @@ def print_syscall_totals(interval):  		for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \  					      reverse = True):  			try: -				print "%-40d  %10d\n" % (id, val), +				print "%-40s  %10d\n" % (syscall_name(id), val),  			except TypeError:  				pass  		syscalls.clear() diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py index af722d6a4b3f..d1ee3ec10cf2 100644 --- a/tools/perf/scripts/python/syscall-counts-by-pid.py +++ b/tools/perf/scripts/python/syscall-counts-by-pid.py @@ -5,29 +5,33 @@  # Displays system-wide system call totals, broken down by syscall.  # If a [comm] arg is specified, only syscalls called by [comm] are displayed. 
-import os -import sys +import os, sys  sys.path.append(os.environ['PERF_EXEC_PATH'] + \  	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')  from perf_trace_context import *  from Core import * +from Util import syscall_name  usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";  for_comm = None +for_pid = None  if len(sys.argv) > 2:  	sys.exit(usage)  if len(sys.argv) > 1: -	for_comm = sys.argv[1] +	try: +		for_pid = int(sys.argv[1]) +	except: +		for_comm = sys.argv[1]  syscalls = autodict()  def trace_begin(): -	pass +	print "Press control+C to stop and show the summary"  def trace_end():  	print_syscall_totals() @@ -35,9 +39,10 @@ def trace_end():  def raw_syscalls__sys_enter(event_name, context, common_cpu,  	common_secs, common_nsecs, common_pid, common_comm,  	id, args): -	if for_comm is not None: -		if common_comm != for_comm: -			return + +	if (for_comm and common_comm != for_comm) or \ +	   (for_pid  and common_pid  != for_pid ): +		return  	try:  		syscalls[common_comm][common_pid][id] += 1  	except TypeError: @@ -61,4 +66,4 @@ def print_syscall_totals():  		    id_keys = syscalls[comm][pid].keys()  		    for id, val in sorted(syscalls[comm][pid].iteritems(), \  				  key = lambda(k, v): (v, k),  reverse = True): -			    print "  %-38d  %10d\n" % (id, val), +			    print "  %-38s  %10d\n" % (syscall_name(id), val), diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py index f977e85ff049..ea183dc82d29 100644 --- a/tools/perf/scripts/python/syscall-counts.py +++ b/tools/perf/scripts/python/syscall-counts.py @@ -13,6 +13,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \  from perf_trace_context import *  from Core import * +from Util import syscall_name  usage = "perf trace -s syscall-counts.py [comm]\n"; @@ -27,7 +28,7 @@ if len(sys.argv) > 1:  syscalls = autodict()  def trace_begin(): -	pass +	print "Press control+C to stop and show the summary"  def trace_end():  	print_syscall_totals() @@ -55,4 
+56,4 @@ def print_syscall_totals():      for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \  				  reverse = True): -	    print "%-40d  %10d\n" % (id, val), +	    print "%-40s  %10d\n" % (syscall_name(id), val), diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index f9c7e3ad1aa7..c8d81b00089d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -12,8 +12,8 @@  #include "debug.h"  #include "util.h" -int verbose = 0; -bool dump_trace = false; +int verbose; +bool dump_trace = false, quiet = false;  int eprintf(int level, const char *fmt, ...)  { diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 7a17ee061bcb..7b514082bbaf 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -6,7 +6,7 @@  #include "event.h"  extern int verbose; -extern bool dump_trace; +extern bool quiet, dump_trace;  int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));  void trace_event(event_t *event); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 78575796d5f3..b397c0383728 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -215,6 +215,16 @@ struct symbol *map_groups__find_function_by_name(struct map_groups *self,  	return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter);  } +static inline +struct symbol *machine__find_kernel_function_by_name(struct machine *self, +						     const char *name, +						     struct map **mapp, +						     symbol_filter_t filter) +{ +	return map_groups__find_function_by_name(&self->kmaps, name, mapp, +						 filter); +} +  int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,  				   int verbose, FILE *fp); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index fcc16e4349df..3b6a5297bf16 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -74,10 +74,9 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)  
static char *synthesize_perf_probe_point(struct perf_probe_point *pp);  static struct machine machine; -/* Initialize symbol maps and path of vmlinux */ +/* Initialize symbol maps and path of vmlinux/modules */  static int init_vmlinux(void)  { -	struct dso *kernel;  	int ret;  	symbol_conf.sort_by_name = true; @@ -91,33 +90,61 @@ static int init_vmlinux(void)  		goto out;  	} -	ret = machine__init(&machine, "/", 0); +	ret = machine__init(&machine, "", HOST_KERNEL_ID);  	if (ret < 0)  		goto out; -	kernel = dso__new_kernel(symbol_conf.vmlinux_name); -	if (kernel == NULL) -		die("Failed to create kernel dso."); - -	ret = __machine__create_kernel_maps(&machine, kernel); -	if (ret < 0) -		pr_debug("Failed to create kernel maps.\n"); - +	if (machine__create_kernel_maps(&machine) < 0) { +		pr_debug("machine__create_kernel_maps "); +		goto out; +	}  out:  	if (ret < 0)  		pr_warning("Failed to init vmlinux path.\n");  	return ret;  } +static struct symbol *__find_kernel_function_by_name(const char *name, +						     struct map **mapp) +{ +	return machine__find_kernel_function_by_name(&machine, name, mapp, +						     NULL); +} + +const char *kernel_get_module_path(const char *module) +{ +	struct dso *dso; + +	if (module) { +		list_for_each_entry(dso, &machine.kernel_dsos, node) { +			if (strncmp(dso->short_name + 1, module, +				    dso->short_name_len - 2) == 0) +				goto found; +		} +		pr_debug("Failed to find module %s.\n", module); +		return NULL; +	} else { +		dso = machine.vmlinux_maps[MAP__FUNCTION]->dso; +		if (dso__load_vmlinux_path(dso, +			 machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) { +			pr_debug("Failed to load kernel map.\n"); +			return NULL; +		} +	} +found: +	return dso->long_name; +} +  #ifdef DWARF_SUPPORT -static int open_vmlinux(void) +static int open_vmlinux(const char *module)  { -	if (map__load(machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) { -		pr_debug("Failed to load kernel map.\n"); -		return -EINVAL; +	const char *path = 
kernel_get_module_path(module); +	if (!path) { +		pr_err("Failed to find path of %s module", module ?: "kernel"); +		return -ENOENT;  	} -	pr_debug("Try to open %s\n", machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name); -	return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY); +	pr_debug("Try to open %s\n", path); +	return open(path, O_RDONLY);  }  /* @@ -125,20 +152,19 @@ static int open_vmlinux(void)   * Currently only handles kprobes.   */  static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, -				       struct perf_probe_point *pp) +					struct perf_probe_point *pp)  {  	struct symbol *sym; -	int fd, ret = -ENOENT; +	struct map *map; +	u64 addr; +	int ret = -ENOENT; -	sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION], -				       tp->symbol, NULL); +	sym = __find_kernel_function_by_name(tp->symbol, &map);  	if (sym) { -		fd = open_vmlinux(); -		if (fd >= 0) { -			ret = find_perf_probe_point(fd, -						 sym->start + tp->offset, pp); -			close(fd); -		} +		addr = map->unmap_ip(map, sym->start + tp->offset); +		pr_debug("try to find %s+%ld@%llx\n", tp->symbol, +			 tp->offset, addr); +		ret = find_perf_probe_point((unsigned long)addr, pp);  	}  	if (ret <= 0) {  		pr_debug("Failed to find corresponding probes from " @@ -156,12 +182,12 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,  /* Try to find perf_probe_event with debuginfo */  static int try_to_find_probe_trace_events(struct perf_probe_event *pev,  					   struct probe_trace_event **tevs, -					   int max_tevs) +					   int max_tevs, const char *module)  {  	bool need_dwarf = perf_probe_event_need_dwarf(pev);  	int fd, ntevs; -	fd = open_vmlinux(); +	fd = open_vmlinux(module);  	if (fd < 0) {  		if (need_dwarf) {  			pr_warning("Failed to open debuginfo file.\n"); @@ -300,7 +326,7 @@ error:   * Show line-range always requires debuginfo to find source file and   * line number.   
*/ -int show_line_range(struct line_range *lr) +int show_line_range(struct line_range *lr, const char *module)  {  	int l = 1;  	struct line_node *ln; @@ -313,7 +339,7 @@ int show_line_range(struct line_range *lr)  	if (ret < 0)  		return ret; -	fd = open_vmlinux(); +	fd = open_vmlinux(module);  	if (fd < 0) {  		pr_warning("Failed to open debuginfo file.\n");  		return fd; @@ -378,11 +404,84 @@ end:  	return ret;  } +static int show_available_vars_at(int fd, struct perf_probe_event *pev, +				  int max_vls, bool externs) +{ +	char *buf; +	int ret, i; +	struct str_node *node; +	struct variable_list *vls = NULL, *vl; + +	buf = synthesize_perf_probe_point(&pev->point); +	if (!buf) +		return -EINVAL; +	pr_debug("Searching variables at %s\n", buf); + +	ret = find_available_vars_at(fd, pev, &vls, max_vls, externs); +	if (ret > 0) { +		/* Some variables were found */ +		fprintf(stdout, "Available variables at %s\n", buf); +		for (i = 0; i < ret; i++) { +			vl = &vls[i]; +			/* +			 * A probe point might be converted to +			 * several trace points. 
+			 */ +			fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol, +				vl->point.offset); +			free(vl->point.symbol); +			if (vl->vars) { +				strlist__for_each(node, vl->vars) +					fprintf(stdout, "\t\t%s\n", node->s); +				strlist__delete(vl->vars); +			} else +				fprintf(stdout, "(No variables)\n"); +		} +		free(vls); +	} else +		pr_err("Failed to find variables at %s (%d)\n", buf, ret); + +	free(buf); +	return ret; +} + +/* Show available variables on given probe point */ +int show_available_vars(struct perf_probe_event *pevs, int npevs, +			int max_vls, const char *module, bool externs) +{ +	int i, fd, ret = 0; + +	ret = init_vmlinux(); +	if (ret < 0) +		return ret; + +	fd = open_vmlinux(module); +	if (fd < 0) { +		pr_warning("Failed to open debuginfo file.\n"); +		return fd; +	} + +	setup_pager(); + +	for (i = 0; i < npevs && ret >= 0; i++) +		ret = show_available_vars_at(fd, &pevs[i], max_vls, externs); + +	close(fd); +	return ret; +} +  #else	/* !DWARF_SUPPORT */  static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, -				       struct perf_probe_point *pp) +					struct perf_probe_point *pp)  { +	struct symbol *sym; + +	sym = __find_kernel_function_by_name(tp->symbol, NULL); +	if (!sym) { +		pr_err("Failed to find symbol %s in kernel.\n", tp->symbol); +		return -ENOENT; +	}  	pp->function = strdup(tp->symbol);  	if (pp->function == NULL)  		return -ENOMEM; @@ -394,7 +493,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,  static int try_to_find_probe_trace_events(struct perf_probe_event *pev,  				struct probe_trace_event **tevs __unused, -				int max_tevs __unused) +				int max_tevs __unused, const char *mod __unused)  {  	if (perf_probe_event_need_dwarf(pev)) {  		pr_warning("Debuginfo-analysis is not supported.\n"); @@ -403,12 +502,19 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,  	return 0;  } -int show_line_range(struct line_range *lr __unused) +int show_line_range(struct line_range 
*lr __unused, const char *module __unused)  {  	pr_warning("Debuginfo-analysis is not supported.\n");  	return -ENOSYS;  } +int show_available_vars(struct perf_probe_event *pevs __unused, +			int npevs __unused, int max_vls __unused, +			const char *module __unused, bool externs __unused) +{ +	pr_warning("Debuginfo-analysis is not supported.\n"); +	return -ENOSYS; +}  #endif  int parse_line_range_desc(const char *arg, struct line_range *lr) @@ -1087,7 +1193,7 @@ error:  }  static int convert_to_perf_probe_event(struct probe_trace_event *tev, -				struct perf_probe_event *pev) +				       struct perf_probe_event *pev)  {  	char buf[64] = "";  	int i, ret; @@ -1516,14 +1622,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,  static int convert_to_probe_trace_events(struct perf_probe_event *pev,  					  struct probe_trace_event **tevs, -					  int max_tevs) +					  int max_tevs, const char *module)  {  	struct symbol *sym;  	int ret = 0, i;  	struct probe_trace_event *tev;  	/* Convert perf_probe_event with debuginfo */ -	ret = try_to_find_probe_trace_events(pev, tevs, max_tevs); +	ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);  	if (ret != 0)  		return ret; @@ -1572,8 +1678,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,  	}  	/* Currently just checking function name from symbol map */ -	sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION], -				       tev->point.symbol, NULL); +	sym = __find_kernel_function_by_name(tev->point.symbol, NULL);  	if (!sym) {  		pr_warning("Kernel symbol \'%s\' not found.\n",  			   tev->point.symbol); @@ -1596,7 +1701,7 @@ struct __event_package {  };  int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, -			  bool force_add, int max_tevs) +			  int max_tevs, const char *module, bool force_add)  {  	int i, j, ret;  	struct __event_package *pkgs; @@ -1617,7 +1722,9 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,  		
pkgs[i].pev = &pevs[i];  		/* Convert with or without debuginfo */  		ret  = convert_to_probe_trace_events(pkgs[i].pev, -						      &pkgs[i].tevs, max_tevs); +						     &pkgs[i].tevs, +						     max_tevs, +						     module);  		if (ret < 0)  			goto end;  		pkgs[i].ntevs = ret; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 5af39243a25b..5accbedfea37 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -90,6 +90,12 @@ struct line_range {  	struct list_head	line_list;	/* Visible lines */  }; +/* List of variables */ +struct variable_list { +	struct probe_trace_point	point;	/* Actual probepoint */ +	struct strlist			*vars;	/* Available variables */ +}; +  /* Command string to events */  extern int parse_perf_probe_command(const char *cmd,  				    struct perf_probe_event *pev); @@ -109,12 +115,18 @@ extern void clear_perf_probe_event(struct perf_probe_event *pev);  /* Command string to line-range */  extern int parse_line_range_desc(const char *cmd, struct line_range *lr); +/* Internal use: Return kernel/module path */ +extern const char *kernel_get_module_path(const char *module);  extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, -				 bool force_add, int max_probe_points); +				 int max_probe_points, const char *module, +				 bool force_add);  extern int del_perf_probe_events(struct strlist *dellist);  extern int show_perf_probe_events(void); -extern int show_line_range(struct line_range *lr); +extern int show_line_range(struct line_range *lr, const char *module); +extern int show_available_vars(struct perf_probe_event *pevs, int npevs, +			       int max_probe_points, const char *module, +			       bool externs);  /* Maximum index number of event-name postfix */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 32b81f707ff5..3991d73d1cff 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -116,6 +116,101 @@ 
static void line_list__free(struct list_head *head)  	}  } +/* Dwarf FL wrappers */ + +static int __linux_kernel_find_elf(Dwfl_Module *mod, +				   void **userdata, +				   const char *module_name, +				   Dwarf_Addr base, +				   char **file_name, Elf **elfp) +{ +	int fd; +	const char *path = kernel_get_module_path(module_name); + +	if (path) { +		fd = open(path, O_RDONLY); +		if (fd >= 0) { +			*file_name = strdup(path); +			return fd; +		} +	} +	/* If failed, try to call standard method */ +	return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base, +					  file_name, elfp); +} + +static char *debuginfo_path;	/* Currently dummy */ + +static const Dwfl_Callbacks offline_callbacks = { +	.find_debuginfo = dwfl_standard_find_debuginfo, +	.debuginfo_path = &debuginfo_path, + +	.section_address = dwfl_offline_section_address, + +	/* We use this table for core files too.  */ +	.find_elf = dwfl_build_id_find_elf, +}; + +static const Dwfl_Callbacks kernel_callbacks = { +	.find_debuginfo = dwfl_standard_find_debuginfo, +	.debuginfo_path = &debuginfo_path, + +	.find_elf = __linux_kernel_find_elf, +	.section_address = dwfl_linux_kernel_module_section_address, +}; + +/* Get a Dwarf from offline image */ +static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias) +{ +	Dwfl_Module *mod; +	Dwarf *dbg = NULL; + +	if (!dwflp) +		return NULL; + +	*dwflp = dwfl_begin(&offline_callbacks); +	if (!*dwflp) +		return NULL; + +	mod = dwfl_report_offline(*dwflp, "", "", fd); +	if (!mod) +		goto error; + +	dbg = dwfl_module_getdwarf(mod, bias); +	if (!dbg) { +error: +		dwfl_end(*dwflp); +		*dwflp = NULL; +	} +	return dbg; +} + +/* Get a Dwarf from live kernel image */ +static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp, +					  Dwarf_Addr *bias) +{ +	Dwarf *dbg; + +	if (!dwflp) +		return NULL; + +	*dwflp = dwfl_begin(&kernel_callbacks); +	if (!*dwflp) +		return NULL; + +	/* Load the kernel dwarves: Don't care the result here */ +	
dwfl_linux_kernel_report_kernel(*dwflp); +	dwfl_linux_kernel_report_modules(*dwflp); + +	dbg = dwfl_addrdwarf(*dwflp, addr, bias); +	/* Here, check whether we could get a real dwarf */ +	if (!dbg) { +		dwfl_end(*dwflp); +		*dwflp = NULL; +	} +	return dbg; +} +  /* Dwarf wrappers */  /* Find the realpath of the target file. */ @@ -160,26 +255,44 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)  	return name ? (strcmp(tname, name) == 0) : false;  } -/* Get type die, but skip qualifiers and typedef */ -static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +/* Get type die */ +static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)  {  	Dwarf_Attribute attr; + +	if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) && +	    dwarf_formref_die(&attr, die_mem)) +		return die_mem; +	else +		return NULL; +} + +/* Get a type die, but skip qualifiers */ +static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +{  	int tag;  	do { -		if (dwarf_attr(vr_die, DW_AT_type, &attr) == NULL || -		    dwarf_formref_die(&attr, die_mem) == NULL) -			return NULL; - -		tag = dwarf_tag(die_mem); -		vr_die = die_mem; +		vr_die = die_get_type(vr_die, die_mem); +		if (!vr_die) +			break; +		tag = dwarf_tag(vr_die);  	} while (tag == DW_TAG_const_type ||  		 tag == DW_TAG_restrict_type ||  		 tag == DW_TAG_volatile_type || -		 tag == DW_TAG_shared_type || -		 tag == DW_TAG_typedef); +		 tag == DW_TAG_shared_type); + +	return vr_die; +} -	return die_mem; +/* Get a type die, but skip qualifiers and typedef */ +static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +{ +	do { +		vr_die = __die_get_real_type(vr_die, die_mem); +	} while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef); + +	return vr_die;  }  static bool die_is_signed_type(Dwarf_Die *tp_die) @@ -320,25 +433,35 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,  	return die_find_child(sp_die, __die_find_inline_cb, 
&addr, die_mem);  } +struct __find_variable_param { +	const char *name; +	Dwarf_Addr addr; +}; +  static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)  { -	const char *name = data; +	struct __find_variable_param *fvp = data;  	int tag;  	tag = dwarf_tag(die_mem);  	if ((tag == DW_TAG_formal_parameter ||  	     tag == DW_TAG_variable) && -	    die_compare_name(die_mem, name)) +	    die_compare_name(die_mem, fvp->name))  		return DIE_FIND_CB_FOUND; -	return DIE_FIND_CB_CONTINUE; +	if (dwarf_haspc(die_mem, fvp->addr)) +		return DIE_FIND_CB_CONTINUE; +	else +		return DIE_FIND_CB_SIBLING;  } -/* Find a variable called 'name' */ -static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name, -				    Dwarf_Die *die_mem) +/* Find a variable called 'name' at given address */ +static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, +				       Dwarf_Addr addr, Dwarf_Die *die_mem)  { -	return die_find_child(sp_die, __die_find_variable_cb, (void *)name, +	struct __find_variable_param fvp = { .name = name, .addr = addr}; + +	return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,  			      die_mem);  } @@ -361,6 +484,60 @@ static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,  			      die_mem);  } +/* Get the name of given variable DIE */ +static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) +{ +	Dwarf_Die type; +	int tag, ret, ret2; +	const char *tmp = ""; + +	if (__die_get_real_type(vr_die, &type) == NULL) +		return -ENOENT; + +	tag = dwarf_tag(&type); +	if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) +		tmp = "*"; +	else if (tag == DW_TAG_subroutine_type) { +		/* Function pointer */ +		ret = snprintf(buf, len, "(function_type)"); +		return (ret >= len) ? 
-E2BIG : ret; +	} else { +		if (!dwarf_diename(&type)) +			return -ENOENT; +		if (tag == DW_TAG_union_type) +			tmp = "union "; +		else if (tag == DW_TAG_structure_type) +			tmp = "struct "; +		/* Write a base name */ +		ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type)); +		return (ret >= len) ? -E2BIG : ret; +	} +	ret = die_get_typename(&type, buf, len); +	if (ret > 0) { +		ret2 = snprintf(buf + ret, len - ret, "%s", tmp); +		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret; +	} +	return ret; +} + +/* Get the name and type of given variable DIE, stored as "type\tname" */ +static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len) +{ +	int ret, ret2; + +	ret = die_get_typename(vr_die, buf, len); +	if (ret < 0) { +		pr_debug("Failed to get type, make it unknown.\n"); +		ret = snprintf(buf, len, "(unknown_type)"); +	} +	if (ret > 0) { +		ret2 = snprintf(buf + ret, len - ret, "\t%s", +				dwarf_diename(vr_die)); +		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret; +	} +	return ret; +} +  /*   * Probe finder related functions   */ @@ -374,8 +551,13 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)  	return ref;  } -/* Show a location */ -static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf) +/* + * Convert a location into trace_arg. + * If tvar == NULL, this just checks variable can be converted. 
+ */ +static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, +				     Dwarf_Op *fb_ops, +				     struct probe_trace_arg *tvar)  {  	Dwarf_Attribute attr;  	Dwarf_Op *op; @@ -384,20 +566,23 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)  	Dwarf_Word offs = 0;  	bool ref = false;  	const char *regs; -	struct probe_trace_arg *tvar = pf->tvar;  	int ret; +	if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL) +		goto static_var; +  	/* TODO: handle more than 1 exprs */  	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL || -	    dwarf_getlocation_addr(&attr, pf->addr, &op, &nops, 1) <= 0 || +	    dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 ||  	    nops == 0) {  		/* TODO: Support const_value */ -		pr_err("Failed to find the location of %s at this address.\n" -		       " Perhaps, it has been optimized out.\n", pf->pvar->var);  		return -ENOENT;  	}  	if (op->atom == DW_OP_addr) { +static_var: +		if (!tvar) +			return 0;  		/* Static variables on memory (not stack), make @varname */  		ret = strlen(dwarf_diename(vr_die));  		tvar->value = zalloc(ret + 2); @@ -412,14 +597,11 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)  	/* If this is based on frame buffer, set the offset */  	if (op->atom == DW_OP_fbreg) { -		if (pf->fb_ops == NULL) { -			pr_warning("The attribute of frame base is not " -				   "supported.\n"); +		if (fb_ops == NULL)  			return -ENOTSUP; -		}  		ref = true;  		offs = op->number; -		op = &pf->fb_ops[0]; +		op = &fb_ops[0];  	}  	if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) { @@ -435,13 +617,18 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)  	} else if (op->atom == DW_OP_regx) {  		regn = op->number;  	} else { -		pr_warning("DW_OP %x is not supported.\n", op->atom); +		pr_debug("DW_OP %x is not supported.\n", op->atom);  		return -ENOTSUP;  	} +	if (!tvar) +		return 0; +  	regs = 
get_arch_regstr(regn);  	if (!regs) { -		pr_warning("Mapping for DWARF register number %u missing on this architecture.", regn); +		/* This should be a bug in DWARF or this tool */ +		pr_warning("Mapping for DWARF register number %u " +			   "missing on this architecture.", regn);  		return -ERANGE;  	} @@ -666,8 +853,14 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)  	pr_debug("Converting variable %s into trace event.\n",  		 dwarf_diename(vr_die)); -	ret = convert_variable_location(vr_die, pf); -	if (ret == 0 && pf->pvar->field) { +	ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, +					pf->tvar); +	if (ret == -ENOENT) +		pr_err("Failed to find the location of %s at this address.\n" +		       " Perhaps, it has been optimized out.\n", pf->pvar->var); +	else if (ret == -ENOTSUP) +		pr_err("Sorry, we don't support this variable location yet.\n"); +	else if (pf->pvar->field) {  		ret = convert_variable_fields(vr_die, pf->pvar->var,  					      pf->pvar->field, &pf->tvar->ref,  					      &die_mem); @@ -722,56 +915,39 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)  	pr_debug("Searching '%s' variable in context.\n",  		 pf->pvar->var);  	/* Search child die for local variables and parameters. 
*/ -	if (die_find_variable(sp_die, pf->pvar->var, &vr_die)) +	if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die))  		ret = convert_variable(&vr_die, pf);  	else {  		/* Search upper class */  		nscopes = dwarf_getscopes_die(sp_die, &scopes); -		if (nscopes > 0) { -			ret = dwarf_getscopevar(scopes, nscopes, pf->pvar->var, -						0, NULL, 0, 0, &vr_die); -			if (ret >= 0) +		while (nscopes-- > 1) { +			pr_debug("Searching variables in %s\n", +				 dwarf_diename(&scopes[nscopes])); +			/* We should check this scope, so give dummy address */ +			if (die_find_variable_at(&scopes[nscopes], +						 pf->pvar->var, 0, +						 &vr_die)) {  				ret = convert_variable(&vr_die, pf); -			else -				ret = -ENOENT; +				goto found; +			} +		} +		if (scopes)  			free(scopes); -		} else -			ret = -ENOENT; +		ret = -ENOENT;  	} +found:  	if (ret < 0)  		pr_warning("Failed to find '%s' in this function.\n",  			   pf->pvar->var);  	return ret;  } -/* Show a probe point to output buffer */ -static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) +/* Convert subprogram DIE to trace point */ +static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, +				  bool retprobe, struct probe_trace_point *tp)  { -	struct probe_trace_event *tev;  	Dwarf_Addr eaddr; -	Dwarf_Die die_mem;  	const char *name; -	int ret, i; -	Dwarf_Attribute fb_attr; -	size_t nops; - -	if (pf->ntevs == pf->max_tevs) { -		pr_warning("Too many( > %d) probe point found.\n", -			   pf->max_tevs); -		return -ERANGE; -	} -	tev = &pf->tevs[pf->ntevs++]; - -	/* If no real subprogram, find a real one */ -	if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { -		sp_die = die_find_real_subprogram(&pf->cu_die, -						 pf->addr, &die_mem); -		if (!sp_die) { -			pr_warning("Failed to find probe point in any " -				   "functions.\n"); -			return -ENOENT; -		} -	}  	/* Copy the name of probe point */  	name = dwarf_diename(sp_die); @@ -781,26 +957,45 @@ static int 
convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)  				   dwarf_diename(sp_die));  			return -ENOENT;  		} -		tev->point.symbol = strdup(name); -		if (tev->point.symbol == NULL) +		tp->symbol = strdup(name); +		if (tp->symbol == NULL)  			return -ENOMEM; -		tev->point.offset = (unsigned long)(pf->addr - eaddr); +		tp->offset = (unsigned long)(paddr - eaddr);  	} else  		/* This function has no name. */ -		tev->point.offset = (unsigned long)pf->addr; +		tp->offset = (unsigned long)paddr;  	/* Return probe must be on the head of a subprogram */ -	if (pf->pev->point.retprobe) { -		if (tev->point.offset != 0) { +	if (retprobe) { +		if (eaddr != paddr) {  			pr_warning("Return probe must be on the head of"  				   " a real function\n");  			return -EINVAL;  		} -		tev->point.retprobe = true; +		tp->retprobe = true;  	} -	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, -		 tev->point.offset); +	return 0; +} + +/* Call probe_finder callback with real subprogram DIE */ +static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) +{ +	Dwarf_Die die_mem; +	Dwarf_Attribute fb_attr; +	size_t nops; +	int ret; + +	/* If no real subprogram, find a real one */ +	if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { +		sp_die = die_find_real_subprogram(&pf->cu_die, +						  pf->addr, &die_mem); +		if (!sp_die) { +			pr_warning("Failed to find probe point in any " +				   "functions.\n"); +			return -ENOENT; +		} +	}  	/* Get the frame base attribute/ops */  	dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); @@ -820,22 +1015,13 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)  #endif  	} -	/* Find each argument */ -	tev->nargs = pf->pev->nargs; -	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); -	if (tev->args == NULL) -		return -ENOMEM; -	for (i = 0; i < pf->pev->nargs; i++) { -		pf->pvar = &pf->pev->args[i]; -		pf->tvar = &tev->args[i]; -		ret = find_variable(sp_die, pf); -		if (ret != 0) -			
return ret; -	} +	/* Call finder's callback handler */ +	ret = pf->callback(sp_die, pf);  	/* *pf->fb_ops will be cached in libdw. Don't free it. */  	pf->fb_ops = NULL; -	return 0; + +	return ret;  }  /* Find probe point from its line number */ @@ -871,7 +1057,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)  			 (int)i, lineno, (uintmax_t)addr);  		pf->addr = addr; -		ret = convert_probe_point(NULL, pf); +		ret = call_probe_finder(NULL, pf);  		/* Continuing, because target line might be inlined. */  	}  	return ret; @@ -984,7 +1170,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)  			 (int)i, lineno, (unsigned long long)addr);  		pf->addr = addr; -		ret = convert_probe_point(sp_die, pf); +		ret = call_probe_finder(sp_die, pf);  		/* Continuing, because target line might be inlined. */  	}  	/* TODO: deallocate lines, but how? */ @@ -1019,7 +1205,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)  		pr_debug("found inline addr: 0x%jx\n",  			 (uintmax_t)pf->addr); -		param->retval = convert_probe_point(in_die, pf); +		param->retval = call_probe_finder(in_die, pf);  		if (param->retval < 0)  			return DWARF_CB_ABORT;  	} @@ -1057,7 +1243,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)  			}  			pf->addr += pp->offset;  			/* TODO: Check the address in this function */ -			param->retval = convert_probe_point(sp_die, pf); +			param->retval = call_probe_finder(sp_die, pf);  		}  	} else {  		struct dwarf_callback_param _param = {.data = (void *)pf, @@ -1079,90 +1265,276 @@ static int find_probe_point_by_func(struct probe_finder *pf)  	return _param.retval;  } -/* Find probe_trace_events specified by perf_probe_event from debuginfo */ -int find_probe_trace_events(int fd, struct perf_probe_event *pev, -			     struct probe_trace_event **tevs, int max_tevs) +/* Find probe points from debuginfo */ +static int find_probes(int fd, struct probe_finder *pf)  { -	struct probe_finder pf = 
{.pev = pev, .max_tevs = max_tevs}; -	struct perf_probe_point *pp = &pev->point; +	struct perf_probe_point *pp = &pf->pev->point;  	Dwarf_Off off, noff;  	size_t cuhl;  	Dwarf_Die *diep; -	Dwarf *dbg; +	Dwarf *dbg = NULL; +	Dwfl *dwfl; +	Dwarf_Addr bias;	/* Currently ignored */  	int ret = 0; -	pf.tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs); -	if (pf.tevs == NULL) -		return -ENOMEM; -	*tevs = pf.tevs; -	pf.ntevs = 0; - -	dbg = dwarf_begin(fd, DWARF_C_READ); +	dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);  	if (!dbg) {  		pr_warning("No dwarf info found in the vmlinux - "  			"please rebuild with CONFIG_DEBUG_INFO=y.\n"); -		free(pf.tevs); -		*tevs = NULL;  		return -EBADF;  	}  #if _ELFUTILS_PREREQ(0, 142)  	/* Get the call frame information from this dwarf */ -	pf.cfi = dwarf_getcfi(dbg); +	pf->cfi = dwarf_getcfi(dbg);  #endif  	off = 0; -	line_list__init(&pf.lcache); +	line_list__init(&pf->lcache);  	/* Loop on CUs (Compilation Unit) */  	while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) &&  	       ret >= 0) {  		/* Get the DIE(Debugging Information Entry) of this CU */ -		diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die); +		diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);  		if (!diep)  			continue;  		/* Check if target file is included. 
*/  		if (pp->file) -			pf.fname = cu_find_realpath(&pf.cu_die, pp->file); +			pf->fname = cu_find_realpath(&pf->cu_die, pp->file);  		else -			pf.fname = NULL; +			pf->fname = NULL; -		if (!pp->file || pf.fname) { +		if (!pp->file || pf->fname) {  			if (pp->function) -				ret = find_probe_point_by_func(&pf); +				ret = find_probe_point_by_func(pf);  			else if (pp->lazy_line) -				ret = find_probe_point_lazy(NULL, &pf); +				ret = find_probe_point_lazy(NULL, pf);  			else { -				pf.lno = pp->line; -				ret = find_probe_point_by_line(&pf); +				pf->lno = pp->line; +				ret = find_probe_point_by_line(pf);  			}  		}  		off = noff;  	} -	line_list__free(&pf.lcache); -	dwarf_end(dbg); +	line_list__free(&pf->lcache); +	if (dwfl) +		dwfl_end(dwfl); -	return (ret < 0) ? ret : pf.ntevs; +	return ret; +} + +/* Add a found probe point into trace event list */ +static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) +{ +	struct trace_event_finder *tf = +			container_of(pf, struct trace_event_finder, pf); +	struct probe_trace_event *tev; +	int ret, i; + +	/* Check number of tevs */ +	if (tf->ntevs == tf->max_tevs) { +		pr_warning("Too many( > %d) probe point found.\n", +			   tf->max_tevs); +		return -ERANGE; +	} +	tev = &tf->tevs[tf->ntevs++]; + +	ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, +				     &tev->point); +	if (ret < 0) +		return ret; + +	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, +		 tev->point.offset); + +	/* Find each argument */ +	tev->nargs = pf->pev->nargs; +	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); +	if (tev->args == NULL) +		return -ENOMEM; +	for (i = 0; i < pf->pev->nargs; i++) { +		pf->pvar = &pf->pev->args[i]; +		pf->tvar = &tev->args[i]; +		ret = find_variable(sp_die, pf); +		if (ret != 0) +			return ret; +	} + +	return 0; +} + +/* Find probe_trace_events specified by perf_probe_event from debuginfo */ +int find_probe_trace_events(int fd, struct perf_probe_event 
*pev, +			    struct probe_trace_event **tevs, int max_tevs) +{ +	struct trace_event_finder tf = { +			.pf = {.pev = pev, .callback = add_probe_trace_event}, +			.max_tevs = max_tevs}; +	int ret; + +	/* Allocate result tevs array */ +	*tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs); +	if (*tevs == NULL) +		return -ENOMEM; + +	tf.tevs = *tevs; +	tf.ntevs = 0; + +	ret = find_probes(fd, &tf.pf); +	if (ret < 0) { +		free(*tevs); +		*tevs = NULL; +		return ret; +	} + +	return (ret < 0) ? ret : tf.ntevs; +} + +#define MAX_VAR_LEN 64 + +/* Collect available variables in this scope */ +static int collect_variables_cb(Dwarf_Die *die_mem, void *data) +{ +	struct available_var_finder *af = data; +	struct variable_list *vl; +	char buf[MAX_VAR_LEN]; +	int tag, ret; + +	vl = &af->vls[af->nvls - 1]; + +	tag = dwarf_tag(die_mem); +	if (tag == DW_TAG_formal_parameter || +	    tag == DW_TAG_variable) { +		ret = convert_variable_location(die_mem, af->pf.addr, +						af->pf.fb_ops, NULL); +		if (ret == 0) { +			ret = die_get_varname(die_mem, buf, MAX_VAR_LEN); +			pr_debug2("Add new var: %s\n", buf); +			if (ret > 0) +				strlist__add(vl->vars, buf); +		} +	} + +	if (af->child && dwarf_haspc(die_mem, af->pf.addr)) +		return DIE_FIND_CB_CONTINUE; +	else +		return DIE_FIND_CB_SIBLING; +} + +/* Add a found vars into available variables list */ +static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) +{ +	struct available_var_finder *af = +			container_of(pf, struct available_var_finder, pf); +	struct variable_list *vl; +	Dwarf_Die die_mem, *scopes = NULL; +	int ret, nscopes; + +	/* Check number of tevs */ +	if (af->nvls == af->max_vls) { +		pr_warning("Too many( > %d) probe point found.\n", af->max_vls); +		return -ERANGE; +	} +	vl = &af->vls[af->nvls++]; + +	ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, +				     &vl->point); +	if (ret < 0) +		return ret; + +	pr_debug("Probe point found: %s+%lu\n", vl->point.symbol, +		 
vl->point.offset); + +	/* Find local variables */ +	vl->vars = strlist__new(true, NULL); +	if (vl->vars == NULL) +		return -ENOMEM; +	af->child = true; +	die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem); + +	/* Find external variables */ +	if (!af->externs) +		goto out; +	/* Don't need to search child DIE for externs. */ +	af->child = false; +	nscopes = dwarf_getscopes_die(sp_die, &scopes); +	while (nscopes-- > 1) +		die_find_child(&scopes[nscopes], collect_variables_cb, +			       (void *)af, &die_mem); +	if (scopes) +		free(scopes); + +out: +	if (strlist__empty(vl->vars)) { +		strlist__delete(vl->vars); +		vl->vars = NULL; +	} + +	return ret; +} + +/* Find available variables at given probe point */ +int find_available_vars_at(int fd, struct perf_probe_event *pev, +			   struct variable_list **vls, int max_vls, +			   bool externs) +{ +	struct available_var_finder af = { +			.pf = {.pev = pev, .callback = add_available_vars}, +			.max_vls = max_vls, .externs = externs}; +	int ret; + +	/* Allocate result vls array */ +	*vls = zalloc(sizeof(struct variable_list) * max_vls); +	if (*vls == NULL) +		return -ENOMEM; + +	af.vls = *vls; +	af.nvls = 0; + +	ret = find_probes(fd, &af.pf); +	if (ret < 0) { +		/* Free vlist for error */ +		while (af.nvls--) { +			if (af.vls[af.nvls].point.symbol) +				free(af.vls[af.nvls].point.symbol); +			if (af.vls[af.nvls].vars) +				strlist__delete(af.vls[af.nvls].vars); +		} +		free(af.vls); +		*vls = NULL; +		return ret; +	} + +	return (ret < 0) ? 
ret : af.nvls;  }  /* Reverse search */ -int find_perf_probe_point(int fd, unsigned long addr, -			  struct perf_probe_point *ppt) +int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)  {  	Dwarf_Die cudie, spdie, indie; -	Dwarf *dbg; +	Dwarf *dbg = NULL; +	Dwfl *dwfl = NULL;  	Dwarf_Line *line; -	Dwarf_Addr laddr, eaddr; +	Dwarf_Addr laddr, eaddr, bias = 0;  	const char *tmp;  	int lineno, ret = 0;  	bool found = false; -	dbg = dwarf_begin(fd, DWARF_C_READ); -	if (!dbg) -		return -EBADF; +	/* Open the live linux kernel */ +	dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias); +	if (!dbg) { +		pr_warning("No dwarf info found in the vmlinux - " +			"please rebuild with CONFIG_DEBUG_INFO=y.\n"); +		ret = -EINVAL; +		goto end; +	} +	/* Adjust address with bias */ +	addr += bias;  	/* Find cu die */ -	if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr, &cudie)) { +	if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) { +		pr_warning("No CU DIE is found at %lx\n", addr);  		ret = -EINVAL;  		goto end;  	} @@ -1225,7 +1597,8 @@ found:  	}  end: -	dwarf_end(dbg); +	if (dwfl) +		dwfl_end(dwfl);  	if (ret >= 0)  		ret = found ? 
1 : 0;  	return ret; @@ -1358,6 +1731,9 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)  	struct line_finder *lf = param->data;  	struct line_range *lr = lf->lr; +	pr_debug("find (%llx) %s\n", +		 (unsigned long long)dwarf_dieoffset(sp_die), +		 dwarf_diename(sp_die));  	if (dwarf_tag(sp_die) == DW_TAG_subprogram &&  	    die_compare_name(sp_die, lr->function)) {  		lf->fname = dwarf_decl_file(sp_die); @@ -1401,10 +1777,12 @@ int find_line_range(int fd, struct line_range *lr)  	Dwarf_Off off = 0, noff;  	size_t cuhl;  	Dwarf_Die *diep; -	Dwarf *dbg; +	Dwarf *dbg = NULL; +	Dwfl *dwfl; +	Dwarf_Addr bias;	/* Currently ignored */  	const char *comp_dir; -	dbg = dwarf_begin(fd, DWARF_C_READ); +	dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);  	if (!dbg) {  		pr_warning("No dwarf info found in the vmlinux - "  			"please rebuild with CONFIG_DEBUG_INFO=y.\n"); @@ -1450,8 +1828,7 @@ int find_line_range(int fd, struct line_range *lr)  	}  	pr_debug("path: %s\n", lr->path); -	dwarf_end(dbg); - +	dwfl_end(dwfl);  	return (ret < 0) ? 
ret : lf.found;  } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 4507d519f183..bba69d455699 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -22,20 +22,27 @@ extern int find_probe_trace_events(int fd, struct perf_probe_event *pev,  				    int max_tevs);  /* Find a perf_probe_point from debuginfo */ -extern int find_perf_probe_point(int fd, unsigned long addr, +extern int find_perf_probe_point(unsigned long addr,  				 struct perf_probe_point *ppt); +/* Find a line range */  extern int find_line_range(int fd, struct line_range *lr); +/* Find available variables */ +extern int find_available_vars_at(int fd, struct perf_probe_event *pev, +				  struct variable_list **vls, int max_points, +				  bool externs); +  #include <dwarf.h>  #include <libdw.h> +#include <libdwfl.h>  #include <version.h>  struct probe_finder {  	struct perf_probe_event	*pev;		/* Target probe event */ -	struct probe_trace_event *tevs;		/* Result trace events */ -	int			ntevs;		/* Number of trace events */ -	int			max_tevs;	/* Max number of trace events */ + +	/* Callback when a probe point is found */ +	int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf);  	/* For function searching */  	int			lno;		/* Line number */ @@ -53,6 +60,22 @@ struct probe_finder {  	struct probe_trace_arg	*tvar;		/* Current result variable */  }; +struct trace_event_finder { +	struct probe_finder	pf; +	struct probe_trace_event *tevs;		/* Found trace events */ +	int			ntevs;		/* Number of trace events */ +	int			max_tevs;	/* Max number of trace events */ +}; + +struct available_var_finder { +	struct probe_finder	pf; +	struct variable_list	*vls;		/* Found variable lists */ +	int			nvls;		/* Number of variable lists */ +	int			max_vls;	/* Max no. 
of variable lists */ +	bool			externs;	/* Find external vars too */ +	bool			child;		/* Search child scopes */ +}; +  struct line_finder {  	struct line_range	*lr;		/* Target line range */ diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c index 6d0df809a2ed..8bc010edca25 100644 --- a/tools/perf/util/ui/browser.c +++ b/tools/perf/util/ui/browser.c @@ -1,4 +1,3 @@ -#include <slang.h>  #include "libslang.h"  #include <linux/compiler.h>  #include <linux/list.h> | 
