From b50311dc2ac1c04ad19163c2359910b25e16caf6 Mon Sep 17 00:00:00 2001 From: Roberto Agostino Vitillo Date: Thu, 9 Feb 2012 23:21:03 +0100 Subject: perf report: Add support for taken branch sampling This patch adds support for taken branch sampling, i.e, the PERF_SAMPLE_BRANCH_STACK feature to perf report. In other words, to display histograms based on taken branches rather than executed instructions addresses. The new option is called -b and it takes no argument. To generate meaningful output, the perf.data must have been obtained using perf record -b xxx ... where xxx is a branch filter option. The output shows symbols, modules, sorted by 'who branches where' the most often. The percentages reported in the first column refer to the total number of branches captured and not the usual number of samples. Here is a quick example. Here branchy is simple test program which looks as follows: void f2(void) {} void f3(void) {} void f1(unsigned long n) { if (n & 1UL) f2(); else f3(); } int main(void) { unsigned long i; for (i=0; i < N; i++) f1(i); return 0; } Here is the output captured on Nehalem, if we are only interested in user level function calls. $ perf record -b any_call,u -e cycles:u branchy $ perf report -b --sort=symbol 52.34% [.] main [.] f1 24.04% [.] f1 [.] f3 23.60% [.] f1 [.] f2 0.01% [k] _IO_new_file_xsputn [k] _IO_file_overflow 0.01% [k] _IO_vfprintf_internal [k] _IO_new_file_xsputn 0.01% [k] _IO_vfprintf_internal [k] strchrnul 0.01% [k] __printf [k] _IO_vfprintf_internal 0.01% [k] main [k] __printf About half (52%) of the call branches captured are from main() -> f1(). The second half (24%+23%) is split in two equal shares between f1() -> f2(), f1() ->f3(). The output is as expected given the code. It should be noted, that using -b in perf record does not eliminate information in the perf.data file. Consequently, a typical profile can also be obtained by perf report by simply not using its -b option. It is possible to sort on branch related columns: - dso_from, symbol_from - dso_to, symbol_to - mispredict Signed-off-by: Roberto Agostino Vitillo Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: robert.richter@amd.com Cc: ming.m.lin@intel.com Cc: andi@firstfloor.org Cc: asharma@fb.com Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1328826068-11713-14-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 107 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 9 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e49..528789f6c702 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,50 @@ struct perf_report { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +static int perf_report__add_branch_hist_entry(struct perf_tool *tool, + struct addr_location *al, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct perf_report *rep = container_of(tool, struct perf_report, tool); + struct symbol *parent = NULL; + int err = 0; + unsigned i; + struct hist_entry *he; + struct branch_info *bi; + + if ((sort__has_parent || symbol_conf.use_callchain) + && sample->callchain) { + err = machine__resolve_callchain(machine, evsel, al->thread, + sample->callchain, &parent); + if (err) + return err; + } + + bi = machine__resolve_bstack(machine, al->thread, + sample->branch_stack); + if (!bi) + return -ENOMEM; + + for (i = 0; i < sample->branch_stack->nr; i++) { + if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) + continue; + /* + * The report shows the percentage of total branches captured + * and not events sampled. Thus we use a pseudo period of 1. + */ + he = __hists__add_branch_entry(&evsel->hists, al, parent, + &bi[i], 1); + if (he) { + evsel->hists.stats.total_period += 1; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + } else + return -ENOMEM; + } + return err; +} + static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample, @@ -126,14 +170,21 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (al.map != NULL) - al.map->dso->hit = 1; + if (sort__branch_mode) { + if (perf_report__add_branch_hist_entry(tool, &al, sample, + evsel, machine)) { + pr_debug("problem adding lbr entry, skipping event\n"); + return -1; + } + } else { + if (al.map != NULL) + al.map->dso->hit = 1; - if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { - pr_debug("problem incrementing symbol period, skipping event\n"); - return -1; + if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { + pr_debug("problem incrementing symbol period, skipping event\n"); + return -1; + } } - return 0; } @@ -188,6 +239,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } } + if (sort__branch_mode) { + if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { + fprintf(stderr, "selected -b but no branch data." + " Did you call perf record without" + " -b?\n"); + return -1; + } + } + return 0; } @@ -477,7 +537,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) OPT_BOOLEAN(0, "stdio", &report.use_stdio, "Use the stdio interface"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent"), + "sort by key(s): pid, comm, dso, symbol, parent, dso_to," + " dso_from, symbol_to, symbol_from, mispredict"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", @@ -517,6 +578,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), + OPT_BOOLEAN('b', "branch-stack", &sort__branch_mode, + "use branch records for histogram filling"), OPT_END() }; @@ -537,10 +600,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) report.input_name = "perf.data"; } - if (strcmp(report.input_name, "-") != 0) + if (sort__branch_mode) { + if (use_browser) + fprintf(stderr, "Warning: TUI interface not supported" + " in branch mode\n"); + if (symbol_conf.dso_list_str != NULL) + fprintf(stderr, "Warning: dso filtering not supported" + " in branch mode\n"); + if (symbol_conf.sym_list_str != NULL) + fprintf(stderr, "Warning: symbol filtering not" + " supported in branch mode\n"); + + report.use_stdio = true; + use_browser = 0; setup_browser(true); - else + symbol_conf.dso_list_str = NULL; + symbol_conf.sym_list_str = NULL; + + /* + * if no sort_order is provided, then specify branch-mode + * specific order + */ + if (sort_order == default_sort_order) + sort_order = "comm,dso_from,symbol_from," + "dso_to,symbol_to"; + + } else if (strcmp(report.input_name, "-") != 0) { + setup_browser(true); + } else { use_browser = 0; + } /* * Only in the newt browser we are doing integrated annotation, -- cgit v1.2.3 From 993ac88d5892629fbe1f8a54857f9947f49f0d96 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 8 Mar 2012 23:47:47 +0100 Subject: perf report: Auto-detect branch stack sampling mode This patch enhances perf report to auto-detect when the perf.data file contains samples with branch stacks. That way it is not necessary to use the -b option. To force branch view mode to off, simply use --no-branch-stack. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: asharma@fb.com Cc: ravitillo@lbl.gov Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1331246868-19905-4-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 50 ++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 16 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 528789f6c702..66e852376a05 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -170,7 +170,7 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (sort__branch_mode) { + if (sort__branch_mode == 1) { if (perf_report__add_branch_hist_entry(tool, &al, sample, evsel, machine)) { pr_debug("problem adding lbr entry, skipping event\n"); @@ -239,7 +239,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } } - if (sort__branch_mode) { + if (sort__branch_mode == 1) { if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { fprintf(stderr, "selected -b but no branch data." " Did you call perf record without" @@ -306,7 +306,7 @@ static int __cmd_report(struct perf_report *rep) { int ret = -EINVAL; u64 nr_samples; - struct perf_session *session; + struct perf_session *session = rep->session; struct perf_evsel *pos; struct map *kernel_map; struct kmap *kernel_kmap; @@ -314,13 +314,6 @@ static int __cmd_report(struct perf_report *rep) signal(SIGINT, sig_handler); - session = perf_session__new(rep->input_name, O_RDONLY, - rep->force, false, &rep->tool); - if (session == NULL) - return -ENOMEM; - - rep->session = session; - if (rep->cpu_list) { ret = perf_session__cpu_bitmap(session, rep->cpu_list, rep->cpu_bitmap); @@ -487,9 +480,19 @@ setup: return 0; } +static int +parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) +{ + sort__branch_mode = !unset; + return 0; +} + int cmd_report(int argc, const char **argv, const char *prefix __used) { + struct perf_session *session; struct stat st; + bool has_br_stack = false; + int ret = -1; char callchain_default_opt[] = "fractal,0.5,callee"; const char * const report_usage[] = { "perf report []", @@ -578,8 +581,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), - OPT_BOOLEAN('b', "branch-stack", &sort__branch_mode, - "use branch records for histogram filling"), + OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", + "use branch records for histogram filling", parse_branch_mode), OPT_END() }; @@ -599,8 +602,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) else report.input_name = "perf.data"; } + session = perf_session__new(report.input_name, O_RDONLY, + report.force, false, &report.tool); + if (session == NULL) + return -ENOMEM; + + report.session = session; + + has_br_stack = perf_header__has_feat(&session->header, + HEADER_BRANCH_STACK); - if (sort__branch_mode) { + if (sort__branch_mode == -1 && has_br_stack) + sort__branch_mode = 1; + + if (sort__branch_mode == 1) { if (use_browser) fprintf(stderr, "Warning: TUI interface not supported" " in branch mode\n"); @@ -657,13 +672,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) } if (symbol__init() < 0) - return -1; + goto error; setup_sorting(report_usage, options); if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) - return -1; + goto error; /* * Only show the parent fields if we explicitly @@ -685,5 +700,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); - return __cmd_report(&report); + ret = __cmd_report(&report); +error: + perf_session__delete(session); + return ret; } -- cgit v1.2.3 From a68c2c58171391ef368fced32a555b2f0ff106e5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 8 Mar 2012 23:47:48 +0100 Subject: perf report: Enable TUI in branch view mode This patch updates perf report to support TUI mode when the perf.data file contains samples with branch stacks. For each row in the report, it is possible to annotate either the source or target of each branch. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: asharma@fb.com Cc: ravitillo@lbl.gov Cc: vweaver1@eecs.utk.edu Cc: khandual@linux.vnet.ibm.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/1331246868-19905-5-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 73 +++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 23 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 66e852376a05..8e91c6eba18a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -64,7 +64,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, int err = 0; unsigned i; struct hist_entry *he; - struct branch_info *bi; + struct branch_info *bi, *bx; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { @@ -87,13 +87,45 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, * and not events sampled. Thus we use a pseudo period of 1. */ he = __hists__add_branch_entry(&evsel->hists, al, parent, - &bi[i], 1); + &bi[i], 1); if (he) { + struct annotation *notes; + err = -ENOMEM; + bx = he->branch_info; + if (bx->from.sym && use_browser > 0) { + notes = symbol__annotation(bx->from.sym); + if (!notes->src + && symbol__alloc_hist(bx->from.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->from.sym, + bx->from.map, + evsel->idx, + bx->from.al_addr); + if (err) + goto out; + } + + if (bx->to.sym && use_browser > 0) { + notes = symbol__annotation(bx->to.sym); + if (!notes->src + && symbol__alloc_hist(bx->to.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->to.sym, + bx->to.map, + evsel->idx, + bx->to.al_addr); + if (err) + goto out; + } evsel->hists.stats.total_period += 1; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + err = 0; } else return -ENOMEM; } +out: return err; } @@ -615,32 +647,19 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (sort__branch_mode == -1 && has_br_stack) sort__branch_mode = 1; + /* sort__branch_mode could be 0 if --no-branch-stack */ if (sort__branch_mode == 1) { - if (use_browser) - fprintf(stderr, "Warning: TUI interface not supported" - " in branch mode\n"); - if (symbol_conf.dso_list_str != NULL) - fprintf(stderr, "Warning: dso filtering not supported" - " in branch mode\n"); - if (symbol_conf.sym_list_str != NULL) - fprintf(stderr, "Warning: symbol filtering not" - " supported in branch mode\n"); - - report.use_stdio = true; - use_browser = 0; - setup_browser(true); - symbol_conf.dso_list_str = NULL; - symbol_conf.sym_list_str = NULL; - /* - * if no sort_order is provided, then specify branch-mode - * specific order + * if no sort_order is provided, then specify + * branch-mode specific order */ if (sort_order == default_sort_order) sort_order = "comm,dso_from,symbol_from," "dso_to,symbol_to"; - } else if (strcmp(report.input_name, "-") != 0) { + } + + if (strcmp(report.input_name, "-") != 0) { setup_browser(true); } else { use_browser = 0; @@ -696,9 +715,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(report_usage, options); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + + if (sort__branch_mode == 1) { + sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); + sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); + sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); + sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); + } else { + sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); + sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + } ret = __cmd_report(&report); error: -- cgit v1.2.3