From 44b1e60ab576c343aa592a2a6c679297cc69740d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 30 May 2016 12:49:42 -0300 Subject: perf stat: Basic support for TopDown in perf stat Add basic plumbing for TopDown in perf stat TopDown is intended to replace the frontend cycles idle/ backend cycles idle metrics in standard perf stat output. These metrics are not reliable in many workloads, due to out of order effects. This implements a new --topdown mode in perf stat (similar to --transaction) that measures the pipeline bottlenecks using standardized formulas. The measurement can all be done with 5 counters (one fixed counter). The results are four metrics: FrontendBound, BackendBound, BadSpeculation, Retiring that describe the CPU pipeline behavior on a high level. The full top down methodology has many hierarchical metrics. This implementation only supports level 1 which can be collected without multiplexing. A full implementation of top down on top of perf is available in pmu-tools toplev. (http://github.com/andikleen/pmu-tools) The current version works on Intel Core CPUs starting with Sandy Bridge, and Atom CPUs starting with Silvermont. In principle the generic metrics should be also implementable on other out of order CPUs. TopDown level 1 uses a set of abstracted metrics which are generic to out of order CPU cores (although some CPUs may not implement all of them): topdown-total-slots Available slots in the pipeline topdown-slots-issued Slots issued into the pipeline topdown-slots-retired Slots successfully retired topdown-fetch-bubbles Pipeline gaps in the frontend topdown-recovery-bubbles Pipeline gaps during recovery from misspeculation These metrics then allow to compute four useful metrics: FrontendBound, BackendBound, Retiring, BadSpeculation. Add a new --topdown option to enable events. When --topdown is specified set up events for all topdown events supported by the kernel.
Add topdown-* as a special case to the event parser, as is needed for all events containing -. The actual code to compute the metrics is in follow-on patches. v2: Use standard sysctl read function. v3: Move x86 specific code to arch/ v4: Enable --metric-only implicitly for topdown. v5: Add --single-thread option to not force per core mode v6: Fix output order of topdown metrics v7: Allow combining with -d v8: Remove --single-thread again v9: Rename functions, adding arch_ and topdown_. v10: Expand man page and describe TopDown better Paste intro into commit description. Print error when malloc fails. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1464119559-17203-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 119 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee7ada78d86f..fd76bb0b18d1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,10 +59,13 @@ #include "util/thread.h" #include "util/thread_map.h" #include "util/counts.h" +#include "util/group.h" #include "util/session.h" #include "util/tool.h" +#include "util/group.h" #include "asm/bug.h" +#include #include #include #include @@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = { "}" }; +static const char * topdown_attrs[] = { + "topdown-total-slots", + "topdown-slots-retired", + "topdown-recovery-bubbles", + "topdown-fetch-bubbles", + "topdown-slots-issued", + NULL, +}; + static struct perf_evlist *evsel_list; static struct target target = { @@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; static bool transaction_run; +static bool topdown_run = false; static bool big_num = true; static int big_num_opt = -1; static const char *csv_sep = NULL; @@ -124,6 
+137,7 @@ static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; +static bool force_metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -1520,6 +1534,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, return 0; } +static int enable_metric_only(const struct option *opt __maybe_unused, + const char *s __maybe_unused, int unset) +{ + force_metric_only = true; + metric_only = !unset; + return 0; +} + static const struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1578,8 +1600,10 @@ static const struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), - OPT_BOOLEAN(0, "metric-only", &metric_only, - "Only print computed metrics. No raw values"), + OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, + "Only print computed metrics. 
No raw values", enable_metric_only), + OPT_BOOLEAN(0, "topdown", &topdown_run, + "measure topdown level 1 statistics"), OPT_END() }; @@ -1772,12 +1796,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) return 0; } +static int topdown_filter_events(const char **attr, char **str, bool use_group) +{ + int off = 0; + int i; + int len = 0; + char *s; + + for (i = 0; attr[i]; i++) { + if (pmu_have_event("cpu", attr[i])) { + len += strlen(attr[i]) + 1; + attr[i - off] = attr[i]; + } else + off++; + } + attr[i - off] = NULL; + + *str = malloc(len + 1 + 2); + if (!*str) + return -1; + s = *str; + if (i - off == 0) { + *s = 0; + return 0; + } + if (use_group) + *s++ = '{'; + for (i = 0; attr[i]; i++) { + strcpy(s, attr[i]); + s += strlen(s); + *s++ = ','; + } + if (use_group) { + s[-1] = '}'; + *s = 0; + } else + s[-1] = 0; + return 0; +} + +__weak bool arch_topdown_check_group(bool *warn) +{ + *warn = false; + return false; +} + +__weak void arch_topdown_group_warn(void) +{ +} + /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ static int add_default_attributes(void) { + int err; struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, @@ -1896,7 +1970,6 @@ static int add_default_attributes(void) return 0; if (transaction_run) { - int err; if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, NULL); @@ -1909,6 +1982,46 @@ static int add_default_attributes(void) return 0; } + if (topdown_run) { + char *str = NULL; + bool warn = false; + + if (stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_CORE) { + pr_err("top down event configuration requires --per-core mode\n"); + return -1; + } + stat_config.aggr_mode = AGGR_CORE; + if (nr_cgroups || !target__has_cpu(&target)) { + pr_err("top down event configuration requires system-wide mode (-a)\n"); 
+ return -1; + } + + if (!force_metric_only) + metric_only = true; + if (topdown_filter_events(topdown_attrs, &str, + arch_topdown_check_group(&warn)) < 0) { + pr_err("Out of memory\n"); + return -1; + } + if (topdown_attrs[0] && str) { + if (warn) + arch_topdown_group_warn(); + err = parse_events(evsel_list, str, NULL); + if (err) { + fprintf(stderr, + "Cannot set up top down events %s: %d\n", + str, err); + free(str); + return -1; + } + } else { + fprintf(stderr, "System does not support topdown\n"); + return -1; + } + free(str); + } + if (!evsel_list->nr_entries) { if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; -- cgit v1.2.3 From 41c8ca2a924b359e8f1768f8550487cd13a1ec03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 24 May 2016 12:52:38 -0700 Subject: perf stat: Print topology/time headers with --metric-only When --metric-only is enabled there were no headers for the topology in interval mode. Also when headers were printed they were on a separate line. 
Before: $ perf stat --metric-only -A -I 1000 -a 1.001038376 frontend cycles idle insn per cycle stalled cycles per insn branch-misses of all branches 1.001038376 CPU0 123.54% 0.23 5.29 7.61% 1.001038376 CPU1 137.78% 0.24 5.13 10.07% 1.001038376 CPU2 64.48% 0.22 5.50 6.84% After: $ perf stat --metric-only -A -I 1000 -a 1.001111114 CPU0 82.46% 0.32 2.60 7.64% 1.001111114 CPU1 126.63% 0.02 42.83 0.15% 1.001111114 CPU2 193.54% 0.32 2.59 6.92% v2: Move all headers on a single line Reported-by: Jiri Olsa Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1464119559-17203-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fd76bb0b18d1..a168e726756b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1316,7 +1316,7 @@ static int aggr_header_lens[] = { [AGGR_GLOBAL] = 0, }; -static void print_metric_headers(char *prefix) +static void print_metric_headers(const char *prefix, bool no_indent) { struct perf_stat_output_ctx out; struct perf_evsel *counter; @@ -1327,7 +1327,7 @@ static void print_metric_headers(char *prefix) if (prefix) fprintf(stat_config.output, "%s", prefix); - if (!csv_output) + if (!csv_output && !no_indent) fprintf(stat_config.output, "%*s", aggr_header_lens[stat_config.aggr_mode], ""); @@ -1352,28 +1352,40 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output && !metric_only) { + if (num_print_interval == 0 && !csv_output) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time socket cpus"); + if (!metric_only) + fprintf(output, " 
counts %*s events\n", unit_width, "unit"); break; case AGGR_CORE: - fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time core cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time CPU"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_THREAD: - fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time comm-pid"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_GLOBAL: default: - fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); case AGGR_UNSET: break; } } + if (num_print_interval == 0 && metric_only) + print_metric_headers(" ", true); if (++num_print_interval == 25) num_print_interval = 0; } @@ -1442,8 +1454,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) { static int num_print_iv; - if (num_print_iv == 0) - print_metric_headers(prefix); + if (num_print_iv == 0 && !interval) + print_metric_headers(prefix, false); if (num_print_iv++ == 25) num_print_iv = 0; if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) -- cgit v1.2.3 From c51fd6395d67a6d414834db7f892c95594247d6f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 24 May 2016 12:52:39 -0700 Subject: perf stat: Add missing aggregation headers for --metric-only CSV When in CSV mode --metric-only outputs an header, unlike the other modes. Previously it did not properly print headers for the aggregation columns, so the headers were actually shifted against the real values. Fix this here by outputting the correct headers for CSV. v2: Indent array. 
Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1464119559-17203-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a168e726756b..dff63733dfb7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1316,6 +1316,14 @@ static int aggr_header_lens[] = { [AGGR_GLOBAL] = 0, }; +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_SOCKET] = "socket,cpus", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_GLOBAL] = "" +}; + static void print_metric_headers(const char *prefix, bool no_indent) { struct perf_stat_output_ctx out; @@ -1330,6 +1338,12 @@ static void print_metric_headers(const char *prefix, bool no_indent) if (!csv_output && !no_indent) fprintf(stat_config.output, "%*s", aggr_header_lens[stat_config.aggr_mode], ""); + if (csv_output) { + if (stat_config.interval) + fputs("time,", stat_config.output); + fputs(aggr_header_csv[stat_config.aggr_mode], + stat_config.output); + } /* Print metrics headers only */ evlist__for_each(evsel_list, counter) { -- cgit v1.2.3 From e5cadb93d0839d268a7c4199e0fdef0f94722117 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Jun 2016 11:26:15 -0300 Subject: perf evlist: Rename for_each() macros to for_each_entry() To match the semantics for list.h in the kernel, that are used to implement those macros. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-qbcjlgj0ffxquxscahbpddi3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index dff63733dfb7..c367a43525e6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -331,7 +331,7 @@ static void read_counters(bool close_counters) { struct perf_evsel *counter; - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { if (read_counter(counter)) pr_debug("failed to read counter %s\n", counter->name); @@ -417,7 +417,7 @@ static int perf_stat_synthesize_config(bool is_pipe) * Synthesize other events stuff not carried within * attr event - unit, scale, name */ - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) continue; @@ -550,7 +550,7 @@ static int __run_perf_stat(int argc, const char **argv) if (group) perf_evlist__set_leader(evsel_list); - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { try_again: if (create_perf_stat_counter(counter) < 0) { /* @@ -1134,7 +1134,7 @@ static void aggr_update_shadow(void) for (s = 0; s < aggr_map->nr; s++) { id = aggr_map->map[s]; - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { val = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { s2 = aggr_get_id(evsel_list->cpus, cpu); @@ -1173,7 +1173,7 @@ static void print_aggr(char *prefix) id = aggr_map->map[s]; first = true; - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { val = ena = run = 0; nr = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { @@ -1292,7 +1292,7 @@ static void 
print_no_aggr_metric(char *prefix) if (prefix) fputs(prefix, stat_config.output); - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { if (first) { aggr_printout(counter, cpu, 0); first = false; @@ -1346,7 +1346,7 @@ static void print_metric_headers(const char *prefix, bool no_indent) } /* Print metrics headers only */ - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { os.evsel = counter; out.ctx = &os; out.print_metric = print_metric_header; @@ -1482,11 +1482,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) print_aggr(prefix); break; case AGGR_THREAD: - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) print_aggr_thread(counter, prefix); break; case AGGR_GLOBAL: - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) print_counter_aggr(counter, prefix); if (metric_only) fputc('\n', stat_config.output); @@ -1495,7 +1495,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) print_no_aggr_metric(prefix); else { - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) print_counter(counter, prefix); } break; @@ -2149,7 +2149,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, const char **argv = session->header.env.cmdline_argv; int argc = session->header.env.nr_cmdline; - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) perf_stat_process_counter(&stat_config, counter); if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) -- cgit v1.2.3 From c8b5f2c96d1bf6cefcbe12f67dce0b892fe20512 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 Jul 2016 11:56:20 -0300 Subject: tools: Introduce str_error_r() The tools so far have been using the strerror_r() GNU variant, that returns a string, be it the buffer passed or something else. 
But that, besides being tricky in cases where we expect that the function using strerror_r() returns the error formatted in a provided buffer (we have to check if it returned something else and copy that instead), breaks the build on systems not using glibc, like Alpine Linux, where musl libc is used. So, introduce yet another wrapper, str_error_r(), that has the GNU interface, but uses the portable XSI variant of strerror_r(), so that users can rest assured that the provided buffer is used and it is what is returned. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d4t42fnf48ytlk8rjxs822tf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c367a43525e6..8c5a3bfdfdd7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -596,7 +596,7 @@ try_again: if (perf_evlist__apply_filters(evsel_list, &counter)) { error("failed to set filter \"%s\" on event %s with %d (%s)\n", counter->filter, perf_evsel__name(counter), errno, - strerror_r(errno, msg, sizeof(msg))); + str_error_r(errno, msg, sizeof(msg))); return -1; } @@ -637,7 +637,7 @@ try_again: wait(&status); if (workload_exec_errno) { - const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); + const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); pr_err("Workload failed: %s\n", emsg); return -1; } -- cgit v1.2.3 From 00e727bb389359c81101b03d34fec8cc7be5168d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Jul 2016 11:08:10 +0100 Subject: perf stat: Balance opening and reading events In create_perf_stat_counter, when a target CPU has not been provided, we call __perf_evsel__open with empty_cpu_map, and open a single FD per thread.
However, in read_counter we assume that we opened events for the product of threads and CPUs described in the evsel's cpu_map. Thus, if an evsel has a cpu_map with more than one entry, we will attempt to access FDs that we didn't open. This could result in a number of problems (e.g. blocking while reading from STDIN if the fd memory happened to be initialised to zero). This is problematic for systems where a logical CPU PMU covers some arbitrary subset of CPUs. The cpu_map of any evsel for that PMU will be initialised based on the cpumask exposed through sysfs, even if the user requests per-thread events. Signed-off-by: Mark Rutland Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: He Kuang Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1468577293-19667-2-git-send-email-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8c5a3bfdfdd7..0c16d20d7e32 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -290,8 +290,12 @@ perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, static int read_counter(struct perf_evsel *counter) { int nthreads = thread_map__nr(evsel_list->threads); - int ncpus = perf_evsel__nr_cpus(counter); - int cpu, thread; + int ncpus, cpu, thread; + + if (target__has_cpu(&target)) + ncpus = perf_evsel__nr_cpus(counter); + else + ncpus = 1; if (!counter->supported) return -ENOENT; -- cgit v1.2.3