From 9396c9cb0d9534ca67bda8a34b2413a2ca1c67e9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:50 +0900 Subject: perf sched timehist: Show total scheduling time Show length of analyzed sample time and rate of idle task running. This also takes care of time range given by --time option. $ perf sched timehist -sI | tail Samples do not have callchains. Idle stats: CPU 0 idle for 930.316 msec ( 92.93%) CPU 1 idle for 963.614 msec ( 96.25%) CPU 2 idle for 885.482 msec ( 88.45%) CPU 3 idle for 938.635 msec ( 93.76%) Total number of unique tasks: 118 Total number of context switches: 2337 Total run time (msec): 3718.048 Total scheduling time (msec): 1001.131 (x 4) Suggested-by: David Ahern Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d53e706a6f17..5b134b0d1ff3 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -209,6 +209,7 @@ struct perf_sched { u64 skipped_samples; const char *time_str; struct perf_time_interval ptime; + struct perf_time_interval hist_time; }; /* per thread run time data */ @@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool, timehist_print_sample(sched, sample, &al, thread, t); out: + if (sched->hist_time.start == 0 && t >= ptime->start) + sched->hist_time.start = t; + if (ptime->end == 0 || t <= ptime->end) + sched->hist_time.end = t; + if (tr) { /* time of this sched_switch event becomes last time task seen */ tr->last_time = sample->time; @@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched, struct thread *t; struct thread_runtime *r; int i; + u64 hist_time = sched->hist_time.end - sched->hist_time.start; memset(&totals, 0, sizeof(totals)); @@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched, totals.sched_count += r->run_stats.n; printf(" CPU %2d idle for ", i); print_sched_time(r->total_run_time, 6); - printf(" msec\n"); + printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time); } else printf(" CPU %2d idle entire time window\n", i); } @@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched, printf("\n" " Total number of unique tasks: %" PRIu64 "\n" - "Total number of context switches: %" PRIu64 "\n" - " Total run time (msec): ", + "Total number of context switches: %" PRIu64 "\n", totals.task_count, totals.sched_count); + printf(" Total run time (msec): "); print_sched_time(totals.total_run_time, 2); printf("\n"); + + printf(" Total scheduling time (msec): "); + print_sched_time(hist_time, 2); + printf(" (x %d)\n", sched->max_cpu); } typedef int (*sched_handler)(struct perf_tool *tool, -- cgit v1.2.3 From 1f2ed153b916c95a49a1ca9d7107738664224b7f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Jan 2017 00:20:49 +0900 Subject: perf probe: Fix to get correct modname from elf header Since 'perf probe' supports cross-arch probes, it is possible to analyze different arch kernel image which has different bits-per-long. In that case, it fails to get the module name because it uses the MOD_NAME_OFFSET macro based on the host machine bits-per-long, instead of the target arch bits-per-long. This fixes above issue by changing modname-offset based on the target archs bit width. This is ok because linux kernel uses LP64 model on 64bit arch. E.g. without this (on x86_64, and target module is arm32): $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup p:probe/configfs_lookup :configfs_lookup+0 ^-Here is an empty module name. With this fix, you can see correct module name: $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup p:probe/configfs_lookup configfs:configfs_lookup+0 Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148337043836.6752.383495516397005695.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d281ae2b54e8..8f810961ec78 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -267,21 +267,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address) return true; } -/* - * NOTE: - * '.gnu.linkonce.this_module' section of kernel module elf directly - * maps to 'struct module' from linux/module.h. This section contains - * actual module name which will be used by kernel after loading it. - * But, we cannot use 'struct module' here since linux/module.h is not - * exposed to user-space. Offset of 'name' has remained same from long - * time, so hardcoding it here. - */ -#ifdef __LP64__ -#define MOD_NAME_OFFSET 24 -#else -#define MOD_NAME_OFFSET 12 -#endif - /* * @module can be module name of module file path. In case of path, * inspect elf and find out what is actual module name. @@ -296,6 +281,7 @@ static char *find_module_name(const char *module) Elf_Data *data; Elf_Scn *sec; char *mod_name = NULL; + int name_offset; fd = open(module, O_RDONLY); if (fd < 0) @@ -317,7 +303,21 @@ static char *find_module_name(const char *module) if (!data || !data->d_buf) goto ret_err; - mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET); + /* + * NOTE: + * '.gnu.linkonce.this_module' section of kernel module elf directly + * maps to 'struct module' from linux/module.h. This section contains + * actual module name which will be used by kernel after loading it. + * But, we cannot use 'struct module' here since linux/module.h is not + * exposed to user-space. Offset of 'name' has remained same from long + * time, so hardcoding it here. + */ + if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) + name_offset = 12; + else /* expect ELFCLASS64 by default */ + name_offset = 24; + + mod_name = strdup((char *)data->d_buf + name_offset); ret_err: elf_end(elf); -- cgit v1.2.3 From b66fb1da5a8cac3f5c3cdbe41937c91efc4e76a4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Jan 2017 09:19:54 +0100 Subject: tools lib subcmd: Add OPT_STRING_OPTARG_SET option To allow string options with a default argument and variable set when the option is used. Signed-off-by: Jiri Olsa Tested-by: Wang Nan Cc: David Ahern Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483431600-19887-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/parse-options.c | 3 +++ tools/lib/subcmd/parse-options.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 3284bb14ae78..8aad81151d50 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p, else err = get_arg(p, opt, flags, (const char **)opt->value); + if (opt->set) + *(bool *)opt->set = true; + /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ if (opt->flags & PARSE_OPT_NOEMPTY) { const char *val = *(const char **)opt->value; diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 8866ac438b34..11c3be3bcce7 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -137,6 +137,11 @@ struct option { { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ .value = check_vtype(v, const char **), (a), .help = (h), \ .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) } +#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \ + { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, const char **), (a), .help = (h), \ + .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \ + .set = check_vtype(os, bool *)} #define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} #define OPT_DATE(s, l, v, h) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } -- cgit v1.2.3 From efd21307119d5a23ac83ae8fd5a39a5c7aeb8493 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Jan 2017 09:19:55 +0100 Subject: perf record: Make __record_options static There's no need for this one to be global. Signed-off-by: Jiri Olsa Tested-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483431600-19887-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 74d6a035133a..31cf0ce12a65 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1405,7 +1405,7 @@ static bool dry_run; * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', * using pipes, etc. */ -struct option __record_options[] = { +static struct option __record_options[] = { OPT_CALLBACK('e', "event", &record.evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option), -- cgit v1.2.3 From 60437ac02f398e0ee0927748d4798dd5534ac90d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Jan 2017 09:19:56 +0100 Subject: perf record: Fix --switch-output documentation and comment There's no --signal-trigger option, also adding the code comment into record man page. Signed-off-by: Jiri Olsa Tested-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483431600-19887-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 ++++ tools/perf/builtin-record.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 27fc3617c6a4..5054d9147f0f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that particular perf.data snapshot should be kept or not. Implies --timestamp-filename, --no-buildid and --no-buildid-cache. +The reason for the latter two is to reduce the data file switching +overhead. You can still switch them on with: + + --switch-output --no-no-buildid --no-no-buildid-cache --dry-run:: Parse options then exit. --dry-run can be used to detect errors in cmdline diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 31cf0ce12a65..4ec10e9427d9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) * overhead. Still generate buildid if they are required * explicitly using * - * perf record --signal-trigger --no-no-buildid \ + * perf record --switch-output --no-no-buildid \ * --no-no-buildid-cache * * Following code equals to: -- cgit v1.2.3 From 074859184d770824f4437dca716bdeb625ae8b1c Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 3 Jan 2017 12:42:42 +0100 Subject: tools lib traceevent: Fix prev/next_prio for deadline tasks Currently, the sched:sched_switch tracepoint reports deadline tasks with priority -1. But when reading the trace via perf script I've got the following output: # ./d & # (d is a deadline task, see [1]) # perf record -e sched:sched_switch -a sleep 1 # perf script ... swapper 0 [000] 2146.962441: sched:sched_switch: swapper/0:0 [120] R ==> d:2593 [4294967295] d 2593 [000] 2146.972472: sched:sched_switch: d:2593 [4294967295] R ==> g:2590 [4294967295] The task d reports the wrong priority [4294967295]. This happens because the "int prio" is stored in an unsigned long long val. Although it is set as a %lld, as int is shorter than unsigned long long, trace_seq_printf prints it as a positive number. The fix is just to cast the val as an int, and print it as a %d, as in the sched:sched_switch tracepoint's "format". The output with the fix is: # ./d & # perf record -e sched:sched_switch -a sleep 1 # perf script ... swapper 0 [000] 4306.374037: sched:sched_switch: swapper/0:0 [120] R ==> d:10941 [-1] d 10941 [000] 4306.383823: sched:sched_switch: d:10941 [-1] R ==> swapper/0:0 [120] [1] d.c --- #include #include #include #include #include struct sched_attr { __u32 size, sched_policy; __u64 sched_flags; __s32 sched_nice; __u32 sched_priority; __u64 sched_runtime, sched_deadline, sched_period; }; int sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) { return syscall(__NR_sched_setattr, pid, attr, flags); } int main(void) { struct sched_attr attr = { .size = sizeof(attr), .sched_policy = SCHED_DEADLINE, /* This creates a 10ms/30ms reservation */ .sched_runtime = 10 * 1000 * 1000, .sched_period = attr.sched_deadline = 30 * 1000 * 1000, }; if (sched_setattr(0, &attr, 0) < 0) { perror("sched_setattr"); return -1; } for(;;); } --- Committer notes: Got the program from the provided URL, http://bristot.me/lkml/d.c, trimmed it and included in the cset log above, so that we have everything needed to test it in one place. Signed-off-by: Daniel Bristot de Oliveira Acked-by: Steven Rostedt Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Daniel Bristot de Oliveira Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/866ef75bcebf670ae91c6a96daa63597ba981f0d.1483443552.git.bristot@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/plugin_sched_switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index f1ce60065258..ec30c2fcbac0 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld ", val); if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) - trace_seq_printf(s, "[%lld] ", val); + trace_seq_printf(s, "[%d] ", (int) val); if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0) write_state(s, val); @@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld", val); if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0) - trace_seq_printf(s, " [%lld]", val); + trace_seq_printf(s, " [%d]", (int) val); return 0; } -- cgit v1.2.3 From 30a9c6444810429aa2b7cbfbd453ce339baaadbf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Jan 2017 12:03:59 -0300 Subject: perf tools: Install tools/lib/traceevent plugins with install-bin Those are binaries as well, so should be installed by: make -C tools/perf install-bin' too. Cc: Alexander Shishkin Cc: Daniel Bristot de Oliveira Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-3841b37u05evxrs1igkyu6ks@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e9ec531131ca..4db68aec9913 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -704,9 +704,9 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' -install-bin: install-tools install-tests +install-bin: install-tools install-tests install-traceevent-plugins -install: install-bin try-install-man install-traceevent-plugins +install: install-bin try-install-man install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' -- cgit v1.2.3 From 7934c98a6e04028eb34c1293bfb5a6b0ab630b66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Jan 2017 15:19:21 -0300 Subject: perf symbols: Robustify reading of build-id from sysfs Markus reported that perf segfaults when reading /sys/kernel/notes from a kernel linked with GNU gold, due to what looks like a gold bug, so do some bounds checking to avoid crashing in that case. Reported-by: Markus Trippelsdorf Report-Link: http://lkml.kernel.org/r/20161219161821.GA294@x4 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ryhgs6a6jxvz207j2636w31c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 99400b0e8f2a..adbc6c02c3aa 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) break; } else { int n = namesz + descsz; + + if (n > (int)sizeof(bf)) { + n = sizeof(bf); + pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n", + __func__, filename, nhdr.n_namesz, nhdr.n_descsz); + } if (read(fd, bf, n) != n) break; } -- cgit v1.2.3 From eebc509b20881b92d62e317b2c073e57c5f200f0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 4 Jan 2017 12:29:05 +0900 Subject: perf probe: Fix --funcs to show correct symbols for offline module Fix --funcs (-F) option to show correct symbols for offline module. Since previous perf-probe uses machine__findnew_module_map() for offline module, even if user passes a module file (with full path) which is for other architecture, perf-probe always tries to load symbol map for current kernel module. This fix uses dso__new_map() to load the map from given binary as same as a map for user applications. Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148350053478.19001.15435255244512631545.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8f810961ec78..542e6472c4d7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module) /* A file path -- this is an offline module */ if (module && strchr(module, '/')) - return machine__findnew_module_map(host_machine, 0, module); + return dso__new_map(module); if (!module) module = "kernel"; @@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module) if (strncmp(pos->dso->short_name + 1, module, pos->dso->short_name_len - 2) == 0 && module[pos->dso->short_name_len - 2] == '\0') { + map__get(pos); return pos; } } @@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user) return kernel_get_module_map(target); } -static void put_target_map(struct map *map, bool user) -{ - if (map && user) { - /* Only the user map needs to be released */ - map__put(map); - } -} - - static int convert_exec_to_group(const char *exec, char **result) { char *ptr1, *ptr2, *exec_copy; @@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, } out: - put_target_map(map, uprobes); + map__put(map); return ret; } @@ -2869,7 +2861,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } out: - put_target_map(map, pev->uprobes); + map__put(map); free(syms); return ret; @@ -3362,10 +3354,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, return ret; /* Get a symbol map */ - if (user) - map = dso__new_map(target); - else - map = kernel_get_module_map(target); + map = get_target_map(target, user); if (!map) { pr_err("Failed to get a map for %s\n", (target) ? : "kernel"); return -EINVAL; @@ -3397,9 +3386,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, } end: - if (user) { - map__put(map); - } + map__put(map); exit_probe_symbol_maps(); return ret; -- cgit v1.2.3 From 8a937a25a7e3c19d5fb3f9d92f605cf5fda219d8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 4 Jan 2017 12:30:19 +0900 Subject: perf probe: Fix to probe on gcc generated symbols for offline kernel Fix perf-probe to show probe definition on gcc generated symbols for offline kernel (including cross-arch kernel image). gcc sometimes optimizes functions and generate new symbols with suffixes such as ".constprop.N" or ".isra.N" etc. Since those symbol names are not recorded in DWARF, we have to find correct generated symbols from offline ELF binary to probe on it (kallsyms doesn't correct it). For online kernel or uprobes we don't need it because those are rebased on _text, or a section relative address. E.g. Without this: $ perf probe -k build-arm/vmlinux -F __slab_alloc* __slab_alloc.constprop.9 $ perf probe -k build-arm/vmlinux -D __slab_alloc p:probe/__slab_alloc __slab_alloc+0 If you put above definition on target machine, it should fail because there is no __slab_alloc in kallsyms. With this fix, perf probe shows correct probe definition on __slab_alloc.constprop.9: $ perf probe -k build-arm/vmlinux -D __slab_alloc p:probe/__slab_alloc __slab_alloc.constprop.9+0 Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148350060434.19001.11864836288580083501.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 48 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 542e6472c4d7..4a57c8a60bd9 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -610,6 +610,51 @@ error: return ret ? : -ENOENT; } +/* + * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions + * and generate new symbols with suffixes such as .constprop.N or .isra.N + * etc. Since those symbols are not recorded in DWARF, we have to find + * correct generated symbols from offline ELF binary. + * For online kernel or uprobes we don't need this because those are + * rebased on _text, or already a section relative address. + */ +static int +post_process_offline_probe_trace_events(struct probe_trace_event *tevs, + int ntevs, const char *pathname) +{ + struct symbol *sym; + struct map *map; + unsigned long stext = 0; + u64 addr; + int i; + + /* Prepare a map for offline binary */ + map = dso__new_map(pathname); + if (!map || get_text_start_address(pathname, &stext) < 0) { + pr_warning("Failed to get ELF symbols for %s\n", pathname); + return -EINVAL; + } + + for (i = 0; i < ntevs; i++) { + addr = tevs[i].point.address + tevs[i].point.offset - stext; + sym = map__find_symbol(map, addr); + if (!sym) + continue; + if (!strcmp(sym->name, tevs[i].point.symbol)) + continue; + /* If we have no realname, use symbol for it */ + if (!tevs[i].point.realname) + tevs[i].point.realname = tevs[i].point.symbol; + else + free(tevs[i].point.symbol); + tevs[i].point.symbol = strdup(sym->name); + tevs[i].point.offset = addr - sym->start; + } + map__put(map); + + return 0; +} + static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *exec) { @@ -671,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, /* Skip post process if the target is an offline kernel */ if (symbol_conf.ignore_vmlinux_buildid) - return 0; + return post_process_offline_probe_trace_events(tevs, ntevs, + symbol_conf.vmlinux_name); reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { -- cgit v1.2.3 From a2b1e8a20c992b01eeb76de00d4f534cbe9f3822 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 14 Dec 2016 11:59:34 +0100 Subject: selftests: do not require bash for the generated test Nothing in this minimal script seems to require bash. We often run these tests on embedded devices where the only shell available is the busybox ash. Use sh instead. Signed-off-by: Rolf Eike Beer Cc: stable@vger.kernel.org Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 71b05891a6a1..831022b12848 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -90,7 +90,7 @@ ifdef INSTALL_PATH done; @# Ask all targets to emit their test scripts - echo "#!/bin/bash" > $(ALL_SCRIPT) + echo "#!/bin/sh" > $(ALL_SCRIPT) echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) -- cgit v1.2.3 From d979e13a3fa9067c8cd46e292ed859626d443996 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 14 Dec 2016 11:58:20 +0100 Subject: selftests: do not require bash to run bpf tests Nothing in this minimal script seems to require bash. We often run these tests on embedded devices where the only shell available is the busybox ash. Use sh instead. Signed-off-by: Rolf Eike Beer Cc: stable@vger.kernel.org Acked-by: Daniel Borkmann Signed-off-by: Shuah Khan --- tools/testing/selftests/bpf/test_kmod.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index 92e627adf354..6d58cca8e235 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh SRC_TREE=../../../../ -- cgit v1.2.3 From 3659f98b5375d195f1870c3e508fe51e52206839 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 14 Dec 2016 11:59:57 +0100 Subject: selftests: do not require bash to run netsocktests testcase Nothing in this minimal script seems to require bash. We often run these tests on embedded devices where the only shell available is the busybox ash. Use sh instead. Signed-off-by: Rolf Eike Beer Cc: stable@vger.kernel.org Signed-off-by: Shuah Khan --- tools/testing/selftests/net/run_netsocktests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index c09a682df56a..16058bbea7a8 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh echo "--------------------" echo "running socket test" -- cgit v1.2.3 From 7738789fba09108a28a5fb4739595d9a0a2f85fe Mon Sep 17 00:00:00 2001 From: Colin King Date: Tue, 27 Dec 2016 16:17:21 +0000 Subject: selftests: x86/pkeys: fix spelling mistake: "itertation" -> "iteration" Fix spelling mistake in print test pass message. Signed-off-by: Colin Ian King Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/protection_keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index bdd58c78902e..df9e0a0cdf29 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -1367,7 +1367,7 @@ void run_tests_once(void) tracing_off(); close_test_fds(); - printf("test %2d PASSED (itertation %d)\n", test_nr, iteration_nr); + printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); dprintf1("======================\n\n"); } iteration_nr++; -- cgit v1.2.3 From 41b6167e8f746b475668f1da78599fc4284f18db Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 10 Jan 2017 16:57:42 -0800 Subject: mm: get rid of __GFP_OTHER_NODE The flag was introduced by commit 78afd5612deb ("mm: add __GFP_OTHER_NODE flag") to allow proper accounting of remote node allocations done by kernel daemons on behalf of a process - e.g. khugepaged. After "mm: fix remote numa hits statistics" we do not need and actually use the flag so we can safely remove it because all allocations which are satisfied from their "home" node are accounted properly. [mhocko@suse.com: fix build] Link: http://lkml.kernel.org/r/20170106122225.GK5556@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20170102153057.9451-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Taku Izumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/perf/builtin-kmem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 35a02f8e5a4a..915869e00d86 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -655,7 +655,6 @@ static const struct { { "__GFP_RECLAIM", "R" }, { "__GFP_DIRECT_RECLAIM", "DR" }, { "__GFP_KSWAPD_RECLAIM", "KR" }, - { "__GFP_OTHER_NODE", "ON" }, }; static size_t max_gfp_len; -- cgit v1.2.3 From d2d4edbebe07ddb77980656abe7b9bc7a9e0cdf7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 11 Jan 2017 14:59:38 +0900 Subject: perf probe: Fix to show correct locations for events on modules Fix to show correct locations for events on modules by relocating given address instead of retrying after failure. This happens when the module text size is big enough, bigger than sh_addr, because the original code retries with given address + sh_addr if it failed to find CU DIE at the given address. Any address smaller than sh_addr always fails and it retries with the correct address, but addresses bigger than sh_addr will get a CU DIE which is on the given address (not adjusted by sh_addr). In my environment(x86-64), the sh_addr of ".text" section is 0x10030. Since i915 is a huge kernel module, we can see this issue as below. $ grep "[Tt] .*\[i915\]" /proc/kallsyms | sort | head -n1 ffffffffc0270000 t i915_switcheroo_can_switch [i915] ffffffffc0270000 + 0x10030 = ffffffffc0280030, so we'll check symbols cross this boundary. $ grep "[Tt] .*\[i915\]" /proc/kallsyms | grep -B1 ^ffffffffc028\ | head -n 2 ffffffffc027ff80 t haswell_init_clock_gating [i915] ffffffffc0280110 t valleyview_init_clock_gating [i915] So setup probes on both function and see what happen. $ sudo ./perf probe -m i915 -a haswell_init_clock_gating \ -a valleyview_init_clock_gating Added new events: probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) You can now use it in all perf tools, such as: perf record -e probe:valleyview_init_clock_gating -aR sleep 1 $ sudo ./perf probe -l probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) probe:valleyview_init_clock_gating (on i915_vga_set_decode:4@gpu/drm/i915/i915_drv.c in i915) As you can see, haswell_init_clock_gating is correctly shown, but valleyview_init_clock_gating is not. With this patch, both events are shown correctly. $ sudo ./perf probe -l probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) Committer notes: In my case: # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating Added new events: probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) You can now use it in all perf tools, such as: perf record -e probe:valleyview_init_clock_gating -aR sleep 1 # perf probe -l probe:haswell_init_clock_gating (on i915_getparam+432@gpu/drm/i915/i915_drv.c in i915) probe:valleyview_init_clock_gating (on __i915_printk+240@gpu/drm/i915/i915_drv.c in i915) # # readelf -SW /lib/modules/4.9.0+/build/vmlinux | egrep -w '.text|Name' [Nr] Name Type Address Off Size ES Flg Lk Inf Al [ 1] .text PROGBITS ffffffff81000000 200000 822fd3 00 AX 0 0 4096 # So both are b0rked, now with the fix: # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating Added new events: probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) You can now use it in all perf tools, such as: perf record -e probe:valleyview_init_clock_gating -aR sleep 1 # perf probe -l probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) # Both looks correct. Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148411436777.9978.1440275861947194930.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index df4debe564da..0278fe1a4cc6 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1543,16 +1543,12 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, Dwarf_Addr _addr = 0, baseaddr = 0; const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; - bool reloc = false; -retry: + /* We always need to relocate the address for aranges */ + if (debuginfo__get_text_offset(dbg, &baseaddr) == 0) + addr += baseaddr; /* Find cu die */ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { - if (!reloc && debuginfo__get_text_offset(dbg, &baseaddr) == 0) { - addr += baseaddr; - reloc = true; - goto retry; - } pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; -- cgit v1.2.3 From 3e96dac7c956089d3f23aca98c4dfca57b6aaf8a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 11 Jan 2017 15:00:47 +0900 Subject: perf probe: Add error checks to offline probe post-processing Add error check codes on post processing and improve it for offline probe events as: - post processing fails if no matched symbol found in map(-ENOENT) or strdup() failed(-ENOMEM). - Even if the symbol name is the same, it updates symbol address and offset. Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148411443738.9978.4617979132625405545.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 50 ++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 4a57c8a60bd9..aa8a9227080a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -610,6 +610,33 @@ error: return ret ? : -ENOENT; } +/* Adjust symbol name and address */ +static int post_process_probe_trace_point(struct probe_trace_point *tp, + struct map *map, unsigned long offs) +{ + struct symbol *sym; + u64 addr = tp->address + tp->offset - offs; + + sym = map__find_symbol(map, addr); + if (!sym) + return -ENOENT; + + if (strcmp(sym->name, tp->symbol)) { + /* If we have no realname, use symbol for it */ + if (!tp->realname) + tp->realname = tp->symbol; + else + free(tp->symbol); + tp->symbol = strdup(sym->name); + if (!tp->symbol) + return -ENOMEM; + } + tp->offset = addr - sym->start; + tp->address -= offs; + + return 0; +} + /* * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions * and generate new symbols with suffixes such as .constprop.N or .isra.N @@ -622,11 +649,9 @@ static int post_process_offline_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *pathname) { - struct symbol *sym; struct map *map; unsigned long stext = 0; - u64 addr; - int i; + int i, ret = 0; /* Prepare a map for offline binary */ map = dso__new_map(pathname); @@ -636,23 +661,14 @@ post_process_offline_probe_trace_events(struct probe_trace_event *tevs, } for (i = 0; i < ntevs; i++) { - addr = tevs[i].point.address + tevs[i].point.offset - stext; - sym = map__find_symbol(map, addr); - if (!sym) - continue; - if (!strcmp(sym->name, tevs[i].point.symbol)) - continue; - /* If we have no realname, use symbol for it */ - if (!tevs[i].point.realname) - tevs[i].point.realname = tevs[i].point.symbol; - else - free(tevs[i].point.symbol); - tevs[i].point.symbol = strdup(sym->name); - tevs[i].point.offset = addr - sym->start; + ret = post_process_probe_trace_point(&tevs[i].point, + map, stext); + if (ret < 0) + break; } map__put(map); - return 0; + return ret; } static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, -- cgit v1.2.3 From 613f050d68a8ed3c0b18b9568698908ef7bbc1f7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 11 Jan 2017 15:01:57 +0900 Subject: perf probe: Fix to probe on gcc generated functions in modules Fix to probe on gcc generated functions on modules. Since probing on a module is based on its symbol name, it should be adjusted on actual symbols. E.g. without this fix, perf probe shows probe definition on non-exist symbol as below. $ perf probe -m build-x86_64/net/netfilter/nf_nat.ko -F in_range* in_range.isra.12 $ perf probe -m build-x86_64/net/netfilter/nf_nat.ko -D in_range p:probe/in_range nf_nat:in_range+0 With this fix, perf probe correctly shows a probe on gcc-generated symbol. $ perf probe -m build-x86_64/net/netfilter/nf_nat.ko -D in_range p:probe/in_range nf_nat:in_range.isra.12+0 This also fixes same problem on online module as below. $ perf probe -m i915 -D assert_plane p:probe/assert_plane i915:assert_plane.constprop.134+0 Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148411450673.9978.14905987549651656075.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 45 +++++++++++++++++++++++++++--------------- tools/perf/util/probe-finder.c | 7 +++++-- tools/perf/util/probe-finder.h | 3 +++ 3 files changed, 37 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index aa8a9227080a..6a6f44dd594b 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -698,18 +698,31 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, return ret; } -static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, - int ntevs, const char *module) +static int +post_process_module_probe_trace_events(struct probe_trace_event *tevs, + int ntevs, const char *module, + struct debuginfo *dinfo) { + Dwarf_Addr text_offs = 0; int i, ret = 0; char *mod_name = NULL; + struct map *map; if (!module) return 0; - mod_name = find_module_name(module); + map = get_target_map(module, false); + if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) { + pr_warning("Failed to get ELF symbols for %s\n", module); + return -EINVAL; + } + mod_name = find_module_name(module); for (i = 0; i < ntevs; i++) { + ret = post_process_probe_trace_point(&tevs[i].point, + map, (unsigned long)text_offs); + if (ret < 0) + break; tevs[i].point.module = strdup(mod_name ? mod_name : module); if (!tevs[i].point.module) { @@ -719,6 +732,8 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, } free(mod_name); + map__put(map); + return ret; } @@ -776,7 +791,7 @@ arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unuse static int post_process_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs, const char *module, - bool uprobe) + bool uprobe, struct debuginfo *dinfo) { int ret; @@ -784,7 +799,8 @@ static int post_process_probe_trace_events(struct perf_probe_event *pev, ret = add_exec_to_probe_trace_events(tevs, ntevs, module); else if (module) /* Currently ref_reloc_sym based probe is not for drivers */ - ret = add_module_to_probe_trace_events(tevs, ntevs, module); + ret = post_process_module_probe_trace_events(tevs, ntevs, + module, dinfo); else ret = post_process_kernel_probe_trace_events(tevs, ntevs); @@ -828,30 +844,27 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } } - debuginfo__delete(dinfo); - if (ntevs > 0) { /* Succeeded to find trace events */ pr_debug("Found %d probe_trace_events.\n", ntevs); ret = post_process_probe_trace_events(pev, *tevs, ntevs, - pev->target, pev->uprobes); + pev->target, pev->uprobes, dinfo); if (ret < 0 || ret == ntevs) { + pr_debug("Post processing failed or all events are skipped. (%d)\n", ret); clear_probe_trace_events(*tevs, ntevs); zfree(tevs); + ntevs = 0; } - if (ret != ntevs) - return ret < 0 ? ret : ntevs; - ntevs = 0; - /* Fall through */ } + debuginfo__delete(dinfo); + if (ntevs == 0) { /* No error but failed to find probe point. */ pr_warning("Probe point '%s' not found.\n", synthesize_perf_probe_point(&pev->point)); return -ENOENT; - } - /* Error path : ntevs < 0 */ - pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); - if (ntevs < 0) { + } else if (ntevs < 0) { + /* Error path : ntevs < 0 */ + pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); if (ntevs == -EBADF) pr_warning("Warning: No dwarf info found in the vmlinux - " "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 0278fe1a4cc6..0d9d6e0803b8 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1501,7 +1501,8 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, } /* For the kernel module, we need a special code to get a DIE */ -static int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs) +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset) { int n, i; Elf32_Word shndx; @@ -1530,6 +1531,8 @@ static int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs) if (!shdr) return -ENOENT; *offs = shdr->sh_addr; + if (adjust_offset) + *offs -= shdr->sh_offset; } } return 0; @@ -1545,7 +1548,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, int baseline = 0, lineno = 0, ret = 0; /* We always need to relocate the address for aranges */ - if (debuginfo__get_text_offset(dbg, &baseaddr) == 0) + if (debuginfo__get_text_offset(dbg, &baseaddr, false) == 0) addr += baseaddr; /* Find cu die */ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index f1d8558f498e..2956c5198652 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -46,6 +46,9 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, struct perf_probe_point *ppt); +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset); + /* Find a line range */ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); -- cgit v1.2.3 From 3fbfadce6012e7bb384b2e9ad47869d5177f7209 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 16 Jan 2017 22:17:29 -0800 Subject: bpf: Fix test_lru_sanity5() in test_lru_map.c test_lru_sanity5() fails when the number of online cpus is fewer than the number of possible cpus. It can be reproduced with qemu by using cmd args "--smp cpus=2,maxcpus=8". The problem is the loop in test_lru_sanity5() is testing 'i' which is incorrect. This patch: 1. Make sched_next_online() always return -1 if it cannot find a next cpu to schedule the process. 2. In test_lru_sanity5(), the parent process does sched_setaffinity() first (through sched_next_online()) and the forked process will inherit it according to the 'man sched_setaffinity'. Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Reported-by: Daniel Borkmann Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_lru_map.c | 53 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index b13fed534d76..9f7bd1915c21 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -67,21 +67,23 @@ static int map_equal(int lru_map, int expected) return map_subset(lru_map, expected) && map_subset(expected, lru_map); } -static int sched_next_online(int pid, int next_to_try) +static int sched_next_online(int pid, int *next_to_try) { cpu_set_t cpuset; + int next = *next_to_try; + int ret = -1; - if (next_to_try == nr_cpus) - return -1; - - while (next_to_try < nr_cpus) { + while (next < nr_cpus) { CPU_ZERO(&cpuset); - CPU_SET(next_to_try++, &cpuset); - if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) + CPU_SET(next++, &cpuset); + if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) { + ret = 0; break; + } } - return next_to_try; + *next_to_try = next; + return ret; } /* Size of the LRU amp is 2 @@ -96,11 +98,12 @@ static void test_lru_sanity0(int map_type, int map_flags) { unsigned long long key, value[nr_cpus]; int lru_map_fd, expected_map_fd; + int next_cpu = 0; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); if (map_flags & BPF_F_NO_COMMON_LRU) lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus); @@ -183,6 +186,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned int batch_size; unsigned int map_size; + int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) /* Ther percpu lru list (i.e each cpu has its own LRU @@ -196,7 +200,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); @@ -262,6 +266,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned int batch_size; unsigned int map_size; + int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) /* Ther percpu lru list (i.e each cpu has its own LRU @@ -275,7 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); @@ -370,11 +375,12 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned int batch_size; unsigned int map_size; + int next_cpu = 0; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); @@ -430,11 +436,12 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned long long key, value[nr_cpus]; unsigned long long end_key; + int next_cpu = 0; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); if (map_flags & BPF_F_NO_COMMON_LRU) lru_map_fd = create_map(map_type, map_flags, @@ -502,9 +509,8 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd) static void test_lru_sanity5(int map_type, int map_flags) { unsigned long long key, value[nr_cpus]; - int next_sched_cpu = 0; + int next_cpu = 0; int map_fd; - int i; if (map_flags & BPF_F_NO_COMMON_LRU) return; @@ -519,27 +525,20 @@ static void test_lru_sanity5(int map_type, int map_flags) key = 0; assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST)); - for (i = 0; i < nr_cpus; i++) { + while (sched_next_online(0, &next_cpu) != -1) { pid_t pid; pid = fork(); if (pid == 0) { - next_sched_cpu = sched_next_online(0, next_sched_cpu); - if (next_sched_cpu != -1) - do_test_lru_sanity5(key, map_fd); + do_test_lru_sanity5(key, map_fd); exit(0); } else if (pid == -1) { - printf("couldn't spawn #%d process\n", i); + printf("couldn't spawn process to test key:%llu\n", + key); exit(1); } else { int status; - /* It is mostly redundant and just allow the parent - * process to update next_shced_cpu for the next child - * process - */ - next_sched_cpu = sched_next_online(pid, next_sched_cpu); - assert(waitpid(pid, &status, 0) == pid); assert(status == 0); key++; @@ -547,6 +546,8 @@ static void test_lru_sanity5(int map_type, int map_flags) } close(map_fd); + /* At least one key should be tested */ + assert(key > 0); printf("Pass\n"); } -- cgit v1.2.3 From df21d2fa733035e4d414379960f94b2516b41296 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 19 Dec 2016 17:46:53 +0530 Subject: selftest/powerpc: Wrong PMC initialized in pmc56_overflow test Test uses PMC2 to count the event. But PMC1 is being initialized. Patch to fix it. Fixes: 3752e453f6ba ('selftests/powerpc: Add tests of PMU EBBs') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c index c22860ab9733..30e1ac62e8cb 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -66,7 +66,7 @@ int pmc56_overflow(void) FAIL_IF(ebb_event_enable(&event)); - mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + mtspr(SPRN_PMC2, pmc_sample_period(sample_period)); mtspr(SPRN_PMC5, 0); mtspr(SPRN_PMC6, 0); -- cgit v1.2.3 From b5b46c4740aed1538544f0fa849c5b76c7823469 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 18 Jan 2017 14:29:21 +0100 Subject: objtool: Fix IRET's opcode The IRET opcode is 0xcf according to the Intel manual and also to objdump of my vmlinux: 1ea8: 48 cf iretq Fix the opcode in arch_decode_instruction(). The previous value (0xc5) seems to correspond to LDS. Signed-off-by: Jiri Slaby Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170118132921.19319-1-jslaby@suse.cz Signed-off-by: Ingo Molnar --- tools/objtool/arch/x86/decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 5e0dea2cdc01..039636ffb6c8 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -150,9 +150,9 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, *type = INSN_RETURN; break; - case 0xc5: /* iret */ case 0xca: /* retf */ case 0xcb: /* retf */ + case 0xcf: /* iret */ *type = INSN_CONTEXT_SWITCH; break; -- cgit v1.2.3 From 21f5eda9b8671744539c8295b9df62991fffb2ce Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 29 Aug 2016 18:25:22 +0200 Subject: tools/virtio/ringtest: fix run-on-all.sh for offline cpus Since ef1b144d ("tools/virtio/ringtest: fix run-on-all.sh to work without /dev/cpu") run-on-all.sh uses seq 0 $HOST_AFFINITY as the list of ids of the CPUs to run the command on (assuming ids of online CPUs are consecutive and start from 0), where $HOST_AFFINITY is the highest CPU id in the system previously determined using lscpu. This can fail on systems with offline CPUs. Instead let's use lscpu to determine the list of online CPUs. Signed-off-by: Halil Pasic Fixes: ef1b144d ("tools/virtio/ringtest: fix run-on-all.sh to work without /dev/cpu") Reviewed-by: Sascha Silbe Signed-off-by: Cornelia Huck --- tools/virtio/ringtest/run-on-all.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh index 2e69ca812b4c..29b0d3920bfc 100755 --- a/tools/virtio/ringtest/run-on-all.sh +++ b/tools/virtio/ringtest/run-on-all.sh @@ -1,12 +1,13 @@ #!/bin/sh +CPUS_ONLINE=$(lscpu --online -p=cpu|grep -v -e '#') #use last CPU for host. Why not the first? #many devices tend to use cpu0 by default so #it tends to be busier -HOST_AFFINITY=$(lscpu -p=cpu | tail -1) +HOST_AFFINITY=$(echo "${CPUS_ONLINE}"|tail -n 1) #run command on all cpus -for cpu in $(seq 0 $HOST_AFFINITY) +for cpu in $CPUS_ONLINE do #Don't run guest and host on same CPU #It actually works ok if using signalling -- cgit v1.2.3 From 47a4c49af6cc1982ce613c8ee79aab459d04c44c Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Fri, 2 Sep 2016 17:59:36 +0200 Subject: tools/virtio/ringtest: tweaks for s390 Make ringtest work on s390 too. Signed-off-by: Halil Pasic Acked-by: Sascha Silbe Signed-off-by: Cornelia Huck --- tools/virtio/ringtest/main.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 34e63cc4c572..14142faf040b 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -26,6 +26,16 @@ static inline void wait_cycles(unsigned long long cycles) #define VMEXIT_CYCLES 500 #define VMENTRY_CYCLES 500 +#elif defined(__s390x__) +static inline void wait_cycles(unsigned long long cycles) +{ + asm volatile("0: brctg %0,0b" : : "d" (cycles)); +} + +/* tweak me */ +#define VMEXIT_CYCLES 200 +#define VMENTRY_CYCLES 200 + #else static inline void wait_cycles(unsigned long long cycles) { @@ -81,6 +91,8 @@ extern unsigned ring_size; /* Is there a portable way to do this? */ #if defined(__x86_64__) || defined(__i386__) #define cpu_relax() asm ("rep; nop" ::: "memory") +#elif defined(__s390x__) +#define cpu_relax() barrier() #else #define cpu_relax() assert(0) #endif -- cgit v1.2.3 From 8381cdd0e32dd748bd34ca3ace476949948bd793 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 Jan 2017 14:14:56 +0900 Subject: perf diff: Fix segfault on 'perf diff -o N' option The -o/--order option is to select column number to sort a diff result. It does the job by adding a hpp field at the beginning of the sort list. But it should not be added to the output field list as it has no callbacks required by a output field. During the setup_sorting(), the perf_hpp__setup_output_field() appends the given sort keys to the output field if it's not there already. Originally it was checked by fmt->list being non-empty. But commit 3f931f2c4274 ("perf hists: Make hpp setup function generic") changed it to check the ->equal callback. Anyways, we don't need to add the pseudo hpp field to the output field list since it won't be used for output. So just skip fields if they have no ->color or ->entry callbacks. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Fixes: 3f931f2c4274 ("perf hists: Make hpp setup function generic") Link: http://lkml.kernel.org/r/20170118051457.30946-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 37388397b5bc..4ec79b2f9416 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -560,6 +560,10 @@ void perf_hpp__setup_output_field(struct perf_hpp_list *list) perf_hpp_list__for_each_sort_list(list, fmt) { struct perf_hpp_fmt *pos; + /* skip sort-only fields ("sort_compute" in perf diff) */ + if (!fmt->entry && !fmt->color) + continue; + perf_hpp_list__for_each_format(list, pos) { if (fmt_equal(fmt, pos)) goto next; -- cgit v1.2.3 From a1c9f97f0b64e6337d9cfcc08c134450934fdd90 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 Jan 2017 14:14:57 +0900 Subject: perf diff: Fix -o/--order option behavior (again) Commit 21e6d8428664 ("perf diff: Use perf_hpp__register_sort_field interface") changed list_add() to perf_hpp__register_sort_field(). This resulted in a behavior change since the field was added to the tail instead of the head. So the -o option is mostly ignored due to its order in the list. This patch fixes it by adding perf_hpp__prepend_sort_field(). Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Fixes: 21e6d8428664 ("perf diff: Use perf_hpp__register_sort_field interface") Link: http://lkml.kernel.org/r/20170118051457.30946-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 2 +- tools/perf/ui/hist.c | 6 ++++++ tools/perf/util/hist.h | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 9ff0db4e2d0c..933aeec46f4a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1199,7 +1199,7 @@ static int ui_init(void) BUG_ON(1); } - perf_hpp__register_sort_field(fmt); + perf_hpp__prepend_sort_field(fmt); return 0; } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 4ec79b2f9416..18cfcdc90356 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -521,6 +521,12 @@ void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, list_add_tail(&format->sort_list, &list->sorts); } +void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) +{ + list_add(&format->sort_list, &list->sorts); +} + void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del(&format->list); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d4b6514eeef5..28c216e3d5b7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -283,6 +283,8 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list, struct perf_hpp_fmt *format); void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, struct perf_hpp_fmt *format); +void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); static inline void perf_hpp__column_register(struct perf_hpp_fmt *format) { @@ -294,6 +296,11 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) perf_hpp_list__register_sort_field(&perf_hpp_list, format); } +static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format) +{ + perf_hpp_list__prepend_sort_field(&perf_hpp_list, format); +} + #define perf_hpp_list__for_each_format(_list, format) \ list_for_each_entry(format, &(_list)->fields, list) -- cgit v1.2.3 From aa33b9b9a2ebb00d33c83a5312d4fbf2d5aeba36 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Thu, 5 Jan 2017 22:23:31 -0800 Subject: perf callchain: Reference count maps If dso__load_kcore frees all of the existing maps, but one has already been attached to a callchain cursor node, then we can get a SIGSEGV in any function that happens to try to use this invalid cursor. Use the existing map refcount mechanism to forestall cleanup of a map until the cursor iterates past the node. Signed-off-by: Krister Johansen Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: stable@kernel.org Fixes: 84c2cafa2889 ("perf tools: Reference count struct map") Link: http://lkml.kernel.org/r/20170106062331.GB2707@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 11 +++++++++-- tools/perf/util/callchain.h | 6 ++++++ tools/perf/util/hist.c | 7 +++++++ 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 42922512c1c6..8b610dd9e2f6 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -437,7 +437,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; - call->ms.map = cursor_node->map; + call->ms.map = map__get(cursor_node->map); if (cursor_node->branch) { call->branch_count = 1; @@ -477,6 +477,7 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del(&call->list); + map__zput(call->ms.map); free(call); } free(new); @@ -761,6 +762,7 @@ merge_chain_branch(struct callchain_cursor *cursor, list->ms.map, list->ms.sym, false, NULL, 0, 0); list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -811,7 +813,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, } node->ip = ip; - node->map = map; + map__zput(node->map); + node->map = map__get(map); node->sym = sym; node->branch = branch; node->nr_loop_iter = nr_loop_iter; @@ -1142,11 +1145,13 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -1210,6 +1215,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; + map__get(new->ms.map); list_add_tail(&new->list, &head); } parent = parent->parent; @@ -1230,6 +1236,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) out: list_for_each_entry_safe(chain, new, &head, list) { list_del(&chain->list); + map__zput(chain->ms.map); free(chain); } return -ENOMEM; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 35c8e379530f..4f4b60f1558a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -5,6 +5,7 @@ #include #include #include "event.h" +#include "map.h" #include "symbol.h" #define HELP_PAD "\t\t\t\t" @@ -184,8 +185,13 @@ int callchain_merge(struct callchain_cursor *cursor, */ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) { + struct callchain_cursor_node *node; + cursor->nr = 0; cursor->last = &cursor->first; + + for (node = cursor->first; node != NULL; node = node->next) + map__zput(node->map); } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6770a9645609..7d1b7d33e644 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,6 +1,7 @@ #include "util.h" #include "build-id.h" #include "hist.h" +#include "map.h" #include "session.h" #include "sort.h" #include "evlist.h" @@ -1019,6 +1020,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg) { int err, err2; + struct map *alm = NULL; + + if (al && al->map) + alm = map__get(al->map); err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); @@ -1058,6 +1063,8 @@ out: if (!err) err = err2; + map__put(alm); + return err; } -- cgit v1.2.3 From 7f677633379b4abb3281cdbe7e7006f049305c03 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Feb 2017 20:28:24 -0800 Subject: bpf: introduce BPF_F_ALLOW_OVERRIDE flag If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command to the given cgroup the descendent cgroup will be able to override effective bpf program that was inherited from this cgroup. By default it's not passed, therefore override is disallowed. Examples: 1. prog X attached to /A with default prog Y fails to attach to /A/B and /A/B/C Everything under /A runs prog X 2. prog X attached to /A with allow_override. prog Y fails to attach to /A/B with default (non-override) prog M attached to /A/B with allow_override. Everything under /A/B runs prog M only. 3. prog X attached to /A with allow_override. prog Y fails to attach to /A with default. The user has to detach first to switch the mode. In the future this behavior may be extended with a chain of non-overridable programs. Also fix the bug where detach from cgroup where nothing is attached was not throwing error. Return ENOENT in such case. Add several testcases and adjust libbpf. Fixes: 3007098494be ("cgroup: add support for eBPF programs") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Tejun Heo Acked-by: Daniel Mack Signed-off-by: David S. Miller --- tools/lib/bpf/bpf.c | 4 +++- tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3ddb58a36d3c..ae752fa4eaa7 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -168,7 +168,8 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) { union bpf_attr attr; @@ -176,6 +177,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; + attr.attach_flags = flags; return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index a2f9853dd882..4ac6c4b84100 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -41,7 +41,8 @@ int bpf_map_delete_elem(int fd, void *key); int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, + unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); -- cgit v1.2.3 From 5463b3d043826ff8ef487edbd1ef1bfffb677437 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 14 Feb 2017 08:22:20 +1100 Subject: bpf: kernel header files need to be copied into the tools directory Signed-off-by: Stephen Rothwell Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0eb0e87dbe9f..d2b0ac799d03 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -116,6 +116,12 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command + * to the given target_fd cgroup the descendent cgroup will be able to + * override effective bpf program that was inherited from this cgroup + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -171,6 +177,7 @@ union bpf_attr { __u32 target_fd; /* container object to attach to */ __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; + __u32 attach_flags; }; } __attribute__((aligned(8))); -- cgit v1.2.3